|
14 | 14 | from pytesseract import ALTONotSupported |
15 | 15 | from pytesseract import get_languages |
16 | 16 | from pytesseract import get_tesseract_version |
| 17 | +from pytesseract import has_libcurl |
17 | 18 | from pytesseract import image_to_alto_xml |
18 | 19 | from pytesseract import image_to_boxes |
19 | 20 | from pytesseract import image_to_data |
|
24 | 25 | from pytesseract import run_and_get_multiple_output |
25 | 26 | from pytesseract import TesseractNotFoundError |
26 | 27 | from pytesseract import TSVNotSupported |
| 28 | +from pytesseract import URLNotSupported |
27 | 29 | from pytesseract.pytesseract import file_to_dict |
28 | 30 | from pytesseract.pytesseract import numpy_installed |
29 | 31 | from pytesseract.pytesseract import pandas_installed |
|
45 | 47 | IS_PYTHON_3 = not IS_PYTHON_2 |
46 | 48 |
|
47 | 49 | TESSERACT_VERSION = tuple(get_tesseract_version().release) # to skip tests |
| 50 | +HAS_LIBCURL = has_libcurl() # to skip tests |
48 | 51 |
|
49 | 52 | TESTS_DIR = path.dirname(path.abspath(__file__)) |
50 | 53 | DATA_DIR = path.join(TESTS_DIR, 'data') |
51 | 54 | TESSDATA_DIR = path.join(TESTS_DIR, 'tessdata') |
52 | 55 | TEST_JPEG = path.join(DATA_DIR, 'test.jpg') |
53 | | -TEST_JPEG_URL = 'https://i.imgur.com/hWO45US.jpg' |
| 56 | +TEST_JPEG_URL = ('https://github.com/madmaze/pytesseract' |
| 57 | + '/blob/master/tests/data/test.jpg?raw=true') |
54 | 58 |
|
55 | 59 | pytestmark = pytest.mark.pytesseract # used marker for the module |
56 | 60 | string_type = unicode if IS_PYTHON_2 else str # noqa: 821 |
@@ -128,8 +132,9 @@ def test_image_to_string_with_image_type(test_file): |
128 | 132 | ids=['jpeg_url'], |
129 | 133 | ) |
130 | 134 | def test_image_to_string_with_url(test_file): |
131 | | - # Tesseract-ocr supports image URLs from version 4.1.1 |
132 | | - if TESSERACT_VERSION[0] < 4: |
| 135 | + # Tesseract-ocr supports image URLs from version 4.1.1 |
| 136 | + # and must be built with libcurl. |
| 137 | + if TESSERACT_VERSION < (4, 1, 1) or not HAS_LIBCURL: |
133 | 138 | pytest.skip('skip url test') |
134 | 139 | assert 'The quick brown dog' in image_to_string(test_file) |
135 | 140 |
|
@@ -311,6 +316,15 @@ def test_image_to_data__pandas_support(test_file_small): |
311 | 316 | image_to_data(test_file_small, output_type=Output.DATAFRAME) |
312 | 317 |
|
313 | 318 |
|
| 319 | +@pytest.mark.skipif( |
| 320 | + TESSERACT_VERSION >= (4, 1, 1) and HAS_LIBCURL, |
| 321 | + reason='requires tesseract < 4.1.1 or tesseract built without libcurl', |
| 322 | +) |
| 323 | +def test_image_to_string_url_support(): |
| 324 | + with pytest.raises(URLNotSupported): |
| 325 | + image_to_string(TEST_JPEG_URL) |
| 326 | + |
| 327 | + |
314 | 328 | @pytest.mark.skipif( |
315 | 329 | TESSERACT_VERSION[:2] < (3, 5), |
316 | 330 | reason='requires tesseract >= 3.05', |
|
0 commit comments