Skip to content

Commit 4f306cb

Browse files
committed
Added support for image URLs
1 parent 8463b13 commit 4f306cb

File tree

2 files changed

+17
-1
lines changed

2 files changed

+17
-1
lines changed

pytesseract/pytesseract.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -209,7 +209,10 @@ def save(image):
209209
try:
210210
with NamedTemporaryFile(prefix='tess_', delete=False) as f:
211211
if isinstance(image, str):
212-
yield f.name, realpath(normpath(normcase(image)))
212+
if image.startswith('http:') or image.startswith('https:'):
213+
yield f.name, image
214+
else:
215+
yield f.name, realpath(normpath(normcase(image)))
213216
return
214217
image, extension = prepare(image)
215218
input_file_name = f'{f.name}_input{extsep}{extension}'

tests/pytesseract_test.py

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -50,6 +50,7 @@
5050
DATA_DIR = path.join(TESTS_DIR, 'data')
5151
TESSDATA_DIR = path.join(TESTS_DIR, 'tessdata')
5252
TEST_JPEG = path.join(DATA_DIR, 'test.jpg')
53+
TEST_JPEG_URL = 'https://i.imgur.com/hWO45US.jpg'
5354

5455
pytestmark = pytest.mark.pytesseract # used marker for the module
5556
string_type = unicode if IS_PYTHON_2 else str # noqa: 821
@@ -121,6 +122,18 @@ def test_image_to_string_with_image_type(test_file):
121122
assert 'The quick brown dog' in image_to_string(test_file_path, 'eng')
122123

123124

125+
@pytest.mark.parametrize(
126+
'test_file',
127+
[TEST_JPEG_URL],
128+
ids=['jpeg_url'],
129+
)
130+
def test_image_to_string_with_url(test_file):
131+
# Tesseract-ocr supports image URLs from version 4.1.1
132+
if TESSERACT_VERSION[0] < 4:
133+
pytest.skip('skip url test')
134+
assert 'The quick brown dog' in image_to_string(test_file)
135+
136+
124137
@pytest.mark.parametrize(
125138
'test_file',
126139
[TEST_JPEG, Image.open(TEST_JPEG)],

0 commit comments

Comments
 (0)