Skip to content

Commit a3090e7

Browse files
♻️ move crop and split extractors to result level and update function names
1 parent f47e533 commit a3090e7

18 files changed

Lines changed: 99 additions & 108 deletions

docs/v2/product/crop/index.rst

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,6 @@ Crop
88

99
./params
1010

11-
Crop Box
12-
========
13-
.. autoclass:: mindee.v2.product.crop.crop_box.CropBox
14-
:members:
15-
:inherited-members:
16-
1711
Crop Item
1812
=========
1913
.. autoclass:: mindee.v2.product.crop.crop_item.CropItem

mindee/v2/__init__.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
from mindee.v2.client import Client
22
from mindee.v2.file_operations.crop import (
3-
extract_crops,
3+
extract_multiple_crops,
44
extract_single_crop,
55
)
6-
from mindee.v2.file_operations.split import extract_splits
6+
from mindee.v2.file_operations.split import extract_multiple_splits
77
from mindee.v2.product.classification.classification_response import (
88
ClassificationResponse,
99
)
@@ -33,8 +33,8 @@
3333
"OCRResponse",
3434
"SplitParameters",
3535
"SplitResponse",
36-
"extract_crops",
37-
"extract_crops",
36+
"extract_multiple_crops",
37+
"extract_multiple_crops",
38+
"extract_multiple_splits",
3839
"extract_single_crop",
39-
"extract_splits",
4040
]
Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,12 @@
11
from mindee.v2.file_operations.crop import (
2-
extract_crops,
2+
extract_multiple_crops,
33
extract_single_crop,
44
)
5-
from mindee.v2.file_operations.split import extract_splits
5+
from mindee.v2.file_operations.split import extract_multiple_splits
66

7-
__all__ = ["extract_crops", "extract_crops", "extract_single_crop", "extract_splits"]
7+
__all__ = [
8+
"extract_multiple_crops",
9+
"extract_multiple_crops",
10+
"extract_multiple_splits",
11+
"extract_single_crop",
12+
]

mindee/v2/file_operations/crop.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from mindee.input.local_input_source import LocalInputSource
66
from mindee.v2.file_operations.crop_files import CropFiles
77
from mindee.v2.parsing.inference.field import FieldLocation
8-
from mindee.v2.product.crop.crop_box import CropBox
8+
from mindee.v2.product.crop.crop_item import CropItem
99

1010

1111
def extract_single_crop(
@@ -23,7 +23,9 @@ def extract_single_crop(
2323
return extract_multiple_images_from_source(input_source, crop.page, polygons)[0]
2424

2525

26-
def extract_crops(input_source: LocalInputSource, crops: list[CropBox]) -> CropFiles:
26+
def extract_multiple_crops(
27+
input_source: LocalInputSource, crops: list[CropItem]
28+
) -> CropFiles:
2729
"""
2830
Extracts individual receipts from multi-receipts documents.
2931

mindee/v2/file_operations/split.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,10 @@ def extract_single_split(
1515
:param split: List of pages to keep.
1616
:return: Extracted PDF
1717
"""
18-
return extract_splits(input_source, [split])[0]
18+
return extract_multiple_splits(input_source, [split])[0]
1919

2020

21-
def extract_splits(
21+
def extract_multiple_splits(
2222
input_source: LocalInputSource,
2323
splits: list[list[int]],
2424
) -> SplitFiles:

mindee/v2/product/crop/__init__.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,13 @@
1-
from mindee.v2.product.crop.crop_box import CropBox
21
from mindee.v2.product.crop.crop_inference import CropInference
32
from mindee.v2.product.crop.crop_item import CropItem
43
from mindee.v2.product.crop.crop_response import CropResponse
54
from mindee.v2.product.crop.crop_result import CropResult
65
from mindee.v2.product.crop.params.crop_parameters import CropParameters
76

87
__all__ = [
9-
"CropBox",
108
"CropInference",
119
"CropItem",
10+
"CropItem",
1211
"CropParameters",
1312
"CropResponse",
1413
"CropResult",

mindee/v2/product/crop/crop_box.py

Lines changed: 0 additions & 41 deletions
This file was deleted.
Lines changed: 41 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,43 @@
1-
from mindee.v2.product.crop.crop_box import CropBox
1+
from mindee.image.extracted_image import ExtractedImage
2+
from mindee.image.image_extractor import extract_multiple_images_from_source
3+
from mindee.input.local_input_source import LocalInputSource
4+
from mindee.parsing.common import StringDict
5+
from mindee.v2.parsing.inference.field import FieldLocation
6+
from mindee.v2.product.extraction.extraction_response import ExtractionResponse
27

38

4-
class CropItem(CropBox):
5-
"""Result of a cropped document region."""
9+
class CropItem:
10+
"""Result of a cropped document region."""
11+
12+
location: FieldLocation
13+
"""Location which includes cropping coordinates for the detected object, within the source document."""
14+
15+
object_type: str
16+
"""Type or classification of the detected object."""
17+
18+
extraction_response: ExtractionResponse | None = None
19+
"""The extraction response associated with the crop."""
20+
21+
def __init__(self, server_response: StringDict):
22+
self.location = FieldLocation(server_response["location"])
23+
self.object_type = server_response["object_type"]
24+
if server_response.get("extraction_response") is not None:
25+
self.extraction_response = ExtractionResponse(
26+
server_response["extraction_response"]
27+
)
28+
29+
def __str__(self) -> str:
30+
return f"* :Location: {self.location}\n :Object Type: {self.object_type}"
31+
32+
def extract_from_input_source(
33+
self, input_source: LocalInputSource
34+
) -> ExtractedImage:
35+
"""
36+
Apply the crop location to a file and return a single extracted image.
37+
38+
:param input_source: Local file to apply the inference to
39+
:return: Extracted image
40+
"""
41+
return extract_multiple_images_from_source(
42+
input_source, self.location.page, [self.location.polygon]
43+
)[0]
Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,4 @@
1-
from mindee.input.local_input_source import LocalInputSource
21
from mindee.parsing.common import StringDict
3-
from mindee.v2.file_operations.crop_files import CropFiles
42
from mindee.v2.parsing.inference.base_response import BaseResponse
53
from mindee.v2.product.crop.crop_inference import CropInference
64

@@ -17,17 +15,3 @@ class CropResponse(BaseResponse):
1715
def __init__(self, raw_response: StringDict) -> None:
1816
super().__init__(raw_response)
1917
self.inference = CropInference(raw_response["inference"])
20-
21-
def extract_from_file(self, input_source: LocalInputSource) -> CropFiles:
22-
"""
23-
Apply the crop inference to a file and return a list of extracted images.
24-
25-
:param input_source: Local file to apply the inference to
26-
:return: List of extracted PDFs
27-
"""
28-
return CropFiles(
29-
[
30-
crop.extract_from_file(input_source)
31-
for crop in self.inference.result.crops
32-
]
33-
)

mindee/v2/product/crop/crop_result.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
1+
from mindee.input.local_input_source import LocalInputSource
12
from mindee.parsing.common import StringDict
3+
from mindee.v2.file_operations.crop import extract_multiple_crops
4+
from mindee.v2.file_operations.crop_files import CropFiles
25
from mindee.v2.product.crop.crop_item import CropItem
36

47

@@ -16,3 +19,11 @@ def __str__(self) -> str:
1619
crops += "\n".join([str(crop) for crop in self.crops])
1720
out_str = f"Crops\n====={crops}"
1821
return out_str
22+
23+
def extract_from_input_source(self, input_source: LocalInputSource) -> CropFiles:
24+
"""
25+
Apply all the crops to a file and return the extracted images.
26+
27+
:param input_source: Input file
28+
"""
29+
return extract_multiple_crops(input_source, self.crops)

0 commit comments

Comments
 (0)