Skip to content

Commit 78dc5c2

Browse files
committed
✨ Use objects for custom doc fields
1 parent cf53299 commit 78dc5c2

37 files changed

Lines changed: 686 additions & 572 deletions

README.md

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -17,10 +17,10 @@ Finally, Python away!
1717
```python
1818
from mindee import Client
1919

20-
# Init a new client and configure the Invoice API
21-
mindee_client = Client(api_key="my-api-key").config_invoice()
20+
# Init a new client
21+
mindee_client = Client(api_key="my-api-key")
2222

23-
# Load a file from disk and parse it
23+
# Load a file from disk and parse it as an invoice
2424
api_response = mindee_client.doc_from_path("/path/to/the/invoice.pdf").parse("invoice")
2525

2626
# Print a brief summary of the parsed data
@@ -32,8 +32,8 @@ print(api_response.document)
3232
```python
3333
from mindee import Client
3434

35-
# Init a new client and configure your custom document
36-
mindee_client = Client(api_key="my-api-key").config_custom_doc(
35+
# Init a new client and add your custom endpoint (document)
36+
mindee_client = Client(api_key="my-api-key").add_endpoint(
3737
account_name="john",
3838
endpoint_name="wnine",
3939
)

mindee/__main__.py

Lines changed: 5 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -30,22 +30,6 @@
3030
}
3131

3232

33-
def _ots_client(args: Namespace, info: dict):
34-
client = Client(api_key=args.api_key, raise_on_error=args.raise_on_error)
35-
func = getattr(client, f"config_{info['doc_type']}")
36-
func()
37-
return client
38-
39-
40-
def _custom_client(args: Namespace):
41-
client = Client(api_key=args.api_key, raise_on_error=args.raise_on_error)
42-
client.config_custom_doc(
43-
endpoint_name=args.doc_type,
44-
account_name=args.username,
45-
)
46-
return client
47-
48-
4933
def _get_input_doc(client, args) -> DocumentClient:
5034
if args.input_type == "file":
5135
with open(args.path, "rb", buffering=30) as file_handle:
@@ -61,12 +45,15 @@ def _get_input_doc(client, args) -> DocumentClient:
6145

6246
def call_endpoint(args: Namespace):
6347
"""Call the endpoint given passed arguments."""
48+
client = Client(api_key=args.api_key, raise_on_error=args.raise_on_error)
6449
if args.product_name == "custom":
65-
client = _custom_client(args)
50+
client.add_endpoint(
51+
endpoint_name=args.doc_type,
52+
account_name=args.username,
53+
)
6654
doc_type = args.doc_type
6755
else:
6856
info = DOCUMENTS[args.product_name]
69-
client = _ots_client(args, info)
7057
doc_type = info["doc_type"]
7158

7259
input_doc = _get_input_doc(client, args)

mindee/client.py

Lines changed: 59 additions & 69 deletions
Original file line number | Diff line number | Diff line change
@@ -8,15 +8,7 @@
88
from mindee.documents.invoice import Invoice
99
from mindee.documents.passport import Passport
1010
from mindee.documents.receipt import Receipt
11-
from mindee.endpoints import (
12-
OTS_OWNER,
13-
BankCheckEndpoint,
14-
CustomEndpoint,
15-
HTTPException,
16-
InvoiceEndpoint,
17-
PassportEndpoint,
18-
ReceiptEndpoint,
19-
)
11+
from mindee.endpoints import OTS_OWNER, CustomEndpoint, HTTPException, StandardEndpoint
2012
from mindee.input.page_options import PageOptions
2113
from mindee.input.sources import (
2214
Base64Input,
@@ -91,7 +83,7 @@ def parse(
9183
doc_config.check_api_keys()
9284
if page_options and self.input_doc.is_pdf():
9385
self.input_doc.process_pdf(
94-
page_options.behavior,
86+
page_options.operation,
9587
page_options.on_min_pages,
9688
page_options.page_indexes,
9789
)
@@ -100,7 +92,7 @@ def parse(
10092
def _make_request(
10193
self, doc_config: DocumentConfig, include_words: bool, close_file: bool
10294
) -> PredictResponse:
103-
response = doc_config.constructor.request(
95+
response = doc_config.document_class.request(
10496
doc_config.endpoints,
10597
self.input_doc,
10698
include_words=include_words,
@@ -147,15 +139,68 @@ def __init__(self, api_key: str = "", raise_on_error: bool = True):
147139
self._doc_configs: Dict[tuple, DocumentConfig] = {}
148140
self.raise_on_error = raise_on_error
149141
self.api_key = api_key
150-
151-
def config_custom_doc(
142+
self._init_default_endpoints()
143+
144+
def _init_default_endpoints(self) -> None:
145+
self._doc_configs = {
146+
(OTS_OWNER, "invoice"): DocumentConfig(
147+
document_type="invoice",
148+
constructor=Invoice,
149+
endpoints=[
150+
StandardEndpoint(
151+
url_name="invoices", version="3", api_key=self.api_key
152+
)
153+
],
154+
),
155+
(OTS_OWNER, "receipt"): DocumentConfig(
156+
document_type="receipt",
157+
constructor=Receipt,
158+
endpoints=[
159+
StandardEndpoint(
160+
url_name="expense_receipts", version="3", api_key=self.api_key
161+
)
162+
],
163+
),
164+
(OTS_OWNER, "financial_doc"): DocumentConfig(
165+
document_type="financial_doc",
166+
constructor=FinancialDocument,
167+
endpoints=[
168+
StandardEndpoint(
169+
url_name="invoices", version="3", api_key=self.api_key
170+
),
171+
StandardEndpoint(
172+
url_name="expense_receipts", version="3", api_key=self.api_key
173+
),
174+
],
175+
),
176+
(OTS_OWNER, "passport"): DocumentConfig(
177+
document_type="passport",
178+
constructor=Passport,
179+
endpoints=[
180+
StandardEndpoint(
181+
url_name="passport", version="1", api_key=self.api_key
182+
)
183+
],
184+
),
185+
(OTS_OWNER, "bank_check"): DocumentConfig(
186+
document_type="bank_check",
187+
constructor=BankCheck,
188+
endpoints=[
189+
StandardEndpoint(
190+
url_name="bank_check", version="1", api_key=self.api_key
191+
)
192+
],
193+
),
194+
}
195+
196+
def add_endpoint(
152197
self,
153198
account_name: str,
154199
endpoint_name: str,
155200
version: str = "1",
156201
) -> "Client":
157202
"""
158-
Configure a custom document using the Mindee API Builder.
203+
Add a custom endpoint, created using the Mindee API Builder.
159204
160205
:param endpoint_name: The "API name" field in the "Settings" page of the API Builder
161206
:param account_name: Your organization's username on the API Builder
@@ -176,61 +221,6 @@ def config_custom_doc(
176221
)
177222
return self
178223

179-
def config_invoice(self) -> "Client":
180-
"""Configure a Mindee Invoice document."""
181-
config = DocumentConfig(
182-
document_type="invoice",
183-
constructor=Invoice,
184-
endpoints=[InvoiceEndpoint(api_key=self.api_key)],
185-
)
186-
self._doc_configs[(OTS_OWNER, "invoice")] = config
187-
return self
188-
189-
def config_receipt(self) -> "Client":
190-
"""Configure a Mindee Expense Receipts document."""
191-
config = DocumentConfig(
192-
document_type="receipt",
193-
constructor=Receipt,
194-
endpoints=[ReceiptEndpoint(api_key=self.api_key)],
195-
)
196-
self._doc_configs[(OTS_OWNER, "receipt")] = config
197-
return self
198-
199-
def config_financial_doc(
200-
self,
201-
) -> "Client":
202-
"""Configure a Mindee Financial document. Uses Invoice and Expense Receipt internally."""
203-
config = DocumentConfig(
204-
document_type="financial_doc",
205-
constructor=FinancialDocument,
206-
endpoints=[
207-
InvoiceEndpoint(api_key=self.api_key),
208-
ReceiptEndpoint(api_key=self.api_key),
209-
],
210-
)
211-
self._doc_configs[(OTS_OWNER, "financial_doc")] = config
212-
return self
213-
214-
def config_passport(self) -> "Client":
215-
"""Configure a Mindee Passport document."""
216-
config = DocumentConfig(
217-
document_type="passport",
218-
constructor=Passport,
219-
endpoints=[PassportEndpoint(api_key=self.api_key)],
220-
)
221-
self._doc_configs[(OTS_OWNER, "passport")] = config
222-
return self
223-
224-
def config_bank_check(self) -> "Client":
225-
"""Configure a Mindee Bank check document."""
226-
config = DocumentConfig(
227-
document_type="bank_check",
228-
constructor=BankCheck,
229-
endpoints=[BankCheckEndpoint(api_key=self.api_key)],
230-
)
231-
self._doc_configs[(OTS_OWNER, "bank_check")] = config
232-
return self
233-
234224
def doc_from_path(
235225
self,
236226
input_path: str,

mindee/document_config.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
class DocumentConfig:
88
document_type: str
99
endpoints: List[Endpoint]
10-
constructor: TypeDocument
10+
document_class: TypeDocument
1111

1212
def __init__(
1313
self,
@@ -16,7 +16,7 @@ def __init__(
1616
endpoints: List[Endpoint],
1717
):
1818
self.document_type = document_type
19-
self.constructor = constructor
19+
self.document_class = constructor
2020
self.endpoints = endpoints
2121

2222
def check_api_keys(self) -> None:
@@ -25,7 +25,7 @@ def check_api_keys(self) -> None:
2525
if not endpoint.api_key:
2626
raise RuntimeError(
2727
(
28-
f"Missing API key for '{endpoint.key_name}',"
28+
f"Missing API key for '{self.document_type}',"
2929
"check your Client configuration.\n"
3030
"You can set this using the "
3131
f"'{MINDEE_API_KEY_NAME}' environment variable."

mindee/documents/bank_check.py

Lines changed: 19 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -1,29 +1,29 @@
11
from typing import List, Optional
22

33
from mindee.documents.base import Document, TypeApiPrediction
4-
from mindee.fields.amount import Amount
5-
from mindee.fields.base import Field
6-
from mindee.fields.date import Date
4+
from mindee.fields.amount import AmountField
5+
from mindee.fields.date import DateField
76
from mindee.fields.orientation import Orientation
87
from mindee.fields.position import Position
8+
from mindee.fields.text import TextField
99

1010

1111
class BankCheck(Document):
12-
date: Date
12+
date: DateField
1313
"""Date the check was issued"""
14-
amount: Amount
14+
amount: AmountField
1515
"""Total including taxes"""
16-
payees: List[Field]
16+
payees: List[TextField]
1717
"""List of payees (full name or company name)"""
18-
check_number: Field
18+
check_number: TextField
1919
"""Check number"""
20-
routing_number: Field
20+
routing_number: TextField
2121
"""Payer's bank account routing number"""
22-
account_number: Field
22+
account_number: TextField
2323
"""Payer's bank account number"""
24-
check_position: Field
24+
check_position: Position
2525
"""Check's position in the image"""
26-
signatures_positions: List[Field]
26+
signatures_positions: List[Position]
2727
"""Signatures' positions in the image"""
2828
# orientation is only present on page-level, not document-level
2929
orientation: Optional[Orientation] = None
@@ -62,13 +62,15 @@ def _build_from_api_prediction(
6262
if page_n is not None:
6363
self.orientation = Orientation(api_prediction["orientation"], page_n=page_n)
6464

65-
self.routing_number = Field(api_prediction["routing_number"], page_n=page_n)
66-
self.account_number = Field(api_prediction["account_number"], page_n=page_n)
67-
self.check_number = Field(api_prediction["check_number"], page_n=page_n)
68-
self.date = Date(api_prediction["date"], "value", page_n=page_n)
69-
self.amount = Amount(api_prediction["amount"], value_key="value", page_n=page_n)
65+
self.routing_number = TextField(api_prediction["routing_number"], page_n=page_n)
66+
self.account_number = TextField(api_prediction["account_number"], page_n=page_n)
67+
self.check_number = TextField(api_prediction["check_number"], page_n=page_n)
68+
self.date = DateField(api_prediction["date"], "value", page_n=page_n)
69+
self.amount = AmountField(
70+
api_prediction["amount"], value_key="value", page_n=page_n
71+
)
7072
self.payees = [
71-
Field(payee, page_n=page_n) for payee in api_prediction["payees"]
73+
TextField(payee, page_n=page_n) for payee in api_prediction["payees"]
7274
]
7375
self.check_position = Position(api_prediction["check_position"], page_n=page_n)
7476
self.signatures_positions = [

mindee/documents/custom_document.py

Lines changed: 9 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,13 @@
11
from typing import Dict, Optional
22

33
from mindee.documents.base import Document, TypeApiPrediction
4+
from mindee.fields.api_builder import ClassificationField, ListField
45

56

67
class CustomDocument(Document):
7-
fields: Dict[str, dict]
8+
fields: Dict[str, ListField]
89
"""Dictionary of all fields in the document"""
9-
classifications: Dict[str, dict]
10+
classifications: Dict[str, ClassificationField]
1011
"""Dictionary of all classifications in the document"""
1112

1213
def __init__(
@@ -46,24 +47,20 @@ def _build_from_api_prediction(
4647
field = api_prediction[field_name]
4748
# Only classifications have the 'value' attribute.
4849
if "value" in field:
49-
self.classifications[field_name] = field
50+
self.classifications[field_name] = ClassificationField(prediction=field)
5051
# Only value lists have the 'values' attribute.
5152
elif "values" in field:
5253
field["page_n"] = page_n
53-
self.fields[field_name] = field
54-
setattr(self, field_name, field)
54+
self.fields[field_name] = ListField(prediction=field, page_n=page_n)
5555

5656
def __str__(self) -> str:
5757
custom_doc_str = (
5858
f"----- {self.type} -----\nFilename: {self.filename or ''}".rstrip() + "\n"
5959
)
60-
for name, info in self.classifications.items():
61-
custom_doc_str += f"{name}: {info['value']}\n"
62-
for name, info in self.fields.items():
63-
custom_doc_str += "%s: %s\n" % (
64-
name,
65-
" ".join([value["content"] for value in info["values"]]),
66-
)
60+
for class_name, class_info in self.classifications.items():
61+
custom_doc_str += f"{class_name}: {class_info}\n"
62+
for field_name, field_info in self.fields.items():
63+
custom_doc_str += f"{field_name}: {field_info}\n"
6764
custom_doc_str += "----------------------"
6865
return custom_doc_str
6966

0 commit comments

Comments
 (0)