Our entire receipt parsing system is currently down, so excuse the terse error report, but I will quickly post this here while I try to work around the issue on our side. I will come back and clean up this issue as soon as I have extinguished some fires on our end.
File ~/crdbrd/hub/src/hub/receipts/mindee.py:108, in parse(file, media_type)
103 raise exceptions.ReceiptParsingError from exc
105 file.seek(0)
106 parser_result = cast(
107 PredictResponse[InvoiceV4],
--> 108 client.parse( # pyright: ignore[reportUnknownMemberType]
109 product_class=InvoiceV4,
110 input_source=input_source,
111 ),
112 )
113 document = cast(
114 Document[InvoiceV4Document, Page[InvoiceV4Document]] | None,
115 parser_result.document,
116 )
117 if document is None:
File ~/crdbrd/hub/.venv/lib/python3.13/site-packages/mindee/client.py:126, in Client.parse(self, product_class, input_source, include_words, close_file, page_options, cropper, endpoint, full_text)
120 if page_options and input_source.is_pdf():
121 input_source.process_pdf(
122 page_options.operation,
123 page_options.on_min_pages,
124 page_options.page_indexes,
125 )
--> 126 return self._make_request(
127 product_class,
128 input_source,
129 endpoint,
130 include_words,
131 close_file,
132 cropper,
133 full_text,
134 )
File ~/crdbrd/hub/.venv/lib/python3.13/site-packages/mindee/client.py:427, in Client._make_request(self, product_class, input_source, endpoint, include_words, close_file, cropper, full_text)
421 clean_response = clean_request_json(response)
422 raise handle_error(
423 str(product_class.endpoint_name),
424 clean_response,
425 )
--> 427 return PredictResponse(product_class, dict_response)
File ~/crdbrd/hub/.venv/lib/python3.13/site-packages/mindee/parsing/common/predict_response.py:28, in PredictResponse.__init__(self, inference_type, raw_response)
21 """
22 Container for the raw API response and the parsed document.
23
24 :param inference_type: Type of the inference.
25 :param raw_response: json response from HTTP call.
26 """
27 super().__init__(raw_response)
---> 28 self.document = Document(inference_type, raw_response["document"])
File ~/crdbrd/hub/.venv/lib/python3.13/site-packages/mindee/parsing/common/document.py:50, in Document.__init__(self, inference_type, raw_response)
48 if "extras" in raw_response and raw_response["inference"]["extras"]:
49 self.extras = Extras(raw_response["extras"])
---> 50 self._inject_full_text_ocr(raw_response)
51 self.inference = inference_type(raw_response["inference"])
52 self.n_pages = raw_response["n_pages"]
File ~/crdbrd/hub/.venv/lib/python3.13/site-packages/mindee/parsing/common/document.py:72, in Document._inject_full_text_ocr(self, raw_prediction)
65 if (
66 not pages
67 or "extras" not in pages[0]
68 or "full_text_ocr" not in pages[0]["extras"]
69 ):
70 return
---> 72 full_text_content = "\n".join(
73 page["extras"]["full_text_ocr"]["content"]
74 for page in pages
75 if "extras" in page and "full_text_ocr" in page["extras"]
76 )
78 artificial_text_obj = {"content": full_text_content}
80 if not hasattr(self, "extras") or not self.extras:
File ~/crdbrd/hub/.venv/lib/python3.13/site-packages/mindee/parsing/common/document.py:73, in <genexpr>(.0)
65 if (
66 not pages
67 or "extras" not in pages[0]
68 or "full_text_ocr" not in pages[0]["extras"]
69 ):
70 return
72 full_text_content = "\n".join(
---> 73 page["extras"]["full_text_ocr"]["content"]
74 for page in pages
75 if "extras" in page and "full_text_ocr" in page["extras"]
76 )
78 artificial_text_obj = {"content": full_text_content}
80 if not hasattr(self, "extras") or not self.extras:
TypeError: 'NoneType' object is not subscriptable
These keys should not have been inserted in the response or the client should check for None before processing the data.
Our entire receipt parsing system is currently down, so excuse the terse error report, but I will quickly post this here while I try to work around the issue on our side. I will come back and clean up this issue as soon as I have extinguished some fires on our end.
Here is a (really simplified) example of how we use the Mindee SDK:
Here is a screenshot that shows Mindee's JSON API response:
This response is not handled gracefully by the Mindee Python SDK client:
These keys should not have been inserted in the response, or the client should check for
`None` before processing the data.