From a71990a051b4a895915947d80116e2167e2ea8c4 Mon Sep 17 00:00:00 2001 From: Sathish Kannaian Date: Tue, 14 Feb 2023 23:39:54 -0800 Subject: [PATCH 1/2] Create Issues_log.txt Issues_log tracking --- python/queries/Issues_log.txt | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 python/queries/Issues_log.txt diff --git a/python/queries/Issues_log.txt b/python/queries/Issues_log.txt new file mode 100644 index 0000000..0106160 --- /dev/null +++ b/python/queries/Issues_log.txt @@ -0,0 +1,14 @@ +Date: 02/14/2023 +Issue: I have a multiple pages PDF and 1st page is landscape orientation, which is the envelope cover with some text like "Presorted" etc. +When I run through the PDF, I am getting the below error since from the output of "d = t2.TDocumentSchema().load(response)" line it is expecting +"block_type = 'PAGE'" in the next line of code "page = d.pages[0]". I get the below error, but when I remove the first page and run, I get proper queries output. +Please help. + +======== +Error: +File "/opt/python/trp/trp2.py", line 631, in pages + page_blocks = self.block_map(TextractBlockTypes.PAGE).values() + File "/opt/python/trp/trp2.py", line 491, in block_map + return {k: self.blocks[v] for k, v in self._block_id_maps[block_type.name].items()} +[ERROR] KeyError: 'PAGE' Traceback (most recent call last): File "/var/task/lambda_function.py", line 199, in lambda_handler raise e File "/var/task/lambda_function.py", line 175, in lambda_handler paths = process_response( File "/var/task/helper/helper.py", line 240, in process_response page = d.pages[0] File "/opt/python/trp/trp2.py", line 631, in pages page_blocks = self.block_map(TextractBlockTypes.PAGE).values() File "/opt/python/trp/trp2.py", line 491, in block_map return {k: self.blocks[v] for k, v in self._block_id_maps[block_type.name].items()} +======= From 3eb4c42e0c3dac9f66aadc9b9587f8a2ceb7fd49 Mon Sep 17 00:00:00 2001 From: Sathish Kannaian Date: Tue, 14 Feb 2023 23:49:02 -0800 Subject: [PATCH 
2/2] Update Issues_log.txt --- python/queries/Issues_log.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/queries/Issues_log.txt b/python/queries/Issues_log.txt index 0106160..4c47f1d 100644 --- a/python/queries/Issues_log.txt +++ b/python/queries/Issues_log.txt @@ -1,8 +1,7 @@ Date: 02/14/2023 Issue: I have a multiple pages PDF and 1st page is landscape orientation, which is the envelope cover with some text like "Presorted" etc. When I run through the PDF, I am getting the below error since from the output of "d = t2.TDocumentSchema().load(response)" line it is expecting -"block_type = 'PAGE'" in the next line of code "page = d.pages[0]". I get the below error, but when I remove the first page and run, I get proper queries output. -Please help. +"block_type = 'PAGE'" in the next line of code "page = d.pages[0]". I get the below error, but when I remove the first page and run, I get proper queries output. My guess is that the code "t2.TDocumentSchema().load(response)" is not handling multi-page PDFs. Please advise. ======== Error: