Skip to content

Commit c96458f

Browse files
32460 upgraded pypdf2 to pypdf (bcgov#4108)
1 parent 1e6dc6e commit c96458f

8 files changed

Lines changed: 107 additions & 58 deletions

File tree

legal-api/.devcontainer/Dockerfile

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,11 @@ FROM mcr.microsoft.com/devcontainers/python:3.9-bullseye
22

33

44
RUN \
5+
rm -f /etc/apt/sources.list.d/yarn.list && \
56
apt update && apt install bash-completion postgresql-client -y && \
6-
pip install pre-commit && \
7-
curl -sSL https://install.python-poetry.org | POETRY_HOME=/home/vscode/.local python3 -
7+
pip install --upgrade pip && \
8+
pip install pre-commit poetry && \
9+
poetry config virtualenvs.in-project true
810

911
RUN \
1012
pip install ruff
@@ -19,4 +21,4 @@ RUN \
1921
echo ' . /etc/bash_completion' >> /home/vscode/.bashrc && \
2022
echo 'fi' >> /home/vscode/.bashrc && \
2123
echo >> /home/vscode/.bashrc && \
22-
echo '. <(poetry completions)' >> /home/vscode/.bashrc
24+
echo '. <(poetry completions)' >> /home/vscode/.bashrc

legal-api/.devcontainer/devcontainer.json

Lines changed: 15 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -9,37 +9,37 @@
99
// This saves the user a bit of time when re-opening containers.
1010
"dockerfile": "Dockerfile"
1111
},
12-
1312
"features": {
1413
"ghcr.io/devcontainers/features/docker-in-docker:2": {
1514
"version": "latest",
1615
"dockerDashComposeVersion": "v2"
1716
}
1817
},
19-
"runArgs": ["--add-host=host.docker.internal:host-gateway"],
18+
"runArgs": [
19+
"--add-host=host.docker.internal:host-gateway"
20+
],
2021
// This seems to be needed on macOS (OS X) at times.
2122
// I have not tested it on Windows, but it should also work fine on Ubuntu.
2223
"mounts": [
23-
{
24-
"source": "/var/run/docker.sock",
25-
"target": "/var/run/docker.sock",
26-
"type": "bind"
27-
}
28-
],
24+
{
25+
"source": "/var/run/docker.sock",
26+
"target": "/var/run/docker.sock",
27+
"type": "bind"
28+
}
29+
],
2930
"containerEnv": {
30-
"PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION": "python"
31+
"PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION": "python"
3132
},
3233
"remoteEnv": {
33-
"PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION": "python"
34+
"PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION": "python"
3435
},
35-
// "forwardPorts": [5432],
36-
36+
"forwardPorts": [
37+
8080
38+
],
3739
// Features to add to the dev container. More info: https://containers.dev/features.
3840
// "features": {},
39-
4041
// Use 'forwardPorts' to make a list of ports inside the container available locally.
4142
// "forwardPorts": [],
42-
4343
// Use 'postCreateCommand' to run commands after the container is created.
4444
"postCreateCommand": ".devcontainer/commands/post-create-command.sh",
4545
"customizations": {
@@ -49,10 +49,8 @@
4949
]
5050
}
5151
}
52-
5352
// Configure tool-specific properties.
5453
// "customizations": {},
55-
5654
// Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
5755
// "remoteUser": "root"
58-
}
56+
}

legal-api/poetry.lock

Lines changed: 59 additions & 11 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

legal-api/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ dependencies = [
4040
"python-editor (==1.0.4)",
4141
"strict-rfc3339 (==0.7)",
4242
"minio (==7.0.2)",
43-
"PyPDF2 (==1.26.0)",
43+
"pypdf (==6.7.2)",
4444
"reportlab (==3.6.12)",
4545
"html-sanitizer (==2.4.1)",
4646
"lxml (==5.2.2)",

legal-api/src/legal_api/services/filings/validations/common_validations.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,9 @@
1919
from typing import Final, Optional
2020

2121
import pycountry
22-
import PyPDF2
2322
from flask import current_app, g, request
2423
from flask_babel import _
24+
from pypdf import PdfReader
2525

2626
from legal_api.core.filing import Filing
2727
from legal_api.errors import Error
@@ -397,14 +397,14 @@ def validate_pdf(file_key: str, file_key_path: str, verify_paper_size: bool = Tr
397397
try:
398398
file = MinioService.get_file(file_key)
399399
open_pdf_file = io.BytesIO(file.data)
400-
pdf_reader = PyPDF2.PdfFileReader(open_pdf_file)
400+
pdf_reader = PdfReader(open_pdf_file)
401401

402402
# Check that all pages in the pdf are letter size and able to be processed.
403403
width: Final = 612 # 8.5 inches
404404
height: Final = 792 # 11 inches
405405
if (
406406
verify_paper_size and
407-
any(x.mediaBox.getWidth() != width or x.mediaBox.getHeight() != height for x in pdf_reader.pages)
407+
any(x.mediabox.width != width or x.mediabox.height != height for x in pdf_reader.pages)
408408
):
409409
msg.append({"error": _("Document must be set to fit onto 8.5” x 11” letter-size paper."),
410410
"path": file_key_path})
@@ -414,10 +414,11 @@ def validate_pdf(file_key: str, file_key_path: str, verify_paper_size: bool = Tr
414414
if file_info.size > max_file_size:
415415
msg.append({"error": _("File exceeds maximum size."), "path": file_key_path})
416416

417-
if pdf_reader.isEncrypted:
417+
if pdf_reader.is_encrypted:
418418
msg.append({"error": _("File must be unencrypted."), "path": file_key_path})
419419

420-
except Exception:
420+
except Exception as ex:
421+
current_app.logger.debug(f"Error validating PDF: {ex}")
421422
msg.append({"error": _("Invalid file."), "path": file_key_path})
422423

423424
if msg:

legal-api/src/legal_api/services/furnishing_documents_service.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@
1515
import io
1616
from typing import Final
1717

18-
import PyPDF2
1918
from flask import current_app
19+
from pypdf import PdfWriter
2020

2121
from legal_api.models import Furnishing, db
2222
from legal_api.reports.report_v2 import ReportTypes, ReportV2
@@ -79,15 +79,15 @@ def _get_batch_cover(self, files: list) -> bytes:
7979
@staticmethod
8080
def _merge_documents(files: dict) -> bytes:
8181
try:
82-
merger = PyPDF2.PdfFileMerger()
82+
writer = PdfWriter()
8383
if files["cover"]:
84-
merger.append(io.BytesIO(files["cover"]))
84+
writer.append(io.BytesIO(files["cover"]))
8585
contents = files["contents"]
8686
for _, pdf in enumerate(contents):
87-
merger.append(io.BytesIO(pdf))
87+
writer.append(io.BytesIO(pdf))
8888
writer_buffer = io.BytesIO()
89-
merger.write(writer_buffer)
90-
merger.close()
89+
writer.write(writer_buffer)
90+
writer.close()
9191
return writer_buffer.getvalue()
9292
except Exception as e:
9393
current_app.logger.error(f"Error merging PDF:{e}")

legal-api/src/legal_api/services/pdf_service.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@
1818
from datetime import datetime
1919
from typing import Optional
2020

21-
import PyPDF2
2221
from flask import current_app
22+
from pypdf import PdfReader, PdfWriter
2323
from reportlab.lib.pagesizes import letter
2424
from reportlab.pdfbase import pdfmetrics
2525
from reportlab.pdfbase.ttfonts import TTFont
@@ -51,19 +51,19 @@ def __init__(self):
5151
@staticmethod
5252
def stamp_pdf(input_pdf, watermark, only_first_page=True):
5353
"""Merge two PDFs."""
54-
watermark_obj = PyPDF2.PdfFileReader(watermark)
55-
watermark_page = watermark_obj.getPage(0)
54+
watermark_obj = PdfReader(watermark)
55+
watermark_page = watermark_obj.get_page(0)
5656

57-
pdf_reader = PyPDF2.PdfFileReader(input_pdf)
58-
pdf_writer = PyPDF2.PdfFileWriter()
57+
pdf_reader = PdfReader(input_pdf)
58+
pdf_writer = PdfWriter()
5959

60-
for page_num in range(pdf_reader.getNumPages()):
61-
page = pdf_reader.getPage(page_num)
60+
for page_num in range(pdf_reader.get_num_pages()):
61+
page = pdf_reader.get_page(page_num)
6262

6363
if (only_first_page and page_num == 0) or not only_first_page:
64-
page.mergePage(watermark_page)
64+
page.merge_page(watermark_page)
6565

66-
pdf_writer.addPage(page)
66+
pdf_writer.add_page(page)
6767

6868
output = io.BytesIO()
6969
pdf_writer.write(output)

legal-api/tests/unit/services/test_pdf_service.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
"""
1717
import io
1818

19-
import PyPDF2
19+
from pypdf import PdfReader
2020
from reportlab.lib.pagesizes import letter
2121
from reportlab.pdfgen import canvas
2222

@@ -36,15 +36,15 @@ def test_stamp(app): # pylint:disable=unused-argument
3636
registrars_stamp = pdf_service.create_registrars_stamp(registrar_stamp_data)
3737

3838
certified_copy = pdf_service.stamp_pdf(pdf_input, registrars_stamp, only_first_page=True)
39-
certified_copy_obj = PyPDF2.PdfFileReader(certified_copy)
39+
certified_copy_obj = PdfReader(certified_copy)
4040

41-
certified_copy_page = certified_copy_obj.getPage(0)
42-
text = certified_copy_page.extractText()
41+
certified_copy_page = certified_copy_obj.get_page(0)
42+
text = certified_copy_page.extract_text()
4343
assert 'Filed on' in text
4444
assert 'File Name: rules.pdf' in text
4545

46-
certified_copy_page = certified_copy_obj.getPage(1)
47-
text = certified_copy_page.extractText()
46+
certified_copy_page = certified_copy_obj.get_page(1)
47+
text = certified_copy_page.extract_text()
4848
assert 'Filed on' not in text
4949

5050
# Uncomment to generate the file:

0 commit comments

Comments
 (0)