Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 62 additions & 1 deletion formfyxer/pdf_wrangling.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,11 @@ def from_pikefield(cls, pike_field: PikeField) -> "FormField":
)

def get_bbox(self) -> BoundingBoxF:
if self.type == FieldType.TEXT or self.type == FieldType.AREA:
if (
self.type == FieldType.TEXT
or self.type == FieldType.AREA
or self.type == FieldType.SIGNATURE
):
return (
self.x,
self.y,
Expand Down Expand Up @@ -340,6 +344,55 @@ def _create_only_fields(
c.save()


def _normalize_signature_fields(pdf: Pdf, signature_field_names: Iterable[str]) -> None:
    """Convert ReportLab text fields into PDF signature fields.

    ReportLab does not expose an AcroForm signature widget API, so
    ``_create_only_fields`` creates signature placeholders as text fields. This
    function changes those generated widgets to ``/FT /Sig`` before they are
    copied into the destination PDF.

    Args:
        pdf: PDF whose ``/AcroForm`` is modified in place. Nothing happens if
            the document has no ``/AcroForm``.
        signature_field_names: Field names (``/T`` values) to convert. Each
            entry is stringified and stripped; blank entries are ignored.

    Only the top-level ``/Fields`` entries and their direct ``/Kids`` are
    examined. When at least one field actually changes type, ``/SigFlags`` on
    the AcroForm gets bits 1 and 2 set in addition to any existing flags.
    """
    stripped_names = (str(field_name).strip() for field_name in signature_field_names)
    signature_names = {name for name in stripped_names if name}
    if not signature_names or not hasattr(pdf.Root, "AcroForm"):
        return

    # Entries that describe a text value or its presentation; they are dropped
    # so the converted field does not carry leftover text-field state.
    text_only_keys = ("/V", "/DV", "/MaxLen", "/Q", "/DS", "/RV")
    converted = 0

    def _normalize_object(obj: Any) -> bool:
        """Turn ``obj`` into a ``/Sig`` field if its ``/T`` is a requested name.

        Returns True when the name matched (whether or not the type had to be
        changed), False otherwise.
        """
        nonlocal converted
        if obj is None or not hasattr(obj, "get"):
            return False
        field_name = str(obj.get("/T", "") or "").strip()
        if field_name not in signature_names:
            return False
        # Count only real type changes so /SigFlags is left alone when every
        # matching field was already a signature field.
        if str(obj.get("/FT", "")) != "/Sig":
            converted += 1
        obj["/FT"] = pikepdf.Name("/Sig")
        for key in text_only_keys:
            if key in obj:
                del obj[key]
        return True

    acroform = pdf.Root.AcroForm
    for field in acroform.get("/Fields", []):
        if _normalize_object(field):
            continue
        if hasattr(field, "Kids"):
            # The helper already rewrites /FT on a match, so no follow-up
            # assignment is needed for kids.
            for kid in field.Kids:
                _normalize_object(kid)

    if converted:
        try:
            existing_flags = int(acroform.get("/SigFlags", 0) or 0)
        except (TypeError, ValueError):
            # An unreadable existing value is treated as "no flags set".
            existing_flags = 0
        # 3 == SignaturesExist (bit 1) | AppendOnly (bit 2) per the PDF spec.
        acroform["/SigFlags"] = existing_flags | 3


def set_fields(
in_file: Union[str, Path, BinaryIO],
out_file: Union[str, Path, BinaryIO],
Expand Down Expand Up @@ -384,6 +437,7 @@ def set_fields(
if not fields_per_page:
# Nothing to do, lol
return
fields_per_page = [list(page_fields) for page_fields in fields_per_page]
in_pdf = Pdf.open(in_file, allow_overwriting_input=overwrite)
if hasattr(in_pdf.Root, "AcroForm") and not overwrite:
print("Not going to overwrite the existing AcroForm!")
Expand All @@ -392,6 +446,13 @@ def set_fields(
io_obj = io.BytesIO()
_create_only_fields(io_obj, fields_per_page)
temp_pdf = Pdf.open(io_obj)
signature_field_names = [
field.name
for page_fields in fields_per_page
for field in page_fields
if field.type == FieldType.SIGNATURE
]
_normalize_signature_fields(temp_pdf, signature_field_names)

in_pdf = copy_pdf_fields(source_pdf=temp_pdf, destination_pdf=in_pdf)
in_pdf.save(out_file)
Expand Down
51 changes: 51 additions & 0 deletions formfyxer/tests/test_pdf_labeling_rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from reportlab.pdfgen import canvas

from formfyxer.pdf_wrangling import (
FieldType,
FormField,
_clamp_rect_to_page,
_estimate_page_anchor_transform,
Expand Down Expand Up @@ -134,6 +135,56 @@ def test_copy_pdf_fields_anchor_adjusts_rectangles(self, mock_get_transforms):
source_path.unlink(missing_ok=True)
destination_path.unlink(missing_ok=True)

def test_set_fields_writes_real_signature_fields(self):
    """``set_fields`` must write a ``/Sig`` field for SIGNATURE form fields.

    Creates a small PDF, adds one signature field and one text box through
    ``set_fields``, then inspects the saved AcroForm and reloads the fields
    with ``get_existing_pdf_fields``.
    """
    # delete=False: only the paths are needed here; both files are removed
    # in the ``finally`` clause below.
    with NamedTemporaryFile(suffix=".pdf", delete=False) as blank_handle:
        base_path = Path(blank_handle.name)
    with NamedTemporaryFile(suffix=".pdf", delete=False) as output_handle:
        labeled_path = Path(output_handle.name)

    try:
        page_canvas = canvas.Canvas(str(base_path))
        page_canvas.drawString(72, 720, "Signature")
        page_canvas.save()

        signature_field = FormField(
            "users1_signature",
            FieldType.SIGNATURE,
            72,
            650,
            font_size=12,
            configs={"width": 140, "height": 24},
        )
        name_field = FormField.make_textbox("users1_name", (72, 610, 140, 20), 12)
        set_fields(
            str(base_path),
            str(labeled_path),
            [[signature_field, name_field]],
            overwrite=True,
        )

        # Inspect the raw AcroForm entries of the written file.
        with pikepdf.Pdf.open(str(labeled_path)) as pdf:
            by_name = {}
            for entry in pdf.Root.AcroForm.Fields:
                by_name[str(entry.get("/T"))] = entry
            signature_entry = by_name["users1_signature"]
            self.assertEqual(str(signature_entry.get("/FT")), "/Sig")
            self.assertNotIn("/V", signature_entry)
            self.assertNotIn("/DV", signature_entry)
            self.assertEqual(str(by_name["users1_name"].get("/FT")), "/Tx")
            self.assertEqual(int(pdf.Root.AcroForm.get("/SigFlags", 0)), 3)

        # Round trip: the first field on the first page should come back as
        # a signature with the requested size.
        first_loaded = get_existing_pdf_fields(str(labeled_path))[0][0]
        self.assertEqual(first_loaded.type, FieldType.SIGNATURE)
        self.assertEqual(first_loaded.configs["width"], 140.0)
        self.assertEqual(first_loaded.configs["height"], 24.0)
    finally:
        base_path.unlink(missing_ok=True)
        labeled_path.unlink(missing_ok=True)

def test_improve_names_with_preferred_names(self):
fields = [[FormField.make_textbox("page_0_field_0", (100, 100, 120, 20), 12)]]
textboxes = [
Expand Down
27 changes: 27 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,33 @@
requires = ["setuptools>=40.0.0", "pip"]
build-backend = "setuptools.build_meta"

[dependency-groups]
dev = [
"boxdetect",
"eyecite",
"mypy",
"numpy",
"ocrmypdf",
"openai",
"opencv-python-headless",
"pandas",
"pandas-stubs",
"pdf2image",
"pdfminer.six",
"pikepdf",
"pytest",
"python-docx",
"python-dotenv",
"reportlab",
"requests",
"sigfig",
"textstat",
"tiktoken",
"transformers",
"types-PyYAML",
"types-requests",
]

[tool.black]
extend-exclude = '(__init__.py|setup.py)'

Expand Down