def _normalize_signature_fields(pdf: Pdf, signature_field_names: Iterable[str]) -> None:
    """Convert ReportLab text fields into PDF signature fields.

    ReportLab does not expose an AcroForm signature widget API, so
    ``_create_only_fields`` creates signature placeholders as text fields. This
    function changes those generated widgets to ``/FT /Sig`` before they are
    copied into the destination PDF.

    The whole ``/Kids`` tree below every top-level field is searched, so a
    placeholder nested at any depth is converted. Once a node matches, its
    descendants are not visited.

    Args:
        pdf: Document whose ``/AcroForm`` fields are rewritten in place.
        signature_field_names: ``/T`` names of the fields to convert. Names
            are compared after stripping whitespace; blank names are ignored.

    Returns:
        None. ``pdf`` is modified in place; ``/SigFlags`` is only touched when
        at least one field actually changed type.
    """
    # Stringify/strip each name once, then drop the blanks.
    signature_names = {
        name for name in (str(raw).strip() for raw in signature_field_names) if name
    }
    if not signature_names or not hasattr(pdf.Root, "AcroForm"):
        return

    converted = 0

    def _normalize_object(obj: Any) -> bool:
        """Turn ``obj`` into a signature field if its name was requested."""
        nonlocal converted
        if obj is None or not hasattr(obj, "get"):
            return False
        field_name = str(obj.get("/T", "") or "").strip()
        if field_name not in signature_names:
            return False
        # Count only real type changes so already-correct fields do not
        # cause /SigFlags to be rewritten.
        if str(obj.get("/FT", "")) != "/Sig":
            converted += 1
        obj["/FT"] = pikepdf.Name("/Sig")
        # Text-field-only entries left behind by the placeholder.
        for key in ("/V", "/DV", "/MaxLen", "/Q", "/DS", "/RV"):
            if key in obj:
                del obj[key]
        return True

    def _walk(node: Any) -> None:
        """Visit ``node`` and, unless it matched, every descendant in /Kids."""
        if _normalize_object(node):
            return
        if hasattr(node, "Kids"):
            for kid in node.Kids:
                _walk(kid)

    for field in pdf.Root.AcroForm.get("/Fields", []):
        _walk(field)

    if converted:
        try:
            existing_flags = int(pdf.Root.AcroForm.get("/SigFlags", 0) or 0)
        except (TypeError, ValueError):
            existing_flags = 0
        # 3 == SignaturesExist (bit 1) | AppendOnly (bit 2).
        pdf.Root.AcroForm["/SigFlags"] = existing_flags | 3
"height": 24}, + ), + FormField.make_textbox( + "users1_name", (72, 610, 140, 20), 12 + ), + ] + ], + overwrite=True, + ) + + with pikepdf.Pdf.open(str(labeled_path)) as pdf: + fields = { + str(field.get("/T")): field for field in pdf.Root.AcroForm.Fields + } + self.assertEqual(str(fields["users1_signature"].get("/FT")), "/Sig") + self.assertNotIn("/V", fields["users1_signature"]) + self.assertNotIn("/DV", fields["users1_signature"]) + self.assertEqual(str(fields["users1_name"].get("/FT")), "/Tx") + self.assertEqual(int(pdf.Root.AcroForm.get("/SigFlags", 0)), 3) + + loaded_fields = get_existing_pdf_fields(str(labeled_path)) + self.assertEqual(loaded_fields[0][0].type, FieldType.SIGNATURE) + self.assertEqual(loaded_fields[0][0].configs["width"], 140.0) + self.assertEqual(loaded_fields[0][0].configs["height"], 24.0) + finally: + base_path.unlink(missing_ok=True) + labeled_path.unlink(missing_ok=True) + def test_improve_names_with_preferred_names(self): fields = [[FormField.make_textbox("page_0_field_0", (100, 100, 120, 20), 12)]] textboxes = [ diff --git a/pyproject.toml b/pyproject.toml index b3f116d..be8b6da 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,6 +2,33 @@ requires = ["setuptools>=40.0.0", "pip"] build-backend = "setuptools.build_meta" +[dependency-groups] +dev = [ + "boxdetect", + "eyecite", + "mypy", + "numpy", + "ocrmypdf", + "openai", + "opencv-python-headless", + "pandas", + "pandas-stubs", + "pdf2image", + "pdfminer.six", + "pikepdf", + "pytest", + "python-docx", + "python-dotenv", + "reportlab", + "requests", + "sigfig", + "textstat", + "tiktoken", + "transformers", + "types-PyYAML", + "types-requests", +] + [tool.black] extend-exclude = '(__init__.py|setup.py)'