Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 70 additions & 7 deletions formfyxer/pdf_wrangling.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,11 +145,9 @@ def __init__(
# If we aren't given options, make our own depending on self.type
if self.type == FieldType.CHECK_BOX:
self.configs = {
"buttonStyle": "check",
"borderColor": magenta,
"fillColor": pink,
"textColor": blue,
"forceBorder": True,
"buttonStyle": "cross",
"borderWidth": 0,
"forceBorder": False,
}
elif self.type == FieldType.TEXT:
self.configs = {"fieldFlags": "doNotScroll"}
Expand Down Expand Up @@ -377,7 +375,13 @@ def _normalize_object(obj: Any) -> bool:
del obj[key]
return True

for field in pdf.Root.AcroForm.get("/Fields", []):
acroform_fields: Iterable[Any] = (
cast(Iterable[Any], pdf.Root.AcroForm["/Fields"])
if "/Fields" in pdf.Root.AcroForm
else ()
)

for field in acroform_fields:
if _normalize_object(field):
continue
if hasattr(field, "Kids"):
Expand All @@ -387,12 +391,70 @@ def _normalize_object(obj: Any) -> bool:

if converted:
try:
existing_flags = int(pdf.Root.AcroForm.get("/SigFlags", 0) or 0)
existing_flags_obj = (
pdf.Root.AcroForm["/SigFlags"]
if "/SigFlags" in pdf.Root.AcroForm
else None
)
existing_flags = int(existing_flags_obj or 0)
except (TypeError, ValueError):
existing_flags = 0
pdf.Root.AcroForm["/SigFlags"] = existing_flags | 3


# Translation table for checkbox caption codes stored in ``/MK /CA``.
# Keys are the legacy ZapfDingbats codes ReportLab writes; values are the codes
# Acrobat's field editor expects for the same style:
# "5" -> "8" for the cross style and "N" -> "H" for the star style.
_REPORTLAB_TO_ACROBAT_CHECKBOX_CAPTIONS = {
"5": "8",
"N": "H",
}


def _normalize_checkbox_fields_for_acrobat(pdf: Pdf) -> None:
    """Translate ReportLab checkbox caption codes into Acrobat's own values.

    For some checkbox styles -- cross (``5``) and star (``N``) in particular --
    ReportLab stores legacy ZapfDingbats caption codes under ``/MK /CA``. The
    saved appearance streams still render correctly in Acrobat, but Acrobat's
    field editor consults ``/MK /CA`` to work out which checkbox style is
    selected, and shows the wrong option when it finds one of the legacy codes.

    The PDF is modified in place. Every entry of the AcroForm ``/Fields`` array
    is examined, together with its direct ``Kids``; deeper descendants are not
    visited.

    Args:
        pdf: The open PDF whose checkbox fields should be rewritten. A document
            without an ``AcroForm`` entry is left untouched.
    """
    if not hasattr(pdf.Root, "AcroForm"):
        return

    # ``/Ff`` bits identifying a button field as a radio button (1 << 15) or a
    # push button (1 << 16); either one means the field is not a checkbox.
    non_checkbox_flags = (1 << 15) | (1 << 16)

    def _rewrite_caption(candidate: Any) -> None:
        """Replace a legacy caption on ``candidate`` if it is a checkbox."""
        if candidate is None or not hasattr(candidate, "get"):
            return
        if str(candidate.get("/FT", "")) != "/Btn":
            return

        try:
            field_flags = int(candidate.get("/Ff", 0) or 0)
        except (TypeError, ValueError):
            # An unreadable flag value is treated as "no flags set".
            field_flags = 0
        if field_flags & non_checkbox_flags:
            return

        appearance = candidate.get("/MK")
        if isinstance(appearance, pikepdf.Dictionary) and "/CA" in appearance:
            legacy_code = str(appearance.get("/CA") or "")
            replacement = _REPORTLAB_TO_ACROBAT_CHECKBOX_CAPTIONS.get(legacy_code)
            if replacement:
                appearance["/CA"] = pikepdf.String(replacement)

    acroform = pdf.Root.AcroForm
    top_level_fields: Iterable[Any] = ()
    if "/Fields" in acroform:
        top_level_fields = cast(Iterable[Any], acroform["/Fields"])

    for top_level in top_level_fields:
        _rewrite_caption(top_level)
        for child in getattr(top_level, "Kids", ()):
            _rewrite_caption(child)


def set_fields(
in_file: Union[str, Path, BinaryIO],
out_file: Union[str, Path, BinaryIO],
Expand Down Expand Up @@ -453,6 +515,7 @@ def set_fields(
if field.type == FieldType.SIGNATURE
]
_normalize_signature_fields(temp_pdf, signature_field_names)
_normalize_checkbox_fields_for_acrobat(temp_pdf)

in_pdf = copy_pdf_fields(source_pdf=temp_pdf, destination_pdf=in_pdf)
in_pdf.save(out_file)
Expand Down
45 changes: 45 additions & 0 deletions formfyxer/tests/test_pdf_labeling_rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,51 @@ def test_set_fields_writes_real_signature_fields(self):
base_path.unlink(missing_ok=True)
labeled_path.unlink(missing_ok=True)

def test_set_fields_normalizes_checkbox_style_metadata_for_acrobat(self):
    """Cross and star checkboxes written by ``set_fields`` carry Acrobat captions."""
    # ``delete=False`` keeps both files on disk after the context managers
    # close them; the ``finally`` clause below removes them.
    with NamedTemporaryFile(suffix=".pdf", delete=False) as base_tmp:
        base_path = Path(base_tmp.name)
    with NamedTemporaryFile(suffix=".pdf", delete=False) as labeled_tmp:
        labeled_path = Path(labeled_tmp.name)

    # Field name -> caption code expected in ``/MK /CA`` after normalization.
    expected_captions = {"cross_box": "8", "star_box": "H"}

    try:
        # Produce a one-page PDF to add the fields to.
        blank_page = canvas.Canvas(str(base_path))
        blank_page.drawString(72, 720, "Checkboxes")
        blank_page.save()

        cross_box = FormField(
            "cross_box",
            FieldType.CHECK_BOX,
            72,
            650,
            configs={"size": 18, "buttonStyle": "cross"},
        )
        star_box = FormField(
            "star_box",
            FieldType.CHECK_BOX,
            72,
            620,
            configs={"size": 18, "buttonStyle": "star"},
        )
        set_fields(
            str(base_path),
            str(labeled_path),
            [[cross_box, star_box]],
            overwrite=True,
        )

        with pikepdf.Pdf.open(str(labeled_path)) as pdf:
            fields = {}
            for field in pdf.Root.AcroForm.Fields:
                fields[str(field.get("/T"))] = field

            # Captions first, then appearance states.
            for name, caption in expected_captions.items():
                self.assertEqual(str(fields[name]["/MK"]["/CA"]), caption)
            for name in expected_captions:
                self.assertIn("/Yes", fields[name]["/AP"]["/N"])
    finally:
        base_path.unlink(missing_ok=True)
        labeled_path.unlink(missing_ok=True)

def test_improve_names_with_preferred_names(self):
fields = [[FormField.make_textbox("page_0_field_0", (100, 100, 120, 20), 12)]]
textboxes = [
Expand Down
Loading