-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathindex.html
More file actions
716 lines (665 loc) · 42.2 KB
/
index.html
File metadata and controls
716 lines (665 loc) · 42.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
<!DOCTYPE html>
<html lang="en">
<head>
<!-- Google tag (gtag.js) -->
<script async src="https://www.googletagmanager.com/gtag/js?id=G-V7B58PGZZY"></script>
<script>
// Shared queue array; keep an existing one if it was already defined on window.
window.dataLayer = window.dataLayer || [];
// Pushes the call's raw `arguments` object onto dataLayer.
// Also called by the click-tracking script at the end of <body>.
function gtag(){dataLayer.push(arguments);}
// Queue the 'js' command with the current time, then 'config' with the same ID
// that appears in the loader URL above.
gtag('js', new Date());
gtag('config', 'G-V7B58PGZZY');
</script>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<link rel="stylesheet" href="style.css">
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;500;700&amp;display=swap" rel="stylesheet">
<title>Python PDF Automation | ReportLab &amp; PyMuPDF Courses by Hugo Ferro</title>
<!-- Search-engine metadata -->
<meta name="author" content="Hugo Ferro">
<meta name="publisher" content="Hugo Ferro">
<meta name="robots" content="index, follow">
<meta name="keywords" content="Python PDF generation, ReportLab course, PyMuPDF tutorial, automated PDF documents, Python document automation, Gumroad PDF templates, Udemy Python courses">
<meta name="description" content="Courses and professional tools for PDF automation in Python. ReportLab, PyMuPDF, invoice generation, PDF manipulation, and much more.">
<!-- Open Graph / Twitter card metadata (each property declared exactly once) -->
<meta property="og:type" content="website">
<meta property="og:title" content="Python PDF Automation — Courses, Tools &amp; Freelance Services | Hugo Ferro">
<meta property="og:description" content="Python PDF automation specialist with 900+ students on Udemy. ReportLab, PyMuPDF, data extraction. Courses, tools and freelance services.">
<meta property="og:image" content="https://hasff.github.io/site/assets/imgs/og-image.png">
<meta property="og:url" content="https://hasff.github.io/site/">
<meta name="twitter:card" content="summary_large_image">
<link rel="icon" type="image/x-icon" href="assets/imgs/favicon.ico">
<!-- Site Meta -->
<script type="application/ld+json">
{
"@context": "https://schema.org",
"@type": "Person",
"name": "Hugo Ferro",
"url": "https://hasff.github.io/site/",
"jobTitle": "Python Developer & Instructor",
"knowsAbout": ["Python PDF Automation", "ReportLab", "PyMuPDF", "FastAPI", "Django"],
"description": "Specialist in Python PDF document automation, teaching ReportLab and PyMuPDF professional workflows.",
"sameAs": [
"https://github.com/hasff",
"https://www.linkedin.com/in/hugo-ferro-1434b414/"
]
}
</script>
<script type="application/ld+json">
{
"@context": "https://schema.org",
"@type": "WebSite",
"name": "Python PDF Automation by Hugo Ferro",
"url": "https://hasff.github.io/site/",
"publisher": {
"@type": "Person",
"name": "Hugo Ferro"
}
}
</script>
<script type="application/ld+json">
{
"@context": "https://schema.org",
"@type": "WebPage",
"name": "Python PDF Automation Courses & Tools",
"url": "https://hasff.github.io/site/",
"description": "Courses, examples, and tools for Python PDF automation using ReportLab and PyMuPDF."
}
</script>
<!-- Courses -->
<!-- Python PDF Generation: From Beginner to Winner -->
<!-- The VirtualLocation URL carries the referral code as a ?referralCode= query
     parameter, the same form the enrol-button script at the end of <body> builds. -->
<script type="application/ld+json">
{
  "@context": "https://schema.org",
  "@type": "Course",
  "name": "Python PDF Generation: From Beginner to Winner",
  "description": "A complete hands-on course teaching Python PDF automation using ReportLab. Learn how to generate invoices, reports, certificates, and complex PDF layouts programmatically.",
  "provider": {
    "@type": "Person",
    "name": "Hugo Ferro",
    "url": "https://hasff.github.io/site/"
  },
  "hasCourseInstance": {
    "@type": "CourseInstance",
    "courseMode": "online",
    "url": "https://hasff.github.io/site/#courses",
    "location": {
      "@type": "VirtualLocation",
      "url": "https://www.udemy.com/course/python-reportlab-from-beginner-to-winner/?referralCode=3B927E883D2E868CF221"
    }
  }
}
</script>
<!-- Python PDF Handling: From Beginner to Winner -->
<!-- The VirtualLocation URL carries the referral code as a ?referralCode= query
     parameter, the same form the enrol-button script at the end of <body> builds. -->
<script type="application/ld+json">
{
  "@context": "https://schema.org",
  "@type": "Course",
  "name": "Python PDF Handling: From Beginner to Winner",
  "description": "Learn advanced PDF extraction, merging, annotation, and manipulation using PyMuPDF in Python.",
  "provider": {
    "@type": "Person",
    "name": "Hugo Ferro",
    "url": "https://hasff.github.io/site/"
  },
  "hasCourseInstance": {
    "@type": "CourseInstance",
    "courseMode": "online",
    "url": "https://hasff.github.io/site/#courses",
    "location": {
      "@type": "VirtualLocation",
      "url": "https://www.udemy.com/course/python-pdf-handling-from-beginner-to-winner/?referralCode=E7B71DCA8314B0BAC4BD"
    }
  }
}
</script>
<!-- Tools -->
<!-- Multi-Page Invoice Generator (Pro) -->
<!-- The image file name contains spaces; they are percent-encoded (%20) so the
     "image" value is a valid absolute URL that resolves to the same file. -->
<script type="application/ld+json">
{
  "@context": "https://schema.org",
  "@type": "Product",
  "name": "Multi-Page Invoice Generator (Pro)",
  "description": "A ready-to-use Python template for generating professional invoices in ReportLab. Handles multi-page invoices, repeating headers, international fonts, QR codes, and predictable pagination using ReportLab.",
  "image": "https://hasff.github.io/site/assets/imgs/Multi-Page%20Invoice%20Generator%20(Pro).png",
  "brand": {
    "@type": "Person",
    "name": "Hugo Ferro",
    "url": "https://hasff.github.io/site/"
  },
  "url": "https://hasff.github.io/site/#tools",
  "offers": {
    "@type": "Offer",
    "priceCurrency": "USD",
    "price": "19.00",
    "availability": "https://schema.org/InStock",
    "url": "https://alltech.gumroad.com/l/agvoio"
  }
}
</script>
</head>
<body>
<!-- Site header: name/logo link plus in-page anchor navigation to the four sections. -->
<header>
<nav class="navbar">
<div class="logo">
<a style="text-decoration: none;" href="https://www.linkedin.com/in/hugo-ferro-1434b414/" target="_blank"><span>Hugo Ferro</span></a>
</div>
<!-- Each .nav-link is picked up by the click-tracking script at the end of <body>,
     which sends the aria-label value as the analytics event label — keep the
     labels stable if reports depend on them. -->
<ul class="nav-links">
<li><a href="#examples" class="nav-link" aria-label="Menu: Examples">Examples</a></li>
<li><a href="#courses" class="nav-link" aria-label="Menu: Courses">Courses</a></li>
<li><a href="#tools" class="nav-link" aria-label="Menu: Tools">Tools</a></li>
<li><a href="#contact" class="nav-link" aria-label="Menu: Contact">Contact</a></li>
</ul>
</nav>
</header>
<main>
<section id="examples" class="section">
<div class="hero">
<h1>Python PDF Automation – Examples and Real Projects</h1>
<p>I specialize in PDF automation with Python, using ReportLab and PyMuPDF. Here you'll find real-world examples, complete courses, and production tools for generating and manipulating PDFs.</p>
<p> Explore my latest projects and technical demonstrations.</p>
<div style="display:flex; justify-content:center; gap:2rem; margin-top:1.5rem; flex-wrap:wrap;">
<span style="font-size:0.95rem; font-weight:600; color:#2563eb;">✦ 900+ students trained on Udemy</span>
<span style="font-size:0.95rem; font-weight:600; color:#4b5563;">✦ ReportLab & PyMuPDF</span>
<span style="font-size:0.95rem; font-weight:600; color:#4b5563;">✦ Available for freelance</span>
</div>
</div>
<div class="grid">
<!-- Automated Invoice Generation -->
<div class="card">
<div class="card-example-container">
<div class="card-title-description">
<h3>Automated Invoice Generation</h3>
<p>
Example of a production-style invoice generator built with Python.
This type of system can generate thousands of invoices automatically
from databases or ERP systems.
</p>
</div>
<div class="card-image">
<a href="assets/pdfs/invoice_sample1.pdf" target="_blank" class="view-link-img" aria-label="Python Invoice PDF Generator (Example 1): View PDF from Image" >
<img src="assets/imgs/python-reportlab-invoice-example1.jpg" alt="Professional Invoice PDF generated with Python ReportLab">
</a>
</div>
</div>
<div class="card-links">
<a href="assets/pdfs/invoice_sample1.pdf" target="_blank" class="view-link" aria-label="Python Invoice PDF Generator (Example 1): View PDF from Button">View PDF</a>
<span class="separator">|</span>
<a href="https://github.com/hasff/python-invoice-pdf-generator" target="_blank" class="view-link" aria-label="Python Invoice PDF Generator (Example 1): View Code">View Code</a>
</div>
<p style="font-size: 0.82rem; color: #9ca3af; margin-top: 1rem; line-height: 1.6;">
This example demonstrates automated PDF invoice generation using ReportLab in Python.
Features include multi-page pagination, repeating table headers, VAT calculations,
branded layouts, and BytesIO streaming for API integration. No HTML, no headless browser —
pure Python, deterministic output every time.
</p>
</div>
<!-- Fillable PDF Form Generator -->
<div class="card">
<div class="card-example-container">
<div class="card-title-description">
<h3>Fillable PDF Form Generator</h3>
<p>
Example of a production-style fillable PDF form built entirely in Python —
no Acrobat, no Word export. Interactive fields, checkboxes, and a signature
area, generated programmatically with pixel-perfect layout control.
</p>
</div>
<div class="card-image">
<a href="assets/pdfs/patient_admission_form.pdf" target="_blank" class="view-link-img" aria-label="Python Fillable PDF Form Generator: View Project from Image">
<img src="assets/imgs/fillable/preview1.jpg" alt="Fillable PDF patient admission form generated with Python ReportLab">
</a>
</div>
</div>
<div class="card-links">
<a href="assets/pdfs/patient_admission_form.pdf" target="_blank" class="view-link" aria-label="Python Fillable PDF Form Generator: View PDF from Button">View PDF</a>
<span class="separator">|</span>
<a href="https://github.com/hasff/python-fillable-pdf-form-generator" target="_blank" class="view-link" aria-label="Python Fillable PDF Form Generator: View Code">View Code</a>
</div>
<p style="font-size: 0.82rem; color: #9ca3af; margin-top: 1rem; line-height: 1.6;">
This example demonstrates programmatic AcroForm generation using ReportLab in Python.
Features include coordinate-based layout, named text fields, toggle checkboxes, a signature
area, and branded section headers — all produced from a single script with no external tools
or manual steps.
</p>
</div>
<!-- Fillable PDF Form Extractor -->
<div class="card">
<div class="card-example-container">
<div class="card-title-description">
<h3>Fillable PDF Form Extractor</h3>
<p>
The other half of the cycle. Reads a filled AcroForm PDF and extracts
all field data programmatically — grouped by section, exported to Excel.
No field name knowledge required.
</p>
</div>
<div class="card-image">
<a href="assets/imgs/fillable/extractor/patient_admission_form_john_doe.png" target="_blank" class="view-link-img" aria-label="Python Fillable PDF Form Extractor: View filled form from Image">
<img src="assets/imgs/fillable/extractor/patient_admission_form_john_doe.png" alt="Filled AcroForm PDF patient admission form ready for extraction">
</a>
</div>
</div>
<div class="card-links">
<a href="https://github.com/hasff/python-fillable-pdf-form-extractor" target="_blank" class="view-link" aria-label="Python Fillable PDF Form Extractor: View Code">View Code</a>
</div>
<p style="font-size: 0.82rem; color: #9ca3af; margin-top: 1rem; line-height: 1.6;">
Extracts AcroForm field data by spatial region — not by field name. Works on
third-party forms where the internal structure is unknown. Includes an inspection
mode to calibrate section boundaries visually before running extraction.
Output: a formatted Excel file with all fields grouped by section.
</p>
</div>
</div>
<!-- Case Study: PDF to Excel -->
<div class="case-study-card">
<div class="case-study-header">
<h3>PDF Invoice Data Extraction — PDF to Excel</h3>
<p>A real-world invoice from an external source, processed entirely with Python. The script locates each data region dynamically — no hardcoded coordinates.</p>
</div>
<div class="case-study-steps">
<div class="case-study-step">
<div class="step-badge">01</div>
<a href="assets/imgs/sample_preview.jpg" target="_blank" class="view-link-img">
<div class="case-study-step-image fixed-H400">
<img src="assets/imgs/sample_preview.jpg" alt="Source PDF Invoice">
</div>
</a>
<p class="step-label">Source Document</p>
<p class="step-desc">A real PDF invoice from an external source — not generated by me.</p>
</div>
<div class="case-study-arrow">→</div>
<div class="case-study-step">
<div class="step-badge">02</div>
<a href="assets/imgs/debug_preview.jpg" target="_blank" class="view-link-img">
<div class="case-study-step-image fixed-H400">
<img src="assets/imgs/debug_preview.jpg" alt="Dynamic Bounding Box Detection">
</div>
</a>
<p class="step-label">Dynamic Detection</p>
<p class="step-desc">Bounding boxes computed at runtime by locating anchor words.</p>
</div>
<div class="case-study-arrow">→</div>
<div class="case-study-step">
<div class="step-badge">03</div>
<a href="assets/imgs/generated_excel.jpg" target="_blank" class="view-link-img">
<div class="case-study-step-image fixed-H400">
<img src="assets/imgs/generated_excel.jpg" alt="Generated Excel Report">
</div>
</a>
<p class="step-label">Excel Output</p>
<p class="step-desc">Structured, formatted Excel report ready for accounting or ERP.</p>
</div>
</div>
<div class="card-links">
<a href="https://github.com/hasff/python-invoice-pdf-data-extractor" target="_blank" class="view-link" aria-label="PDF Data Extractor: View Code">View Code</a>
</div>
<p style="font-size: 0.82rem; color: #9ca3af; margin-top: 1rem; line-height: 1.6;">
This example demonstrates a complete Python PDF automation pipeline:
extracting structured data from a real-world invoice using pdfplumber,
dynamic bounding box detection, and exporting to a formatted Excel report
using openpyxl. No hardcoded coordinates — the script adapts to the
document layout automatically.
</p>
</div>
<!-- Case Study: Handwritten OCR -->
<div class="case-study-card">
<div class="case-study-header">
<h3>Handwritten OCR — Export to PDF, Excel and Word</h3>
<p>A real photo taken with a mobile phone, processed entirely with Python. Image orientation is corrected automatically before sending to Google Gemini.</p>
</div>
<div class="case-study-steps">
<div class="case-study-step">
<div class="step-badge">01</div>
<a href="assets/imgs/ocr/handwrite.jpeg" target="_blank" class="view-link-img">
<div class="case-study-step-image fixed-H250">
<img src="assets/imgs/ocr/handwrite.jpeg" alt="Handwritten checklist photo">
</div>
</a>
<p class="step-label">Handwritten Photo</p>
<p class="step-desc">A real photo taken with a mobile phone — imperfect, rotated, cursive handwriting.</p>
</div>
<div class="case-study-arrow">→</div>
<div class="case-study-step">
<div class="step-badge">02</div>
<a href="assets/imgs/ocr/preview_pdf.jpg" target="_blank" class="view-link-img">
<div class="case-study-step-image fixed-H250">
<img src="assets/imgs/ocr/preview_pdf.jpg" alt="Generated PDF report">
</div>
</a>
<p class="step-label">PDF Output</p>
<p class="step-desc">Printable report with title, styled table and footnote, generated with ReportLab.</p>
</div>
<div class="case-study-arrow">→</div>
<div class="case-study-step">
<div class="step-badge">03</div>
<a href="assets/imgs/ocr/preview_excel.jpg" target="_blank" class="view-link-img">
<div class="case-study-step-image fixed-H250">
<img src="assets/imgs/ocr/preview_excel.jpg" alt="Generated Excel report">
</div>
</a>
<p class="step-label">Excel Output</p>
<p class="step-desc">Formatted workbook with frozen header, alternating rows and auto-sized columns.</p>
</div>
<div class="case-study-arrow">→</div>
<div class="case-study-step">
<div class="step-badge">04</div>
<a href="assets/imgs/ocr/preview_word.jpg" target="_blank" class="view-link-img">
<div class="case-study-step-image fixed-H250">
<img src="assets/imgs/ocr/preview_word.jpg" alt="Generated Word report">
</div>
</a>
<p class="step-label">Word Output</p>
<p class="step-desc">Editable .docx file with matching styles, ready to share or modify.</p>
</div>
</div>
<div class="card-links">
<a href="https://github.com/hasff/python-handwritten-ocr-document-generator" target="_blank" class="view-link" aria-label="Handwritten OCR: View Code">View Code</a>
</div>
<p style="font-size: 0.82rem; color: #9ca3af; margin-top: 1rem; line-height: 1.6;">
This pipeline detects and corrects EXIF image orientation automatically, sends the image to Google Gemini, and parses the structured response into PDF, Excel and Word — no hardcoded templates, no manual data entry. Tested against EasyOCR, which failed on cursive handwriting with confidence scores below 10%.
</p>
</div>
<!-- Case Study: PDF Sensitive Data Redactor -->
<div class="case-study-card">
<div class="case-study-header">
<h3>PDF Sensitive Data Redaction — Text, Images & Metadata</h3>
<p>A complete case study on what actually happens when you ask AI chatbots to redact a PDF — and why a specialised pipeline is needed to do it correctly.</p>
<p style="margin-top: 0.5rem; font-style: italic; color: #6b7280;">"Can't I just ask ChatGPT to redact my PDF?" — Tested. Here's what happened.</p>
</div>
<div class="case-study-steps">
<div class="case-study-step">
<div class="step-badge">01</div>
<a href="assets/imgs/redactor/false_redact_table.jpg" target="_blank" class="view-link-img">
<div class="case-study-step-image fixed-H250">
<img src="assets/imgs/redactor/false_redact_table.jpg" alt="False redact — text still selectable and copyable">
</div>
</a>
<p class="step-label">The False Redact Trap</p>
<p class="step-desc">Black boxes drawn over text — but the data is still there. Copy/paste extracts everything.</p>
</div>
<div class="case-study-arrow">→</div>
<div class="case-study-step">
<div class="step-badge">02</div>
<a href="assets/imgs/redactor/true_redact.jpg" target="_blank" class="view-link-img">
<div class="case-study-step-image fixed-H250">
<img src="assets/imgs/redactor/true_redact.jpg" alt="True redact — sensitive text permanently removed from PDF structure">
</div>
</a>
<p class="step-label">True Text Redaction</p>
<p class="step-desc">Sensitive text permanently removed from the PDF structure. Only non-redacted content can be copied.</p>
</div>
<div class="case-study-arrow">→</div>
<div class="case-study-step">
<div class="step-badge">03</div>
<a href="assets/imgs/redactor/redact_images.jpg" target="_blank" class="view-link-img">
<div class="case-study-step-image fixed-H250">
<img src="assets/imgs/redactor/redact_images.jpg" alt="Embedded images redacted using OCR — including handwritten signature">
</div>
</a>
<p class="step-label">Image Redaction (OCR)</p>
<p class="step-desc">Sensitive text inside embedded images located via OCR and permanently overwritten. Handwritten content redacted as a safe fallback.</p>
</div>
<div class="case-study-arrow">→</div>
<div class="case-study-step">
<div class="step-badge">04</div>
<a href="assets/imgs/redactor/metadata_wiped.jpg" target="_blank" class="view-link-img">
<div class="case-study-step-image fixed-H250">
<img src="assets/imgs/redactor/metadata_wiped.jpg" alt="Document metadata wiped — author, title, keywords, creation date removed">
</div>
</a>
<p class="step-label">Metadata Wiped</p>
<p class="step-desc">Author, title, keywords, creation date and software fingerprint removed from the final file.</p>
</div>
</div>
<div class="card-links">
<a href="https://github.com/hasff/python-pdf-sensitive-data-redactor" target="_blank" class="view-link" aria-label="PDF Sensitive Data Redactor: View Code">View Code</a>
</div>
<p style="font-size: 0.82rem; color: #9ca3af; margin-top: 1rem; line-height: 1.6;">
Four AI chatbots were tested with the same document and the same request. Gemini and Mistral refused to produce a PDF. Copilot returned the original file unchanged and claimed it had been fully redacted. ChatGPT made three attempts — the best result still left names visible, dropped words mid-sentence, and ignored images. This pipeline uses AI only for classification (Google Gemini) and specialised libraries for precision editing (PyMuPDF, EasyOCR) — each tool doing only what it is good at.
</p>
</div>
<!-- Case Study: Fillable PDF Form Extractor -->
<div class="case-study-card">
<div class="case-study-header">
<h3>AcroForm Data Extraction — Filled PDF to Excel</h3>
<p>A patient admission form filled in a standard PDF reader, processed entirely with Python. Fields are located by position — not by name — so the approach works on any AcroForm, including forms from third parties.</p>
</div>
<div class="case-study-steps">
<div class="case-study-step">
<div class="step-badge">01</div>
<a href="assets/imgs/fillable/extractor/patient_admission_form_john_doe.png" target="_blank" class="view-link-img">
<div class="case-study-step-image fixed-H420">
<img src="assets/imgs/fillable/extractor/patient_admission_form_john_doe.png" alt="Filled AcroForm PDF patient admission form">
</div>
</a>
<p class="step-label">Filled Form</p>
<p class="step-desc">A patient admission AcroForm filled in any standard PDF reader — no special software.</p>
</div>
<div class="case-study-arrow">→</div>
<div class="case-study-step">
<div class="step-badge">02</div>
<a href="assets/imgs/fillable/extractor/patient_admission_form_john_doe_debug.png" target="_blank" class="view-link-img">
<div class="case-study-step-image fixed-H420">
<img src="assets/imgs/fillable/extractor/patient_admission_form_john_doe_debug.png" alt="Section areas overlaid on the form for calibration">
</div>
</a>
<p class="step-label">Spatial Calibration</p>
<p class="step-desc">Section boundaries defined as percentages of page height and visualised before extraction runs.</p>
</div>
<div class="case-study-arrow">→</div>
<div class="case-study-step">
<div class="step-badge">03</div>
<a href="assets/imgs/fillable/extractor/patient_admission_form_john_doe_excel.jpg" target="_blank" class="view-link-img">
<div class="case-study-step-image fixed-H420">
<img src="assets/imgs/fillable/extractor/patient_admission_form_john_doe_excel.jpg" alt="Extracted form data in a formatted Excel file">
</div>
</a>
<p class="step-label">Extracted to Excel</p>
<p class="step-desc">All 35 fields written to a formatted workbook — grouped by section, no manual copying.</p>
</div>
</div>
<div class="card-links">
<a href="https://github.com/hasff/python-fillable-pdf-form-extractor" target="_blank" class="view-link" aria-label="Fillable PDF Form Extractor: View Code">View Code</a>
</div>
<p style="font-size: 0.82rem; color: #9ca3af; margin-top: 1rem; line-height: 1.6;">
Fields are matched to sections by spatial intersection — if a widget's bounding rectangle falls inside a defined region, it belongs to that section. This makes the pipeline robust to third-party forms where field names are unknown. An inspection mode renders the section areas onto a debug PDF for visual confirmation before any data is extracted.
</p>
</div>
<!-- Case Study: AT&T Mobility PDF Extractor -->
<div class="case-study-card">
<div class="case-study-header">
<h3>Python AT&T Mobility PDF Extractor</h3>
<p>This project extracts and processes AT&T Mobility usage data from PDF statements,
allowing for automated analysis and reporting. The script parses detailed call records,
including timestamps, cell locations, and call types, and exports them to CSV.
</p>
</div>
<div class="case-study-steps">
<div class="case-study-step">
<div class="step-badge">01</div>
<a href="assets/imgs/att/att_original.png" target="_blank" class="view-link-img">
<div class="case-study-step-image fixed-H420">
<img src="assets/imgs/att/att_original.png" alt="Original AT&T Mobility PDF statement with call records">
</div>
</a>
<p class="step-label">Source PDF Statement</p>
<p class="step-desc">A sample page from an AT&T Mobility usage PDF statement, showing detailed call records.</p>
</div>
<div class="case-study-arrow">→</div>
<div class="case-study-step">
<div class="step-badge">02</div>
<a href="assets/imgs/att/sections_inspection.png" target="_blank" class="view-link-img">
<div class="case-study-step-image fixed-H420">
<img src="assets/imgs/att/sections_inspection.png" alt="Section areas overlaid on the PDF for calibration">
</div>
</a>
<p class="step-label">Spatial Calibration</p>
<p class="step-desc">Section boundaries defined as percentages of page height and visualised before extraction runs.</p>
</div>
<div class="case-study-arrow">→</div>
<div class="case-study-step">
<div class="step-badge">03</div>
<a href="assets/imgs/att/output_csv_example.jpg" target="_blank" class="view-link-img">
<div class="case-study-step-image fixed-H420">
<img src="assets/imgs/att/output_csv_example.jpg" alt="Extracted data in a CSV file with call records structured in rows and columns">
</div>
</a>
<p class="step-label">Extracted Data</p>
<p class="step-desc">A preview of the extracted and structured data, ready for analysis or integration with other systems.</p>
</div>
</div>
<div class="card-links">
<a href="https://github.com/hasff/python-att-mobility-pdf-extractor" target="_blank" class="view-link" aria-label="Python AT&T Mobility PDF Extractor: View Code">View Code</a>
</div>
<p style="font-size: 0.82rem; color: #9ca3af; margin-top: 1rem; line-height: 1.6;">
The script uses libraries like PyMuPDF for PDF parsing and pdfplumber for data manipulation,
ensuring high performance and reliability. It is particularly useful for telecom audits,
cost analysis, or integration with internal reporting tools.
</p>
</div>
</section>
<section id="courses" class="section gray-bg">
<h1>Learn from my Mistakes</h1>
<p>Skip the months of headache. I've distilled my entire PDF automation workflow into these step-by-step courses.</p>
<div class="grid">
<div class="card">
<h3>Python PDF Generation: From Beginner to Winner</h3>
<br>
<p><strong>A hands-on journey based on real client projects.</strong> I’ve spent months distilling my professional workflow into this course, so you can skip the frustration of broken layouts.</p>
<ul class="card-features">
<li>✔ Tables, Paragraphs & Flowables</li>
<li>✔ Security (Passwords & Permissions)</li>
<li>✔ Watermarking</li>
<li>✔ Forms (Checkboxes, Textfields, etc)</li>
<li>✔ Add JavaScript to PDFs</li>
<li>✔ Django Integration</li>
</ul>
<p class="personal-note"><em>"This is the result of years of trial and error, structured to make you a winner."</em></p>
<div id="btn-reportlab"></div>
</div>
<div class="card">
<h3>Python PDF Handling: From Beginner to Winner</h3>
<p><strong>Master the art of PDF manipulation and reverse-engineering.</strong> This course goes beyond simple scripts—I’ll teach you how to explore documentation and debug source code to solve impossible tasks. From metadata extraction to advanced page surgery, you'll learn to control every byte of a PDF file.</p>
<ul class="card-features">
<li>✔ Text & Image Extraction (TXT/PNG)</li>
<li>✔ Page Surgery (Rotate, Crop, Resize)</li>
<li>✔ Split, Merge & Watermark</li>
<li>✔ AI Text Recognition (OCR) from Scans</li>
<li>✔ Snapshots & High-Speed Processing</li>
<li>✔ Deep-Dive Debugging Skills</li>
</ul>
<p class="personal-note"><em>"Don't just use a library—learn how to unravel complex problems when no one else has the answer."</em></p>
<div id="btn-pymupdf"></div>
</div>
</div>
</section>
<section id="tools" class="section">
<h1>Production-Ready Tools</h1>
<p>Don't waste days fighting PDF layout engines. Use the same billing systems I've built for professional SaaS projects.</p>
<div class="grid">
<div class="card">
<div class="card-content">
<h3>Multi-Page Invoice Generator (Pro)</h3>
<br>
<p><strong>A deterministic billing engine for Python APIs.</strong> Stop dealing with unpredictable HTML-to-PDF tools. This engine handles complex pagination, repeating headers, and international typography natively.</p>
<ul class="card-features">
<li>✔ Multi-page Flowables (Auto-headers)</li>
<li>✔ API-Friendly (BytesIO Streaming)</li>
<li>✔ Unicode & International Fonts</li>
<li>✔ QR Codes for Payments</li>
<li>✔ Page X of Y & Legal Layouts</li>
<li>✔ No Headless Browser Required</li>
</ul>
<p class="personal-note"><em>"Built for developers who need invoices that behave predictably in production."</em></p>
</div>
<a href="https://alltech.gumroad.com/l/agvoio" target="_blank" class="cta-gumroad" aria-label="Multi-Page Invoice Generator (Pro): Get the Source Code on Gumroad">
Get the Source Code on Gumroad
</a>
</div>
<div class="card">
<div class="card-content">
<h3>Need a specific solution?</h3>
<br>
<p>If you have a unique PDF challenge or need a specific template that isn't listed here, I'm happy to help.</p>
<p>Whether it's a quick question or a custom project, let's see if we can work together to solve it.</p>
</div>
<a href="#contact" class="cta-button" style="margin-top: 20px;" aria-label="Need a specific solution?: Send me a message">Send me a message</a>
</div>
</div>
</section>
<section id="contact" class="section gray-bg">
<h1>Get in Touch</h1>
<p>Have a question about a course or product? Let's talk.</p>
<div class="contact-box" style="display: flex; gap: 15px; justify-content: center;">
<a href="mailto:hugoferro.business@gmail.com" class="cta-button" aria-label="Get in Touch: Send me an Email">Send me an Email</a>
<a href="https://www.linkedin.com/in/hugo-ferro-1434b414/" target="_blank" class="cta-button" style="background-color: #0077b5; border-color: #0077b5;" aria-label="Get in Touch: LinkedIn">
Connect on LinkedIn
</a>
</div>
</section>
</main>
<br>
<br>
<br>
<footer style="text-align: center; padding: 40px 20px; border-top: 1px solid #eee;">
<div style="margin-bottom: 15px;">
<a href="https://www.linkedin.com/in/hugo-ferro-1434b414/" target="_blank" style="color: #555; text-decoration: none; margin: 0 15px; font-weight: 500;" class="nav-link">LinkedIn</a>
<a href="https://github.com/hasff" target="_blank" style="color: #555; text-decoration: none; margin: 0 15px; font-weight: 500;" class="nav-link">GitHub</a>
</div>
<p style="font-size: 0.9rem; color: #888;">© 2026 Hugo Ferro. All rights reserved.</p>
</footer>
<script>
// Page bootstrap, run once the DOM is parsed:
//   1. Renders a Udemy enrol button into each #btn-<id> placeholder.
//   2. Reports clicks on tracked links to Google Analytics through gtag().
document.addEventListener('DOMContentLoaded', function() {
  // --- CONFIGURATION: Only update the 'code' variable ---
  const COURSES_DATA = {
    reportlab: {
      code: "MAR_20_26", // Current coupon code
      baseUrl: "https://www.udemy.com/course/python-reportlab-from-beginner-to-winner/",
      referralCode: "3B927E883D2E868CF221"
    },
    pymupdf: {
      code: "MAR_20_26", // Current coupon code
      baseUrl: "https://www.udemy.com/course/python-pdf-handling-from-beginner-to-winner/",
      referralCode: "E7B71DCA8314B0BAC4BD"
    }
  };

  // Escapes the characters that are significant in HTML text and attribute values,
  // so a config value can never break out of the markup it is interpolated into.
  function escapeHtml(value) {
    return String(value)
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&#39;');
  }

  // Returns the enrol URL for a course: a coupon link when `code` is set,
  // otherwise a referral link. searchParams percent-encodes the value, so a
  // code containing reserved characters cannot corrupt the query string.
  function buildCourseUrl(course) {
    const url = new URL(course.baseUrl);
    if (course.code) {
      url.searchParams.set('couponCode', course.code);
    } else {
      url.searchParams.set('referralCode', course.referralCode);
    }
    return url.href;
  }

  for (const [id, course] of Object.entries(COURSES_DATA)) {
    const container = document.getElementById(`btn-${id}`);
    if (!container) continue;

    const finalLink = escapeHtml(buildCourseUrl(course));
    const safeId = escapeHtml(id);

    if (course.code) {
      const safeCode = escapeHtml(course.code);
      container.innerHTML = `
        <div class="coupon-area">
          <span class="coupon-label">Special Offer Active!</span>
          <a href="${finalLink}" target="_blank" rel="noopener" class="cta-udemy promo" aria-label="Udemy Course ${safeId}: coupon ${safeCode}">
            Enroll with Discount (Code: ${safeCode})
          </a>
        </div>`;
    } else {
      container.innerHTML = `
        <a href="${finalLink}" target="_blank" rel="noopener" class="cta-udemy" aria-label="Udemy Course ${safeId}: referral link">
          Enroll on Udemy
        </a>`;
    }
  }

  // Click tracking. Registered after the loop above so the generated .cta-udemy
  // buttons are included in the query.
  // NOTE(review): '.view-link-img' anchors are not in this selector even though some
  // carry "... from Image" aria-labels — confirm whether they should be tracked too.
  document.querySelectorAll('.cta-udemy, .cta-button, .cta-gumroad, .view-link, .nav-link').forEach(button => {
    button.addEventListener('click', function() {
      // Prefer the aria-label as the event label; fall back to the visible text.
      const buttonText = this.getAttribute('aria-label') || this.innerText.trim() || 'No Text Link';
      const targetUrl = this.href;
      // Send the event to Google Analytics (skipped if gtag is not defined).
      if (typeof gtag === 'function') {
        gtag('event', 'purchase_intent', {
          'event_category': 'conversion',
          'event_label': buttonText,
          'destination_url': targetUrl,
          'platform': targetUrl.includes('udemy') ? 'Udemy' : (targetUrl.includes('gumroad') ? 'Gumroad' : 'Other')
        });
      }
    });
  });
});
</script>
</body>
</html>