-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathiTextSharp.5.5.9_Example.cs
More file actions
35 lines (31 loc) · 1.45 KB
/
iTextSharp.5.5.9_Example.cs
File metadata and controls
35 lines (31 loc) · 1.45 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using iTextSharp.text.pdf;
using iTextSharp.text.pdf.parser;
namespace PDF_Text_Extractor
{
/// <summary>
/// Minimal console example that extracts the text of every page of a PDF
/// with iTextSharp 5.5.9 and writes it to standard output.
/// </summary>
class iTextSharp559Example
{
    /// <summary>PDF that is read when no path is supplied on the command line.</summary>
    private const string DefaultPdfPath = "C:\\mypdf.pdf";

    /// <summary>
    /// Reads the PDF, concatenates the extracted text of all pages in page order
    /// and prints it between a "PDF Content:" header and an "--EOF--" trailer.
    /// </summary>
    /// <param name="args">
    /// Optional command-line arguments. When present, the first argument is used as
    /// the path of the PDF to read; otherwise <see cref="DefaultPdfPath"/> is used.
    /// </param>
    static void Main(string[] args)
    {
        string pdfPath = (args != null && args.Length > 0) ? args[0] : DefaultPdfPath;

        // StringBuilder avoids re-copying the whole accumulated text on every page.
        StringBuilder output = new StringBuilder();

        // Dispose the reader so the underlying file is released even if extraction throws.
        using (PdfReader reader = new PdfReader(pdfPath))
        {
            // iTextSharp page numbers are 1-based.
            for (int page = 1; page <= reader.NumberOfPages; page++)
            {
                // A fresh strategy per page keeps text from one page out of the next.
                output.Append(PdfTextExtractor.GetTextFromPage(reader, page, new SimpleTextExtractionStrategy()));

                // Alternative that could be handy for tables:
                // output.Append(PdfTextExtractor.GetTextFromPage(reader, page, new LocationTextExtractionStrategy()));
            }
        }

        // NOTE: It seems you cannot read a table cell-by-cell: once the PDF is baked,
        // it is no longer a table with cells, only vectors and text.
        //
        // Some links:
        // http://stackoverflow.com/questions/6956078/read-from-a-pdf-file-using-c-sharp
        // http://stackoverflow.com/questions/6956814/read-tables-from-a-pdf-file-using-c-sharp
        // http://stackoverflow.com/questions/32014589/how-to-read-data-from-table-structured-pdf-using-itextsharp

        Console.Write("PDF Content:" + Environment.NewLine);
        Console.Write(output.ToString());
        Console.Write(Environment.NewLine + "--EOF--");
    }
}
}