Merge pull request #1904 from syncfusion-content/997098-dev

chinnumuniyappan · web-flow · commit 1ebaa747f9a1 · 2025-12-04T22:21:18.000+05:30
997098-dev: Added volume 4 UG changes in PDF library.
diff --git a/Document-Processing/PDF/PDF-Library/NET/Supported-and-Unsupported-Features.md b/Document-Processing/PDF/PDF-Library/NET/Supported-and-Unsupported-Features.md
@@ -1,11 +1,11 @@
 ---
-title: Supported and Unsupported Features | Syncfusion 
+title: Supported and Unsupported PDF Features | Syncfusion 
 description: This section explains about features available in Essential PDF and their availability in different platforms.
 platform: document-processing
 control: PDF
 documentation: UG
 ---
-# Supported and Unsupported Features 
+# Supported and Unsupported PDF Features 
 
 The following table shows the various features available in the Essential<sup>&reg;</sup> PDF and their availability in different platforms.
 
@@ -660,10 +660,10 @@ Yes<br/><br/></td>
 PDF/x1a: 2001 Compliance<br/><br/></td><td>
 Yes<br/><br/></td><td>
 Yes<br/><br/></td><td>
-No<br/><br/></td>
-<td>No<br/><br/></td>
-<td>No<br/><br/></td>
-<td>No<br/><br/></td></tr>
+Yes<br/><br/></td>
+<td>Yes<br/><br/></td>
+<td>Yes<br/><br/></td>
+<td>Yes<br/><br/></td></tr>
 <tr>
 <td>
 ZUGFeRD Invoice<br/><br/></td><td>
diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/Features.md b/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/Features.md
@@ -152,7 +152,7 @@ You can downloaded a complete working sample from [GitHub](https://github.com/Sy
 
 ## Performing OCR with tesseract version 3.05
 
-The [TesseractVersion](https://help.syncfusion.com/cr/document-processing/Syncfusion.OCRProcessor.OCRSettings.html#Syncfusion_OCRProcessor_OCRSettings_TesseractVersion) property is used to switch the tesseract version between 3.02 and 3.05. By default, OCR works with tesseract version 3.02.
+The [TesseractVersion](https://help.syncfusion.com/cr/document-processing/Syncfusion.OCRProcessor.OCRSettings.html#Syncfusion_OCRProcessor_OCRSettings_TesseractVersion) property is used to switch the tesseract version between 3.02 and 3.05. By default, OCR works with tesseract version 5.0.
 
 N> The starting supported version of tesseract in ASP.NET Core is 4.0. So the lower tesseract versions 3.02 and 3.05 are not supported and we don't have the property called ``TesseractVersion`` in ASP.NET Core platform.
 
@@ -216,9 +216,7 @@ End Using
 
 ## Performing OCR with Tesseract Version 4.0
 
-The [TesseractVersion](https://help.syncfusion.com/cr/document-processing/Syncfusion.OCRProcessor.OCRSettings.html#Syncfusion_OCRProcessor_OCRSettings_TesseractVersion) property is used to switch the tesseract version to 4.0. By default, OCR will be performed with tesseract version 3.02.
-
-N> In ASP.NET Core platform, the default and starting supported version of tesseract is 4.0. So we did not have the property called ``TesseractVersion`` in ASP.NET Core platform. 
+The [TesseractVersion](https://help.syncfusion.com/cr/document-processing/Syncfusion.OCRProcessor.OCRSettings.html#Syncfusion_OCRProcessor_OCRSettings_TesseractVersion) property is used to switch the tesseract version to 4.0. By default, OCR will be performed with tesseract version 5.0.
 
 The following code sample explains the OCR processor with Tesseract version 4.0 for PDF documents.
  
@@ -277,6 +275,67 @@ End Using
 
 {% endtabs %}  
 
+## Performing OCR with Tesseract Version 5.0
+
+The [TesseractVersion](https://help.syncfusion.com/cr/document-processing/Syncfusion.OCRProcessor.OCRSettings.html#Syncfusion_OCRProcessor_OCRSettings_TesseractVersion) property is used to switch the tesseract version to 5.0. By default, OCR will be performed with tesseract version 5.0.
+
+The following code sample explains the OCR processor with Tesseract version 5.0 for PDF documents.
+ 
+{% tabs %} 
+
+{% highlight c# tabtitle="C# [Cross-platform]" %}
+
+using Syncfusion.OCRProcessor;
+using Syncfusion.Pdf.Parsing;
+
+//Initialize the OCR processor.
+using (OCRProcessor processor = new OCRProcessor())
+{
+    //Load an existing PDF document.
+    PdfLoadedDocument document = new PdfLoadedDocument("Input.pdf");
+
+    //Set OCR language.
+    processor.Settings.Language = Languages.English;
+    //Set tesseract OCR Engine.
+    processor.Settings.TesseractVersion = TesseractVersion.Version5_0;
+    //Perform OCR on the loaded PDF document.
+    processor.PerformOCR(document);
+
+    //Save the PDF document.
+    document.Save("Output.pdf");
+    //Close the document.
+    document.Close(true);
+}
+
+{% endhighlight %}
+
+{% highlight vb.net tabtitle="VB.NET [Windows-specific]" %}
+
+Imports Syncfusion.OCRProcessor
+Imports Syncfusion.Pdf.Parsing
+
+'Initialize the OCR processor with tesseract binaries folder path. 
+Using processor As OCRProcessor = New OCRProcessor("TesseractBinaries/5.0/")
+    'Load an existing PDF document. 
+    Dim document As PdfLoadedDocument = New PdfLoadedDocument("Input.pdf")
+
+    'Set OCR language. 
+    processor.Settings.Language = Languages.English
+    'Set tesseract OCR Engine. 
+    processor.Settings.TesseractVersion = TesseractVersion.Version5_0
+    'Perform OCR on the loaded PDF document.
+    processor.PerformOCR(document)
+    
+    'Save the PDF document.
+    document.Save("Output.pdf")
+    'Close the document. 
+    document.Close(True)
+End Using
+
+{% endhighlight %}
+
+{% endtabs %}  
+
 ## Performing OCR on image
 
 The below code example illustrates how to perform OCR on image file using [PerformOCR](https://help.syncfusion.com/cr/document-processing/Syncfusion.OCRProcessor.OCRProcessor.html#Syncfusion_OCRProcessor_OCRProcessor_PerformOCR_System_Drawing_Bitmap_System_String_) method in [OCRProcessor](https://help.syncfusion.com/cr/document-processing/Syncfusion.OCRProcessor.OCRProcessor.html) class.
@@ -1005,6 +1064,178 @@ End Using
 
 N> The OCR Engine Mode is supported only in the Tesseract version 4.0 and above.
 
+## Performing OCR with different OCR Image Enhancement Mode
+
+The `ImageEnhancementMode` property is used to set the OCR image enhancement mode. By default, OCR works with the `EnhanceForRecognitionOnly` image enhancement mode. Refer to the following code example to perform OCR with different OCR image enhancement modes.
+
+The following table describes the available OCR image enhancement modes and their respective purposes.
+
+<table>
+<thead>
+<tr>
+<th>
+OCR Image Enhancement Mode<br/><br/></th><th>
+Description<br/><br/></th></tr>
+</thead>
+<tbody>
+<tr>
+<td>
+EnhanceForRecognitionOnly<br/><br/></td><td>
+Image is enhanced internally to improve OCR accuracy, but the original image is retained in the output.<br/><br/></td></tr>
+<tr>
+<td>
+EnhanceAndIncludeInOutput<br/><br/></td><td>
+Image is enhanced and the enhanced version is used in the output document.<br/><br/></td></tr>
+<tr>
+<td>
+None<br/><br/></td><td>
+No image enhancement is performed. The original image is used for OCR processing.<br/><br/></td></tr>
+</tbody>
+</table>
+
+{% tabs %}
+
+{% highlight c# tabtitle="C# [Cross-platform]" %}
+
+using Syncfusion.OCRProcessor;
+using Syncfusion.Pdf.Parsing;
+
+// Initialize the OCR processor
+using (OCRProcessor processor = new OCRProcessor())
+{
+    // Load an existing PDF document
+    PdfLoadedDocument document = new PdfLoadedDocument("Input.pdf");
+    // Set the OCR language to English for text recognition.
+    processor.Settings.Language = Languages.English;
+    // Set the OCR image enhancement mode to improve recognition accuracy.
+    processor.ImageEnhancementMode = OcrImageEnhancementMode.EnhanceForRecognitionOnly;
+    // Perform OCR with input document and tessdata (Language packs)
+    processor.PerformOCR(document);
+    // Save the processed PDF document
+    document.Save("Output.pdf");
+    // Close the document 
+    document.Close(true);
+}
+
+{% endhighlight %}
+
+{% highlight vb.net tabtitle="VB.NET [Windows-specific]" %}
+
+Imports Syncfusion.OCRProcessor
+Imports Syncfusion.Pdf.Parsing
+
+' Initialize the OCR processor
+Using processor As New OCRProcessor()
+
+    ' Load an existing PDF document
+    Dim document As New PdfLoadedDocument("Input.pdf")
+    ' Set the OCR language to English for text recognition
+    processor.Settings.Language = Languages.English
+    ' Set the OCR image enhancement mode to improve recognition accuracy
+    processor.ImageEnhancementMode = OcrImageEnhancementMode.EnhanceForRecognitionOnly
+    ' Perform OCR with input document and tessdata (Language packs)
+    processor.PerformOCR(document)
+    ' Save the processed PDF document
+    document.Save("Output.pdf")
+    ' Close the document
+    document.Close(True)
+
+End Using
+
+{% endhighlight %}
+{% endtabs %} 
+
+## Performing OCR with different OCR Image Enhancement options
+
+The `ImageEnhancementMode` property is used to set the OCR image enhancement mode. Refer to the following code example to perform OCR with different image enhancement options.
+
+The following table describes the available OCR image enhancement options and their respective purposes.
+
+<table>
+<thead>
+<tr>
+<th>
+OCR Image Enhancement options<br/><br/></th><th>
+Description<br/><br/></th></tr>
+</thead>
+<tbody>
+<tr>
+<td>
+IsGrayscaleEnabled<br/><br/></td><td>
+Simplifies image data by removing color information, making text easier to detect.<br/><br/></td></tr>
+<tr>
+<td>
+IsDeskewEnabled<br/><br/></td><td>
+Corrects tilted or rotated text for proper alignment.<br/><br/></td></tr>
+<tr>
+<td>
+IsDenoiseEnabled<br/><br/></td><td>
+Removes speckles and artifacts that can interfere with character recognition.<br/><br/></td></tr>
+<tr>
+<td>
+IsConstrastEnabled<br/><br/></td><td>
+Enhances text visibility against the background.<br/><br/></td></tr>
+<tr>
+<td>
+IsBinarizeEnabled<br/><br/></td><td>
+Converts images to black-and-white for sharper text edges, using advanced thresholding methods.<br/><br/></td></tr>
+</tbody>
+</table>
+
+{% tabs %}
+
+{% highlight c# tabtitle="C# [Cross-platform]" %}
+
+using Syncfusion.OCRProcessor;
+using Syncfusion.Pdf.Parsing;
+
+// Initialize the OCR processor
+using (OCRProcessor processor = new OCRProcessor())
+{
+    // Load an existing PDF document
+    PdfLoadedDocument document = new PdfLoadedDocument("Input.pdf");
+    // Set the OCR language to English for text recognition.
+    processor.Settings.Language = Languages.English;
+    // Set the options for image enhancement during the OCR process.
+    OcrImageEnhancementOptions options = new OcrImageEnhancementOptions();
+    // Enable grayscale conversion to improve OCR accuracy by reducing color noise.
+    options.IsGrayscaleEnabled = true;
+    // Perform OCR with input document and tessdata (Language packs)
+    processor.PerformOCR(document);
+    // Save the processed PDF document
+    document.Save("Output.pdf");
+    // Close the document 
+    document.Close(true);
+}
+
+{% endhighlight %}
+
+{% highlight vb.net tabtitle="VB.NET [Windows-specific]" %}
+
+Imports Syncfusion.OCRProcessor
+Imports Syncfusion.Pdf.Parsing
+
+' Initialize the OCR processor inside a Using block to ensure proper disposal.
+Using processor As New OCRProcessor()
+    ' Load an existing PDF document.
+    Dim document As New PdfLoadedDocument("Input.pdf")
+    ' Set the OCR language to English for text recognition.
+    processor.Settings.Language = Languages.English
+    ' Set the options for image enhancement during the OCR process.
+    Dim options As New OcrImageEnhancementOptions()
+    ' Enable grayscale conversion to improve OCR accuracy by reducing color noise.
+    options.IsGrayscaleEnabled = True
+    ' Perform OCR on the input document using tessdata (language packs).
+    processor.PerformOCR(document)
+    ' Save the processed PDF document.
+    document.Save("Output.pdf")
+    ' Close the document and release resources.
+    document.Close(True)
+End Using
+
+{% endhighlight %}
+{% endtabs %} 
+
 ## White List
 
 The [WhiteList](https://help.syncfusion.com/cr/document-processing/Syncfusion.OCRProcessor.OCRSettings.html#Syncfusion_OCRProcessor_OCRSettings_WhiteList) property specifies a list of characters that the OCR engine is only allowed to recognize. If a character is not on the white list, it will not be included in the output OCR results. For more information, refer to the following code sample. 
diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/Working-with-OCR.md b/Document-Processing/PDF/PDF-Library/NET/Working-with-OCR/Working-with-OCR.md
@@ -11,7 +11,15 @@ keywords: Assemblies
 
 Optical character recognition (OCR) is a technology used to convert scanned paper documents in the form of PDF files or images into searchable and editable data.  
 
-The [Syncfusion<sup>&reg;</sup> OCR processor library](https://www.syncfusion.com/document-processing/pdf-framework/net/pdf-library/ocr-process) has extended support to process OCR on scanned PDF documents and images with the help of Google’s [Tesseract](https://github.com/tesseract-ocr/tesseract) Optical Character Recognition engine.  
+The [Syncfusion<sup>&reg;</sup> OCR processor library](https://www.syncfusion.com/document-processing/pdf-framework/net/pdf-library/ocr-process) has extended support to process OCR on scanned PDF documents and images with the help of Google’s [Tesseract](https://github.com/tesseract-ocr/tesseract) Optical Character Recognition engine.
+
+An inbuilt `image preprocessor` has been added to the OCR to prepare images for optimal recognition. This step ensures cleaner input and reduces OCR errors. The preprocessor supports the following enhancements:
+
+* **Convert to Grayscale** – Simplifies image data by removing color information, making text easier to detect.
+* **Deskew** – Corrects tilted or rotated text for proper alignment.
+* **Denoise** – Removes speckles and artifacts that can interfere with character recognition.
+* **Apply Contrast Adjustment** – Enhances text visibility against the background.
+* **Apply Binarize** – Converts images to black-and-white for sharper text edges, using advanced thresholding methods.
 
 The Syncfusion<sup>&reg;</sup> OCR processor library works seamlessly in various platforms: Azure App Services, Azure Functions, AWS Textract, Docker, WinForms, WPF, Blazor, ASP.NET MVC, ASP.NET Core with Windows, MacOS and Linux. 
 
@@ -85,14 +93,6 @@ ASP.NET
 </tr>
 <tr>
 <td>
-ASP.NET MVC4
-</td>
-<td>
-{{'[Syncfusion.Pdf.OCR.AspNet.Mvc4.nupkg](https://www.nuget.org/packages/Syncfusion.Pdf.OCR.AspNet.Mvc4)'| markdownify }}
-</td>
-</tr>
-<tr>
-<td>
 ASP.NET MVC5
 </td>
 <td>
@@ -133,6 +133,7 @@ Windows Forms, WPF, ASP.NET, and ASP.NET MVC
 <li>Syncfusion.OCRProcessor.Base.dll</li>
 <li>Syncfusion.Pdf.Base.dll</li>
 <li>Syncfusion.Compression.Base.dll</li>
+<li>Syncfusion.ImagePreProcessor.Base.dll</li>
 </ul>
 </td>
 </tr>
@@ -146,21 +147,23 @@ Windows Forms, WPF, ASP.NET, and ASP.NET MVC
 <li>Syncfusion.PdfImaging.Portable.dll</li>
 <li>Syncfusion.Pdf.Portable.dll</li>
 <li>Syncfusion.Compression.Portable.dll</li>
-<li>{{'[SkiaSharp](https://www.nuget.org/packages/SkiaSharp/2.88.0-preview.232)'| markdownify }} package</li>
+<li>{{'[SkiaSharp](https://www.nuget.org/packages/SkiaSharp/3.119.1)'| markdownify }} package</li>
+<li>Syncfusion.ImagePreProcessor.Portable.dll</li>
 </ul>
 </td>
 </tr>
 <tr>
 <td>
-.NET 8/.NET 9
+.NET 8/.NET 9/.NET 10
 </td>
 <td>
 <ul>
 <li>Syncfusion.OCRProcessor.NET.dll</li>
 <li>Syncfusion.PdfImaging.NET.dll</li>
 <li>Syncfusion.Pdf.NET.dll</li>
 <li>Syncfusion.Compression.NET.dll</li>
-<li>{{'[SkiaSharp](https://www.nuget.org/packages/SkiaSharp/2.88.0-preview.232)'| markdownify }} package</li>
+<li>{{'[SkiaSharp](https://www.nuget.org/packages/SkiaSharp/3.119.1)'| markdownify }} package</li>
+<li>Syncfusion.ImagePreProcessor.NET.dll</li>
 </ul>
 </td>
 </tr>
diff --git a/Document-Processing/PDF/PDF-Library/NET/Working-with-PDF-Conformance.md b/Document-Processing/PDF/PDF-Library/NET/Working-with-PDF-Conformance.md
@@ -1220,7 +1220,30 @@ You can create a PDF/X-1a document by specifying the conformance level as ```Pdf
 
 {% highlight c# tabtitle="C# [Cross-platform]" %} 
 
-//Creating PDF/X-conformance documents is not supported on C#.NET cross-platform environments.
+using Syncfusion.Pdf;
+using Syncfusion.Pdf.Graphics;
+
+//Create a new document with the PDF/X-1a:2001 conformance level.
+PdfDocument document = new PdfDocument(PdfConformanceLevel.Pdf_X1A2001);
+//Add a page.
+PdfPage page = document.Pages.Add();
+//Set color space. 
+document.ColorSpace = PdfColorSpace.CMYK;
+
+//Create Pdf graphics for the page.
+PdfGraphics graphics = page.Graphics;
+//Create a solid brush.
+PdfBrush brush = new PdfSolidBrush(Color.Black);
+//Load the TrueType font from the local file.
+FileStream fontStream = new FileStream("Arial.ttf", FileMode.Open, FileAccess.Read); 
+//Set the font.
+PdfFont font = new PdfTrueTypeFont(fontStream, 14);
+//Draw the text.
+graphics.DrawString("Hello world!", font, brush, new PointF(20, 20));
+
+//Save and close the document.
+document.Save("Output.pdf");
+document.Close(true);
 
 {% endhighlight %}