diff --git a/All/All_Datalogics_64Bit.sln b/All/All_Datalogics_64Bit.sln index f0eb2384..35bde923 100644 --- a/All/All_Datalogics_64Bit.sln +++ b/All/All_Datalogics_64Bit.sln @@ -167,6 +167,8 @@ Project("{1D787362-28C9-4460-9606-840F3B484350}") = "AddDigitalSignatureRFC3161" EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "OCRImage", "..\OCR\OCRImage\OCRImage.vcxproj", "{26B0DAAC-1B6E-4020-BEC0-D47DDBA263C6}" EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "OCRPage", "..\OCR\OCRPage\OCRPage.vcxproj", "{A4E9D484-B851-4E28-B7C6-77B02180D87B}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|x64 = Debug|x64 @@ -497,6 +499,10 @@ Global {26B0DAAC-1B6E-4020-BEC0-D47DDBA263C6}.Debug|x64.Build.0 = Debug|x64 {26B0DAAC-1B6E-4020-BEC0-D47DDBA263C6}.Release|x64.ActiveCfg = Release|x64 {26B0DAAC-1B6E-4020-BEC0-D47DDBA263C6}.Release|x64.Build.0 = Release|x64 + {A4E9D484-B851-4E28-B7C6-77B02180D87B}.Debug|x64.ActiveCfg = Debug|x64 + {A4E9D484-B851-4E28-B7C6-77B02180D87B}.Debug|x64.Build.0 = Debug|x64 + {A4E9D484-B851-4E28-B7C6-77B02180D87B}.Release|x64.ActiveCfg = Release|x64 + {A4E9D484-B851-4E28-B7C6-77B02180D87B}.Release|x64.Build.0 = Release|x64 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/All/All_Datalogics_ARM64.sln b/All/All_Datalogics_ARM64.sln index 266b3871..35a58f76 100644 --- a/All/All_Datalogics_ARM64.sln +++ b/All/All_Datalogics_ARM64.sln @@ -165,6 +165,8 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "AddDigitalSignatureRFC3161" EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "OCRImage", "..\OCR\OCRImage\OCRImage.vcxproj", "{CFABC1FE-3F70-47E9-A911-EA085E6D127A}" EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "OCRPage", "..\OCR\OCRPage\OCRPage.vcxproj", "{E9F316EC-E669-4D97-B01F-92BA3200C2C9}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|ARM64 = Debug|ARM64 @@ 
-491,6 +493,10 @@ Global {CFABC1FE-3F70-47E9-A911-EA085E6D127A}.Debug|ARM64.Build.0 = Debug|ARM64 {CFABC1FE-3F70-47E9-A911-EA085E6D127A}.Release|ARM64.ActiveCfg = Release|ARM64 {CFABC1FE-3F70-47E9-A911-EA085E6D127A}.Release|ARM64.Build.0 = Release|ARM64 + {E9F316EC-E669-4D97-B01F-92BA3200C2C9}.Debug|ARM64.ActiveCfg = Debug|ARM64 + {E9F316EC-E669-4D97-B01F-92BA3200C2C9}.Debug|ARM64.Build.0 = Debug|ARM64 + {E9F316EC-E669-4D97-B01F-92BA3200C2C9}.Release|ARM64.ActiveCfg = Release|ARM64 + {E9F316EC-E669-4D97-B01F-92BA3200C2C9}.Release|ARM64.Build.0 = Release|ARM64 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/All/GNUmakefile b/All/GNUmakefile index a7042398..19dc2858 100644 --- a/All/GNUmakefile +++ b/All/GNUmakefile @@ -79,6 +79,7 @@ SAMPLES = Annotations/CreateAnnotations \ InformationExtraction/CountColorsInDoc \ InformationExtraction/ExtractDocumentInfo \ OCR/OCRImage \ + OCR/OCRPage \ Printing/PostScriptInjection \ Security/AddDigitalSignatureCMS \ Security/AddDigitalSignatureRFC3161 \ diff --git a/All/build_run_all.bat b/All/build_run_all.bat index 6b649b0f..bd15d9af 100644 --- a/All/build_run_all.bat +++ b/All/build_run_all.bat @@ -251,13 +251,14 @@ SET "DL_SAMPLE_LIST=%DL_SAMPLE_LIST% ContentExtraction\ExtractFonts" SET "DL_SAMPLE_LIST=%DL_SAMPLE_LIST% InformationExtraction\CountColorsInDoc" SET "DL_SAMPLE_LIST=%DL_SAMPLE_LIST% InformationExtraction\ExtractDocumentInfo" SET "DL_SAMPLE_LIST=%DL_SAMPLE_LIST% OCR\OCRImage" +SET "DL_SAMPLE_LIST=%DL_SAMPLE_LIST% OCR\OCRPage" SET "DL_SAMPLE_LIST=%DL_SAMPLE_LIST% FileSystem\AlternateFileSystem" REM *** The total number of DL samples. This must be accurate! 
IF NOT "%ARCH%"=="ARM64" ( SET /A "NUM_DL_SAMPLES=73" ) ELSE ( - SET /A "NUM_DL_SAMPLES=75" + SET /A "NUM_DL_SAMPLES=76" ) diff --git a/All/build_run_all_mac.sh b/All/build_run_all_mac.sh index 320ccee5..affb13c7 100755 --- a/All/build_run_all_mac.sh +++ b/All/build_run_all_mac.sh @@ -110,6 +110,7 @@ declare -a DL_SAMPLE_LIST=( \ "InformationExtraction/CountColorsInDoc" \ "InformationExtraction/ExtractDocumentInfo" \ "OCR/OCRImage" \ + "OCR/OCRPage" \ "Printing/PostScriptInjection" \ "Security/AESEncryption" \ "Security/AddDigitalSignatureCMS" \ diff --git a/All/run_all_DL_samples_mac.sh b/All/run_all_DL_samples_mac.sh index 4784e436..2e1a806c 100755 --- a/All/run_all_DL_samples_mac.sh +++ b/All/run_all_DL_samples_mac.sh @@ -276,6 +276,11 @@ cd ../../OCR/OCRImage ./OCRImage-${stage}.app/Contents/MacOS/OCRImage-${stage} echo "" +echo Running sample OCRPage +cd ../../OCR/OCRPage +./OCRPage-${stage}.app/Contents/MacOS/OCRPage-${stage} +echo "" + # Printing echo Running sample PostScriptInjection diff --git a/OCR/OCRImage/OCRImage.vcxproj b/OCR/OCRImage/OCRImage.vcxproj index 6e5bd686..a31ff955 100644 --- a/OCR/OCRImage/OCRImage.vcxproj +++ b/OCR/OCRImage/OCRImage.vcxproj @@ -5,10 +5,6 @@ Debug ARM64 - - Debug - Win32 - Debug x64 @@ -17,10 +13,6 @@ Release ARM64 - - Release - Win32 - Release x64 @@ -34,12 +26,6 @@ 10.0 - - Application - true - v142 - Unicode - Application true @@ -52,13 +38,6 @@ v143 Unicode - - Application - false - v142 - true - Unicode - Application false @@ -75,18 +54,12 @@ - - - - - - @@ -94,11 +67,6 @@ - - true - $(Platform)\$(Configuration)\ - $(Platform)\$(Configuration)\ - true $(Platform)\$(Configuration)\ @@ -107,11 +75,6 @@ true $(Platform)\$(Configuration)\ - - false - $(Platform)\$(Configuration)\ - $(Platform)\$(Configuration)\ - false $(Platform)\$(Configuration)\ @@ -120,24 +83,6 @@ false $(Platform)\$(Configuration)\ - - - NotUsing - Level3 - Disabled - 
_CRT_SECURE_NO_WARNINGS;_CONSOLE;_DEBUG;DEBUG;_WIN32;WIN32;WIN_ENV;WIN_PLATFORM;PRODUCT="HFTLibrary.h";PI_ACROCOLOR_VERSION=AcroColorHFT_VERSION_6;%(PreprocessorDefinitions) - true - ..\..\..\Include\Headers;..\..\_Common;..\..\_Common;%(AdditionalIncludeDirectories) - - - - Console - true - ..\..\..\Binaries - kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;DL180PDFL.lib;%(AdditionalDependencies) - DL180pdfl.dll - - NotUsing @@ -175,24 +120,6 @@ DL180pdfl.dll - - - NotUsing - Level3 - Disabled - _CRT_SECURE_NO_WARNINGS;_CONSOLE;WIN32;WIN32;WIN_ENV;WIN_PLATFORM;PRODUCT="HFTLibrary.h";PI_ACROCOLOR_VERSION=AcroColorHFT_VERSION_6;%(PreprocessorDefinitions) - true - ..\..\..\Include\Headers;..\..\_Common;..\..\_Common;%(AdditionalIncludeDirectories) - - - - Console - true - ..\..\..\Binaries - kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;DL180PDFL.lib;%(AdditionalDependencies) - DL180pdfl.dll - - NotUsing diff --git a/OCR/OCRPage/Makefile b/OCR/OCRPage/Makefile new file mode 100644 index 00000000..6f7f818a --- /dev/null +++ b/OCR/OCRPage/Makefile @@ -0,0 +1,7 @@ +SAMPNAME=OCRPage +OTHER_OBJS = $(SAMPNAME).o + +include ../../dlutils/common.mak + +$(SAMPNAME).o: $(SRC)/$(SAMPNAME).cpp + diff --git a/OCR/OCRPage/OCRPage.cpp b/OCR/OCRPage/OCRPage.cpp new file mode 100644 index 00000000..6a0ced64 --- /dev/null +++ b/OCR/OCRPage/OCRPage.cpp @@ -0,0 +1,83 @@ +// +// Copyright (c) 2017-2025, Datalogics, Inc. All rights reserved. +// +// +// The OCRPage sample demonstrates how the Library works to OCR a page. 
+// +// Command-line: <input-file> <output-file> (Optional) +// + +#include <iostream> + +#include "ASExtraCalls.h" +#include "DLExtrasCalls.h" +#include "OCREngineCalls.h" + +#include "InitializeLibrary.h" +#include "APDFLDoc.h" + +#define DIR_LOC "../../../../Resources/Sample_Input/" +#define DEF_INPUT "OCRPage.pdf" +#define DEF_OUTPUT "OCRPage-out.pdf" + +int main(int argc, char **argv) { + APDFLib libInit; + ASErrorCode errCode = 0; + if (libInit.isValid() == false) { + errCode = libInit.getInitError(); + std::cout << "Initialization failed with code " << errCode << std::endl; + return libInit.getInitError(); + } + + std::string csInputFileName(argc > 1 ? argv[1] : DIR_LOC DEF_INPUT); + std::string csOutputFileName(argc > 2 ? argv[2] : DEF_OUTPUT); + std::cout << "Recognizing text in " << csInputFileName.c_str() << std::endl; + + DURING + // Sets the correct location for the OCREngine function table. + gOCREngineHFT = InitOCREngineHFT; + + // Initialize the OCREngine plugin. + if (!OCREngineInitialize()) { + std::cout << "The OCREngine plugin failed to initialize." << std::endl; + errCode = -1; + } + + if (0 == errCode) { + // Obtain first page in document. + APDFLDoc inDoc(csInputFileName.c_str(), true); + PDDoc pdDoc = inDoc.getPDDoc(); + PDPage inputPage1 = PDDocAcquirePage(pdDoc, 0); + + // Set default OCR parameters. + OCRParamsRec ocrParams = PDOCRDefaultParams(); + + // Set languages to configure OCREngine with. + OCRLanguage newLanguages[] = {OCRLanguage_English, OCRLanguage_French, OCRLanguage_ChineseTraditional, + OCRLanguage_ChineseSimplified, OCRLanguage_Japanese}; + + ASInt32 numLanguages = sizeof(newLanguages) / sizeof(newLanguages[0]); + PDOCRParamsSetLanguagesConfigured(&ocrParams, newLanguages, numLanguages); + + // Run OCR on the page. + PDOCRRecognizePage(inputPage1, &ocrParams, OCRMissingFontStrategy_Raise); + + // Save document with recognized text. 
+ ASPathName path = APDFLDoc::makePath(csOutputFileName.c_str()); + PDDocSave(pdDoc, PDSaveFull, path, ASGetDefaultFileSys(), nullptr, nullptr); + ASFileSysReleasePath(nullptr, path); + + // Release resources. + PDPageRelease(inputPage1); + + // Release OCREngine resources and terminate the plugin. + PDOCRReleaseParams(&ocrParams); + OCREngineTerminate(); + } // if 0 == errCode + HANDLER + errCode = ERRORCODE; + libInit.displayError(errCode); + END_HANDLER + + return errCode; // APDFLib's destructor terminates the library. +} diff --git a/OCR/OCRPage/OCRPage.vcxproj b/OCR/OCRPage/OCRPage.vcxproj new file mode 100644 index 00000000..d4fa4a38 --- /dev/null +++ b/OCR/OCRPage/OCRPage.vcxproj @@ -0,0 +1,173 @@ + + + + + Debug + ARM64 + + + Debug + x64 + + + Release + ARM64 + + + Release + x64 + + + + {A4E9D484-B851-4E28-B7C6-77B02180D87B} + Win32Proj + BlankSample + OCRPage + 10.0 + + + + Application + true + v142 + Unicode + + + Application + true + v143 + Unicode + + + Application + false + v142 + true + Unicode + + + Application + false + v143 + true + Unicode + + + + + + + + + + + + + + + + + + true + $(Platform)\$(Configuration)\ + + + true + $(Platform)\$(Configuration)\ + + + false + $(Platform)\$(Configuration)\ + + + false + $(Platform)\$(Configuration)\ + + + + NotUsing + Level3 + Disabled + _CRT_SECURE_NO_WARNINGS;_CONSOLE;_DEBUG;DEBUG;_WIN64;WIN64;WIN_ENV;WIN_PLATFORM;PRODUCT="HFTLibrary.h";PI_ACROCOLOR_VERSION=AcroColorHFT_VERSION_6;%(PreprocessorDefinitions) + true + ..\..\..\Include\Headers;..\..\_Common;..\..\_Common;%(AdditionalIncludeDirectories) + + + + Console + true + ..\..\..\Binaries + kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;DL180PDFL.lib;%(AdditionalDependencies) + DL180pdfl.dll + + + + + NotUsing + Level3 + Disabled + 
_CRT_SECURE_NO_WARNINGS;_CONSOLE;_DEBUG;DEBUG;_WIN64;WIN64;WIN_ENV;WIN_PLATFORM;PRODUCT="HFTLibrary.h";PI_ACROCOLOR_VERSION=AcroColorHFT_VERSION_6;%(PreprocessorDefinitions) + true + ..\..\..\Include\Headers;..\..\_Common;..\..\_Common;%(AdditionalIncludeDirectories) + + + + + Console + true + ..\..\..\Binaries + kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;DL180PDFL.lib;%(AdditionalDependencies) + DL180pdfl.dll + + + + + NotUsing + Level3 + Disabled + _CRT_SECURE_NO_WARNINGS;_CONSOLE;WIN64;WIN64;WIN_ENV;WIN_PLATFORM;PRODUCT="HFTLibrary.h";PI_ACROCOLOR_VERSION=AcroColorHFT_VERSION_6;%(PreprocessorDefinitions) + true + ..\..\..\Include\Headers;..\..\_Common;..\..\_Common;%(AdditionalIncludeDirectories) + + + + Console + true + ..\..\..\Binaries + kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;DL180PDFL.lib;%(AdditionalDependencies) + DL180pdfl.dll + + + + + NotUsing + Level3 + Disabled + _CRT_SECURE_NO_WARNINGS;_CONSOLE;WIN64;WIN64;WIN_ENV;WIN_PLATFORM;PRODUCT="HFTLibrary.h";PI_ACROCOLOR_VERSION=AcroColorHFT_VERSION_6;%(PreprocessorDefinitions) + true + ..\..\..\Include\Headers;..\..\_Common;..\..\_Common;%(AdditionalIncludeDirectories) + + + + + Console + true + ..\..\..\Binaries + kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;DL180PDFL.lib;%(AdditionalDependencies) + DL180pdfl.dll + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/OCR/OCRPage/OCRPage_64Bit.sln b/OCR/OCRPage/OCRPage_64Bit.sln new file mode 100644 index 00000000..d481765b --- /dev/null +++ b/OCR/OCRPage/OCRPage_64Bit.sln @@ -0,0 +1,25 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 17 +VisualStudioVersion = 17.14.36414.22 +MinimumVisualStudioVersion = 
10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "OCRPage", "OCRPage.vcxproj", "{A4E9D484-B851-4E28-B7C6-77B02180D87B}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Release|x64 = Release|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {A4E9D484-B851-4E28-B7C6-77B02180D87B}.Debug|x64.ActiveCfg = Debug|x64 + {A4E9D484-B851-4E28-B7C6-77B02180D87B}.Debug|x64.Build.0 = Debug|x64 + {A4E9D484-B851-4E28-B7C6-77B02180D87B}.Release|x64.ActiveCfg = Release|x64 + {A4E9D484-B851-4E28-B7C6-77B02180D87B}.Release|x64.Build.0 = Release|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {1437FD6E-B1B8-46A5-8F7F-768E4E363E11} + EndGlobalSection +EndGlobal diff --git a/OCR/OCRPage/OCRPage_ARM64.sln b/OCR/OCRPage/OCRPage_ARM64.sln new file mode 100644 index 00000000..4fb0b580 --- /dev/null +++ b/OCR/OCRPage/OCRPage_ARM64.sln @@ -0,0 +1,25 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 17 +VisualStudioVersion = 17.14.36414.22 d17.14 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "OCRPage_ARM64", "OCRPage.vcxproj", "{E9F316EC-E669-4D97-B01F-92BA3200C2C9}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|ARM64 = Debug|ARM64 + Release|ARM64 = Release|ARM64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {E9F316EC-E669-4D97-B01F-92BA3200C2C9}.Debug|ARM64.ActiveCfg = Debug|ARM64 + {E9F316EC-E669-4D97-B01F-92BA3200C2C9}.Debug|ARM64.Build.0 = Debug|ARM64 + {E9F316EC-E669-4D97-B01F-92BA3200C2C9}.Release|ARM64.ActiveCfg = Release|ARM64 + {E9F316EC-E669-4D97-B01F-92BA3200C2C9}.Release|ARM64.Build.0 = Release|ARM64 + EndGlobalSection + GlobalSection(SolutionProperties) = 
preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {249931B8-EE03-454F-AF00-307325768721} + EndGlobalSection +EndGlobal diff --git a/OCR/README.md b/OCR/README.md index f247cbf2..89160bb8 100644 --- a/OCR/README.md +++ b/OCR/README.md @@ -1,2 +1,5 @@ ## ***OCRImage*** Recognized text within an image using Optical Character Recognition. + +## ***OCRPage*** +Recognizes text within a page using Optical Character Recognition. diff --git a/_Input/OCRPage.pdf b/_Input/OCRPage.pdf new file mode 100644 index 00000000..0e1ba4f5 Binary files /dev/null and b/_Input/OCRPage.pdf differ