-
Notifications
You must be signed in to change notification settings - Fork 51
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #96 from SyncfusionExamples/910367
910367: OCR with multiple languages GitHub sample.
- Loading branch information
Showing
11 changed files
with
121 additions
and
0 deletions.
There are no files selected for viewing
25 changes: 25 additions & 0 deletions
25
OCR/.NET/OCR-with-multiple-langauages/OCR with multiple languages.sln
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
|
||
Microsoft Visual Studio Solution File, Format Version 12.00 | ||
# Visual Studio Version 17 | ||
VisualStudioVersion = 17.11.35222.181 | ||
MinimumVisualStudioVersion = 10.0.40219.1 | ||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "OCR with multiple langauages", "OCR with multiple languages\OCR with multiple languages.csproj", "{31453861-AEBA-4C3D-89DD-BE4CF523F717}" | ||
EndProject | ||
Global | ||
GlobalSection(SolutionConfigurationPlatforms) = preSolution | ||
Debug|Any CPU = Debug|Any CPU | ||
Release|Any CPU = Release|Any CPU | ||
EndGlobalSection | ||
GlobalSection(ProjectConfigurationPlatforms) = postSolution | ||
{31453861-AEBA-4C3D-89DD-BE4CF523F717}.Debug|Any CPU.ActiveCfg = Debug|Any CPU | ||
{31453861-AEBA-4C3D-89DD-BE4CF523F717}.Debug|Any CPU.Build.0 = Debug|Any CPU | ||
{31453861-AEBA-4C3D-89DD-BE4CF523F717}.Release|Any CPU.ActiveCfg = Release|Any CPU | ||
{31453861-AEBA-4C3D-89DD-BE4CF523F717}.Release|Any CPU.Build.0 = Release|Any CPU | ||
EndGlobalSection | ||
GlobalSection(SolutionProperties) = preSolution | ||
HideSolutionNode = FALSE | ||
EndGlobalSection | ||
GlobalSection(ExtensibilityGlobals) = postSolution | ||
SolutionGuid = {E1E0ADD2-5280-4B74-8CF7-D831048BE8DA} | ||
EndGlobalSection | ||
EndGlobal |
Binary file added
BIN
+78.6 KB
OCR/.NET/OCR-with-multiple-langauages/OCR with multiple languages/Data/Input.pdf
Binary file not shown.
Binary file added
BIN
+1.51 MB
OCR/.NET/OCR-with-multiple-langauages/OCR with multiple languages/Data/arialuni.ttf
Binary file not shown.
48 changes: 48 additions & 0 deletions
48
...R-with-multiple-langauages/OCR with multiple languages/OCR with multiple languages.csproj
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
<Project Sdk="Microsoft.NET.Sdk">

  <!-- Console application targeting .NET 8 with implicit usings and nullable reference types enabled. -->
  <PropertyGroup>
    <OutputType>Exe</OutputType>
    <TargetFramework>net8.0</TargetFramework>
    <RootNamespace>OCR_with_multiple_langauages</RootNamespace>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
  </PropertyGroup>

  <!-- Syncfusion OCR processor package used by Program.cs. -->
  <ItemGroup>
    <PackageReference Include="Syncfusion.PDF.OCR.NET" Version="26.2.14" />
  </ItemGroup>

  <!--
    Sample assets copied next to the built executable.
    PreserveNewest is used instead of Always so the large traineddata files are
    only recopied when they change, which keeps incremental builds fast.
    Entries for "arialuni.ttf" and "Input.pdf" at the project root were dropped:
    both files live only under Data\, so those Update items matched nothing.
  -->
  <ItemGroup>
    <None Update="Data\arialuni.ttf">
      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
    </None>
    <None Update="Data\Input.pdf">
      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
    </None>
    <!-- Placeholder that makes the Output folder exist in the build output. -->
    <None Update="Output\.gitkeep">
      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
    </None>
    <!-- Tesseract language data: Arabic, German, Greek, English, French. -->
    <None Update="tessdata\ara.traineddata">
      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
    </None>
    <None Update="tessdata\deu.traineddata">
      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
    </None>
    <None Update="tessdata\ell.traineddata">
      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
    </None>
    <None Update="tessdata\eng.traineddata">
      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
    </None>
    <None Update="tessdata\fra.traineddata">
      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
    </None>
  </ItemGroup>

</Project>
Empty file.
48 changes: 48 additions & 0 deletions
48
OCR/.NET/OCR-with-multiple-langauages/OCR with multiple languages/Program.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
| ||
using Syncfusion.OCRProcessor; | ||
using Syncfusion.Pdf.Parsing; | ||
|
||
|
||
// Sample entry point: runs OCR over Data/Input.pdf using several Tesseract
// languages at once and writes the processed document to Output/Output.pdf.
// All relative paths are resolved against the current working directory.

// Open both inputs read-only so the sample also works when the files are
// write-protected. The using declarations guarantee the handles are released
// even if OCR or saving throws; they are declared before the OCR processor so
// they stay open until after the processor has been disposed.
using FileStream fontStream = new FileStream(Path.GetFullPath(@"Data/arialuni.ttf"), FileMode.Open, FileAccess.Read);
using FileStream fileStream = new FileStream(Path.GetFullPath(@"Data/Input.pdf"), FileMode.Open, FileAccess.Read);

// Initialize the OCR processor within a using block to ensure resources are properly disposed
using (OCRProcessor ocrProcessor = new OCRProcessor())
{
    // Set the Unicode font for the OCR processor using a TrueType font file
    ocrProcessor.UnicodeFont = new Syncfusion.Pdf.Graphics.PdfTrueTypeFont(
        fontStream, // Stream over the TrueType font file
        12          // Font size
    );

    // Configure OCR settings and specify the languages to be used for OCR
    OCRSettings ocrSettings = new OCRSettings();
    ocrSettings.Language = "eng+deu+ara+ell+fra"; // English, German, Arabic, Greek, French

    // Apply the OCR settings to the OCR processor
    ocrProcessor.Settings = ocrSettings;

    // Load the PDF document from the file stream
    PdfLoadedDocument loadedDocument = new PdfLoadedDocument(fileStream);
    try
    {
        // Perform OCR on the loaded PDF document, providing the path to the tessdata directory
        ocrProcessor.PerformOCR(loadedDocument, "../../../tessdata");

        // Make sure the destination folder exists; CreateDirectory is a no-op when it already does
        Directory.CreateDirectory(Path.GetFullPath(@"Output"));

        // Create the output file only after OCR has succeeded, so a failed run
        // does not leave an empty Output.pdf behind
        using FileStream outputFileStream = new FileStream(Path.GetFullPath(@"Output/Output.pdf"), FileMode.Create);

        // Save the OCR-processed document to the file stream
        loadedDocument.Save(outputFileStream);
    }
    finally
    {
        // Close the loaded document and commit changes, even when OCR or saving failed
        loadedDocument.Close(true);
    }
}
|
||
|
||
|
Binary file added
BIN
+2.38 MB
OCR/.NET/OCR-with-multiple-langauages/OCR with multiple languages/tessdata/ara.traineddata
Binary file not shown.
Binary file added
BIN
+14.7 MB
OCR/.NET/OCR-with-multiple-langauages/OCR with multiple languages/tessdata/deu.traineddata
Binary file not shown.
Binary file added
BIN
+7.19 MB
OCR/.NET/OCR-with-multiple-langauages/OCR with multiple languages/tessdata/ell.traineddata
Binary file not shown.
Binary file added
BIN
+22.4 MB
OCR/.NET/OCR-with-multiple-langauages/OCR with multiple languages/tessdata/eng.traineddata
Binary file not shown.
Binary file added
BIN
+13.6 MB
OCR/.NET/OCR-with-multiple-langauages/OCR with multiple languages/tessdata/fra.traineddata
Binary file not shown.