Skip to content

Commit

Permalink
Merge pull request #96 from SyncfusionExamples/910367
Browse files Browse the repository at this point in the history
910367: OCR with multiple language github sample.
  • Loading branch information
chinnumuniyappan authored Sep 17, 2024
2 parents edb1ceb + 1360b5e commit 9e7b184
Show file tree
Hide file tree
Showing 11 changed files with 121 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 17
VisualStudioVersion = 17.11.35222.181
MinimumVisualStudioVersion = 10.0.40219.1
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "OCR with multiple languages", "OCR with multiple languages\OCR with multiple languages.csproj", "{31453861-AEBA-4C3D-89DD-BE4CF523F717}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Release|Any CPU = Release|Any CPU
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{31453861-AEBA-4C3D-89DD-BE4CF523F717}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{31453861-AEBA-4C3D-89DD-BE4CF523F717}.Debug|Any CPU.Build.0 = Debug|Any CPU
{31453861-AEBA-4C3D-89DD-BE4CF523F717}.Release|Any CPU.ActiveCfg = Release|Any CPU
{31453861-AEBA-4C3D-89DD-BE4CF523F717}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {E1E0ADD2-5280-4B74-8CF7-D831048BE8DA}
EndGlobalSection
EndGlobal
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
<Project Sdk="Microsoft.NET.Sdk">

  <!-- Console executable targeting .NET 8 with implicit usings and nullable reference types enabled. -->
  <PropertyGroup>
    <OutputType>Exe</OutputType>
    <TargetFramework>net8.0</TargetFramework>
    <!-- NOTE(review): the spelling "langauages" looks like a typo; kept as-is so the default namespace does not change. -->
    <RootNamespace>OCR_with_multiple_langauages</RootNamespace>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
  </PropertyGroup>

  <!-- OCR package referenced by Program.cs (Syncfusion.OCRProcessor / Syncfusion.Pdf namespaces). -->
  <ItemGroup>
    <PackageReference Include="Syncfusion.PDF.OCR.NET" Version="26.2.14" />
  </ItemGroup>

  <!--
    Runtime assets copied next to the built executable.
    PreserveNewest copies a file only when the source is newer than the copy in the
    output folder, so the large font and traineddata files are not re-copied on every build.
    Only the assets that live under Data\, Output\ and tessdata\ are listed; entries for
    files at the project root were dropped because "Update" has no effect on missing items.
  -->
  <ItemGroup>
    <!-- Unicode font and input document read by the program. -->
    <None Update="Data\arialuni.ttf">
      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
    </None>
    <None Update="Data\Input.pdf">
      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
    </None>

    <!-- Placeholder file whose only purpose is to make the Output folder exist in the build output. -->
    <None Update="Output\.gitkeep">
      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
    </None>

    <!-- Language data files, one per language code requested in Program.cs. -->
    <None Update="tessdata\ara.traineddata">
      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
    </None>
    <None Update="tessdata\deu.traineddata">
      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
    </None>
    <None Update="tessdata\ell.traineddata">
      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
    </None>
    <None Update="tessdata\eng.traineddata">
      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
    </None>
    <None Update="tessdata\fra.traineddata">
      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
    </None>
  </ItemGroup>

</Project>
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@

using Syncfusion.OCRProcessor;
using Syncfusion.Pdf.Parsing;


// Runs OCR over Data/Input.pdf using several languages at once and writes the
// processed document to Output/Output.pdf.
//
// All paths are resolved against the current working directory. The project copies
// Data, Output and tessdata next to the executable and the same folders exist in the
// project directory, so the sample works whether it is started from the build output
// folder or from the project folder.
string fontPath = Path.GetFullPath(@"Data/arialuni.ttf");
string inputPath = Path.GetFullPath(@"Data/Input.pdf");
string outputPath = Path.GetFullPath(@"Output/Output.pdf");

// Folder holding the *.traineddata files for every language requested below.
// Resolved like the other paths instead of climbing out of the build output folder
// with "../../../", which only works for one specific working directory.
string tessDataPath = Path.GetFullPath("tessdata");

// Every disposable is wrapped in `using` so file handles are released even when
// OCR or saving throws. The inputs are opened read-only because they are never written.
using (OCRProcessor ocrProcessor = new OCRProcessor())
using (FileStream fontStream = File.OpenRead(fontPath))
using (FileStream fileStream = File.OpenRead(inputPath))
{
    // Unicode font (12 pt) assigned to the OCR processor.
    // The font stream stays open until the document has been saved.
    ocrProcessor.UnicodeFont = new Syncfusion.Pdf.Graphics.PdfTrueTypeFont(fontStream, 12);

    // Load the PDF document from the input stream.
    PdfLoadedDocument loadedDocument = new PdfLoadedDocument(fileStream);
    try
    {
        // Languages to recognise: English, German, Arabic, Greek, French.
        ocrProcessor.Settings = new OCRSettings
        {
            Language = "eng+deu+ara+ell+fra"
        };

        // Perform OCR on the loaded document using the language data in tessDataPath.
        ocrProcessor.PerformOCR(loadedDocument, tessDataPath);

        // Make sure the destination folder exists before creating the output file.
        Directory.CreateDirectory(Path.GetDirectoryName(outputPath)!);

        // Save the processed document; the output stream is closed as soon as the save completes.
        using (FileStream outputFileStream = new FileStream(outputPath, FileMode.Create))
        {
            loadedDocument.Save(outputFileStream);
        }
    }
    finally
    {
        // Close the document (same Close(true) call as before), now also on failure.
        loadedDocument.Close(true);
    }
}



Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.

0 comments on commit 9e7b184

Please sign in to comment.