-
Notifications
You must be signed in to change notification settings - Fork 51
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
884238: sample for Find text by regular expression pattern.
- Loading branch information
1 parent
b70da50
commit bae5b22
Showing
4 changed files
with
93 additions
and
0 deletions.
There are no files selected for viewing
25 changes: 25 additions & 0 deletions
25
...pattern-and-redact-it-from-PDF-document/.NET-Standard/Find_text_by_regular_expression.sln
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
|
||
Microsoft Visual Studio Solution File, Format Version 12.00 | ||
# Visual Studio Version 17 | ||
VisualStudioVersion = 17.8.34330.188 | ||
MinimumVisualStudioVersion = 10.0.40219.1 | ||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Find_text_by_regular_expression", "Find_text_by_regular_expression\Find_text_by_regular_expression.csproj", "{8CD3237D-31CF-49D5-B537-F1CCDEC7B681}" | ||
EndProject | ||
Global | ||
GlobalSection(SolutionConfigurationPlatforms) = preSolution | ||
Debug|Any CPU = Debug|Any CPU | ||
Release|Any CPU = Release|Any CPU | ||
EndGlobalSection | ||
GlobalSection(ProjectConfigurationPlatforms) = postSolution | ||
{8CD3237D-31CF-49D5-B537-F1CCDEC7B681}.Debug|Any CPU.ActiveCfg = Debug|Any CPU | ||
{8CD3237D-31CF-49D5-B537-F1CCDEC7B681}.Debug|Any CPU.Build.0 = Debug|Any CPU | ||
{8CD3237D-31CF-49D5-B537-F1CCDEC7B681}.Release|Any CPU.ActiveCfg = Release|Any CPU | ||
{8CD3237D-31CF-49D5-B537-F1CCDEC7B681}.Release|Any CPU.Build.0 = Release|Any CPU | ||
EndGlobalSection | ||
GlobalSection(SolutionProperties) = preSolution | ||
HideSolutionNode = FALSE | ||
EndGlobalSection | ||
GlobalSection(ExtensibilityGlobals) = postSolution | ||
SolutionGuid = {B475615B-69EF-40F3-9A7F-72E78EE8AB8D} | ||
EndGlobalSection | ||
EndGlobal |
14 changes: 14 additions & 0 deletions
14
...ment/.NET-Standard/Find_text_by_regular_expression/Find_text_by_regular_expression.csproj
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
<Project Sdk="Microsoft.NET.Sdk"> | ||
|
||
<PropertyGroup> | ||
<OutputType>Exe</OutputType> | ||
<TargetFramework>net8.0</TargetFramework> | ||
<ImplicitUsings>enable</ImplicitUsings> | ||
<Nullable>enable</Nullable> | ||
</PropertyGroup> | ||
|
||
<ItemGroup> | ||
<PackageReference Include="Syncfusion.Pdf.Imaging.Net.Core" Version="25.1.42" /> | ||
</ItemGroup> | ||
|
||
</Project> |
Binary file added
BIN
+403 KB
...n-and-redact-it-from-PDF-document/.NET-Standard/Find_text_by_regular_expression/Input.pdf
Binary file not shown.
54 changes: 54 additions & 0 deletions
54
...-and-redact-it-from-PDF-document/.NET-Standard/Find_text_by_regular_expression/Program.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
| ||
using Syncfusion.Pdf.Parsing; | ||
using Syncfusion.Pdf.Redaction; | ||
using Syncfusion.Pdf; | ||
using System.Text.RegularExpressions; | ||
|
||
// Sample: find every word on the first page of Input.pdf that looks like a
// numeric date and black it out with a redaction, then save the result as Output.pdf.
//
// Both paths are resolved relative to the current working directory; the
// "../../../" prefix walks up three directory levels from it.

// Matches a word that consists solely of <1-2 digits>/<1-2 digits>/<4 digits>.
// The pattern only checks the shape of the text; it does not validate that the
// month/day values are in range. It is built once because it is the same for
// every word on the page.
Regex datePattern = new Regex(@"\A\d{1,2}/\d{1,2}/\d{4}\z");

// Open the source PDF read-only. The using declaration guarantees the stream is
// released when the program ends, even if an exception is thrown below.
using FileStream docStream = new FileStream(Path.GetFullPath("../../../Input.pdf"), FileMode.Open, FileAccess.Read);

// Load the existing PDF document.
PdfLoadedDocument document = new PdfLoadedDocument(docStream);

try
{
    // Get the first page from the document. Fail with a clear message instead of a
    // NullReferenceException if the page is not a loaded page.
    if (document.Pages[0] is not PdfLoadedPage page)
    {
        throw new InvalidOperationException("The first page of Input.pdf is not a loaded PDF page.");
    }

    // Extract the text of the first page. Only the line/word layout information is
    // needed here, so the returned plain-text string is discarded.
    page.ExtractText(out TextLineCollection lineCollection);

    foreach (TextLine line in lineCollection.TextLine)
    {
        foreach (TextWord word in line.WordCollection)
        {
            // Redact only when the whole word is a date: the redaction rectangle is
            // the bounds of the entire word, so a partial match is left untouched.
            if (string.IsNullOrEmpty(word.Text) || !datePattern.IsMatch(word.Text))
            {
                continue;
            }

            // Create a redaction object covering the word, filled with black.
            PdfRedaction redaction = new PdfRedaction(word.Bounds, Syncfusion.Drawing.Color.Black);
            // Add the redaction object to the redaction collection of the loaded page.
            page.AddRedaction(redaction);
        }
    }

    // Redact the contents from the PDF document.
    document.Redact();

    // Create the output file stream (an existing Output.pdf is overwritten).
    using (FileStream outputFileStream = new FileStream(Path.GetFullPath(@"../../../Output.pdf"), FileMode.Create, FileAccess.ReadWrite))
    {
        // Save the PDF document to the file stream.
        document.Save(outputFileStream);
    }
}
finally
{
    // Close the document on both the success and the failure path.
    document.Close(true);
}