Skip to content

Commit

Permalink
feat: add support to analyze images
Browse files Browse the repository at this point in the history
  • Loading branch information
mallikarjun-br committed Oct 9, 2024
1 parent 39ef094 commit 344d92f
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 10 deletions.
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,11 @@ cat sample.csv | pii analyze --csv | pii anonymize
cat sample.csv | pii analyze --csv | pii anonymize | jq -r '.text'
cat sample.csv | pii analyze --csv | pii anonymize | jq -r '.text' > anonymized.csv


# image files (the image bytes are read from stdin)
cat sample.png | pii analyze --img


# vault integration
./vault.sh # start and configure vault server and transit secret engine keys
echo "My name is Don Stark and my phone number is 212-555-5555" | pii anonymize --vaulturl "http://127.0.0.1:8200" --vaultkey "orders"
Expand Down
58 changes: 48 additions & 10 deletions src/cli.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,19 @@
import argparse
import io
import json

from presidio_analyzer import RecognizerResult
from presidio_analyzer.analyzer_engine import AnalyzerEngine
from presidio_anonymizer.entities.engine.result.operator_result import OperatorResult
from presidio_image_redactor import ImageAnalyzerEngine
from analyzer_engine.csv_analyzer_engine import CSVAnalyzerEngine
from presidio_anonymizer import AnonymizerEngine, BatchAnonymizerEngine
from config.nlp_engine_config import FlairNLPEngine
from operators.vault import Vault
from PIL import Image
from presidio_image_redactor import ImageRedactorEngine


import sys
import logging

Expand All @@ -19,15 +25,46 @@

def analyze(args):
analyzer_results = None
nlp_engine = FlairNLPEngine(NLP_ENGINE)
nlp_engine, registry = nlp_engine.create_nlp_engine()
engine = AnalyzerEngine(registry=registry, nlp_engine=nlp_engine)
text = sys.stdin.read()
if args.csv:
engine = CSVAnalyzerEngine(engine)
analyzer_results = engine.analyze(text=text, language=args.language)
text = None
image = None
if args.img:
image = Image.open(io.BytesIO(sys.stdin.buffer.read()))
analyzer_results = ImageAnalyzerEngine().analyze(image=image, language=args.language)
else:
nlp_engine = FlairNLPEngine(NLP_ENGINE)
nlp_engine, registry = nlp_engine.create_nlp_engine()
engine = AnalyzerEngine(registry=registry, nlp_engine=nlp_engine)
text = sys.stdin.read()
if args.csv:
engine = CSVAnalyzerEngine(engine)
analyzer_results = engine.analyze(text=text, language=args.language)

output = format_output(analyzer_results, text, image)
print(json.dumps(output, indent=2))
return analyzer_results

output = {
def format_output(analyzer_results, text, image):
if image:
output = io.BytesIO()
image.convert('RGB').save(output, format='JPEG')
return {
"image": list(output.getvalue()),
"analyzer_results": [
{
"entity_type": result.entity_type,
"start": result.start,
"end": result.end,
"score": result.score,
"left" : result.left,
"top" : result.top,
"width" : result.width,
"height" : result.height
}
for result in analyzer_results
]
}

return {
"text": text,
"analyzer_results": [
{
Expand All @@ -41,8 +78,8 @@ def analyze(args):
for result in analyzer_results
],
}
print(json.dumps(output, indent=2))
return analyzer_results




def anonymize(args):
Expand Down Expand Up @@ -106,6 +143,7 @@ def main():
"analyze", description="Analyze inputs and return PII detection results"
)
analyzer_parser.add_argument("--csv", action="store_true")
analyzer_parser.add_argument("--img", action="store_true")
analyzer_parser.add_argument("--language", required=False, type=str, default="en")
analyzer_parser.set_defaults(func=analyze)

Expand Down

0 comments on commit 344d92f

Please sign in to comment.