Skip to content

Commit

Permalink
Merge branch 'main' into image
Browse files Browse the repository at this point in the history
  • Loading branch information
l-r-sowmya authored Jun 4, 2024
2 parents bb7e44d + f1ece7d commit 57fce6d
Show file tree
Hide file tree
Showing 15 changed files with 358 additions and 279 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,5 +32,5 @@ jobs:
shell: bash

- name: "Test"
run: 'shopt -s globstar && poetry run python -m unittest tests/**/*.py'
run: 'poetry run pytest'
shell: bash
2 changes: 1 addition & 1 deletion flake.nix
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
outputs = { self, nixpkgs, flake-utils, poetry2nix }:
flake-utils.lib.eachDefaultSystem (system:
let
nativeBuildInputs = with pkgs; [ stdenv python3 poetry ];
nativeBuildInputs = with pkgs; [ stdenv python3 poetry tesseract ];
buildInputs = with pkgs; [ ];

# see https://github.com/nix-community/poetry2nix/tree/master#api for more functions and examples.
Expand Down
573 changes: 325 additions & 248 deletions poetry.lock

Large diffs are not rendered by default.

10 changes: 10 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,20 @@ spacy = "^3.7.4"
scipy = "<1.13.0"
presidio-anonymizer = "^2.2.354"
presidio-analyzer = {version = "^2.2.354", extras = ["transformers", "stanza"]}

presidio_image_redactor = {version="^0.0.52"}
pytesseract = {version="^0.3.10"}

pytest = "^8.2.1"



[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

[tool.pytest.ini_options]
pythonpath = "src"
addopts = [
"--import-mode=importlib",
]
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Empty file removed tests/__init__.py
Empty file.
Empty file removed tests/analyzer_engine/__init__.py
Empty file.
23 changes: 9 additions & 14 deletions tests/analyzer_engine/csv_analyzer_engine_test.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,14 @@
import unittest
import pytest

from analyzer_engine.csv_analyzer_engine import CSVAnalyzerEngine
from config.nlp_engine_config import FlairNLPEngine


class CSVAnalayzerEngineTest(unittest.TestCase):
    """Smoke test: analyze the sample CSV and anonymize the findings."""

    def setUp(self) -> None:
        """Create a CSV analyzer backed by the `flair/ner-english-large` NLP engine."""
        self.csv_analyser = CSVAnalyzerEngine(
            FlairNLPEngine("flair/ner-english-large")
        )

    def test_csv_analyzer_engine_anonymizer(self):
        """Anonymizing the analyzer output for the sample CSV must not return None."""
        # Imported locally, as in the original test, so the dependency is only
        # loaded when this test actually runs.
        from presidio_anonymizer import BatchAnonymizerEngine

        # The path is relative to the working directory the tests are run from.
        findings = self.csv_analyser.analyze_csv(
            './data/sample_data.csv', language="en"
        )
        redacted = BatchAnonymizerEngine().anonymize_dict(findings)
        self.assertIsNotNone(redacted)
def test_csv_analyzer_engine_anonymizer():
    """Analyze the sample CSV, anonymize the findings, and expect a truthy result."""
    engine = CSVAnalyzerEngine(FlairNLPEngine("flair/ner-english-large"))

    # Imported locally, as in the original test, so the dependency is only
    # loaded when this test actually runs.
    from presidio_anonymizer import BatchAnonymizerEngine

    # The path is relative to the working directory pytest is launched from.
    findings = engine.analyze_csv('./data/sample_data.csv', language="en")
    redacted = BatchAnonymizerEngine().anonymize_dict(findings)
    assert redacted
Empty file removed tests/recognizer/__init__.py
Empty file.
27 changes: 12 additions & 15 deletions tests/recognizer/flair_recognizer_test.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,17 @@
import unittest
import pytest

from recognizer.flair_recognizer import FlairRecognizer


class TestFlairRecognizer(unittest.TestCase):
    """Checks that FlairRecognizer reports entities for real text only."""

    def setUp(self) -> None:
        """Build a recognizer from the `flair/ner-english-large` model for each test."""
        self.recognizer = FlairRecognizer(model_path="flair/ner-english-large")

    def test_flair_recognizer_analyse(self):
        """A sentence containing named entities yields at least one result."""
        test_data = "Sowmya is working in Berkley bank as an accountant since 2021"
        result = self.recognizer.analyze(test_data)
        self.assertGreater(len(result), 0)

    def test_flair_recognizes_persons_correctly(self):
        """A real name is detected, while a masked placeholder is not."""
        test_data = "Sowmya is a person name"
        self.assertGreater(len(self.recognizer.analyze(test_data)), 0)
        test_data = "XXXXXX is a valid name?"
        # `assertEquals` is a deprecated alias that was removed in Python 3.12;
        # `assertEqual` is the supported spelling and behaves identically.
        self.assertEqual(len(self.recognizer.analyze(test_data)), 0)
def test_flair_recognizer_analyze():
    """A sentence containing named entities must yield at least one result."""
    flair = FlairRecognizer(model_path="flair/ner-english-large")
    sentence = "Sowmya is working in Berkley bank as an accountant since 2021"
    findings = flair.analyze(sentence)
    assert len(findings) > 0

def test_flair_recognizes_persons_correctly():
    """A real name produces results; a masked placeholder produces none."""
    flair = FlairRecognizer(model_path="flair/ner-english-large")

    # Positive case: text that mentions an actual person name.
    real_name_hits = flair.analyze("Sowmya is a person name")
    assert len(real_name_hits) > 0

    # Negative case: a run of X characters standing in for a name.
    placeholder_hits = flair.analyze("XXXXXX is a valid name?")
    assert len(placeholder_hits) == 0

0 comments on commit 57fce6d

Please sign in to comment.