diff --git a/src/app.py b/src/app.py index c94c97d..08a91d9 100644 --- a/src/app.py +++ b/src/app.py @@ -37,6 +37,7 @@ def analyze() -> Tuple[str, int]: """Execute the analyzer function.""" try: file = request.files['file'] + language = request.form['language'] if file.filename == '': return jsonify({'error': 'No selected file'}), 400 @@ -46,7 +47,7 @@ def analyze() -> Tuple[str, int]: analyzer_results = self.engine.analyze_csv( csv_full_path=filepath, - language="en" + language=language ) self.logger.debug(f"Analyzed file with results: {analyzer_results}") os.remove(filepath) diff --git a/tests/analyzer_engine/csv_analyzer_engine_test.py b/tests/analyzer_engine/csv_analyzer_engine_test.py index e45ef83..72d5f62 100644 --- a/tests/analyzer_engine/csv_analyzer_engine_test.py +++ b/tests/analyzer_engine/csv_analyzer_engine_test.py @@ -8,7 +8,7 @@ def test_csv_analyzer_engine_anonymizer(): nlp_engine = FlairNLPEngine("flair/ner-english-large") csv_analyzer = CSVAnalyzerEngine(nlp_engine) from presidio_anonymizer import BatchAnonymizerEngine - analyzer_results = csv_analyzer.analyze_csv('./sample_data.csv', language="en") + analyzer_results = csv_analyzer.analyze_csv('../sample_data/sample_data.csv', language="en") anonymizer = BatchAnonymizerEngine() anonymized_results = anonymizer.anonymize_dict(analyzer_results) diff --git a/tests/app_test.py b/tests/app_test.py index 7101b09..55fdd97 100644 --- a/tests/app_test.py +++ b/tests/app_test.py @@ -22,12 +22,28 @@ def test_health(client): response = client.get("/health") assert response.status_code == 200 +def test_analyze_non_existent(client): + response = client.post("/analyze", data={ + "language": "en", + }) + + assert response.status_code == 500 + + +def test_analyze_invalid_csv(client): + response = client.post("/analyze", data={ + "file": open('./tests/sample_data/invalid.csv', 'rb'), + }) + + assert response.status_code == 500 + -def test_analyze_csv_file(client): +def test_analyze_pii_csv(client): expected_response_id = {'value': ['1', '2', '3'], 'recognizer_results': [[], [], []]} response = client.post("/analyze", data={ - "file": open('./tests/analyzer_engine/sample_data.csv', 'rb'), + "file": open('./tests/sample_data/sample_data.csv', 'rb'), + "language": "en", }) assert response.status_code == 200 diff --git a/tests/sample_data/invalid.csv b/tests/sample_data/invalid.csv new file mode 100644 index 0000000..51e7b04 --- /dev/null +++ b/tests/sample_data/invalid.csv @@ -0,0 +1,3 @@ +{ + "hello": "json" +} diff --git a/tests/analyzer_engine/sample_data.csv b/tests/sample_data/sample_data.csv similarity index 100% rename from tests/analyzer_engine/sample_data.csv rename to tests/sample_data/sample_data.csv