diff --git a/src/analyzer_engine/csv_analyzer_engine.py b/src/analyzer_engine/csv_analyzer_engine.py index 73c7dc4..72df2ae 100644 --- a/src/analyzer_engine/csv_analyzer_engine.py +++ b/src/analyzer_engine/csv_analyzer_engine.py @@ -28,4 +28,4 @@ def analyze_csv( csv_list = list(csv.reader(csv_file)) csv_dict = {header: list(map(str, values)) for header, *values in zip(*csv_list)} analyzer_results = self.analyze_dict(csv_dict, language, keys_to_skip) - return analyzer_results + return list(analyzer_results) diff --git a/app.py b/src/app.py similarity index 69% rename from app.py rename to src/app.py index 4270704..48e840f 100644 --- a/app.py +++ b/src/app.py @@ -1,6 +1,7 @@ import json import logging import os +import uuid from typing import Tuple from flask import Flask, request, jsonify, Response @@ -36,26 +37,29 @@ def analyze() -> Tuple[str, int]: if file.filename == '': return jsonify({'error': 'No selected file'}), 400 - filepath = f'uploads/{file.filename}' + filepath = f'uploads/{uuid.uuid4()}' file.save(filepath) self.logger.info(f"Successfully saved file: {filepath}") - analyzer_result_list = self.engine.analyze_csv( + analyzer_results = self.engine.analyze_csv( csv_full_path=filepath, language="en" ) + self.logger.debug(f"Analyzed file with results: {analyzer_results}") + os.remove(filepath) + self.logger.info(f"Successfully removed file: {filepath}") - resp = {} - for result in analyzer_result_list: - resp['key'] = result.key - resp['value'] = result.value - resp['recognizer_results'] = json.dumps( - result.recognizer_results, - default=lambda o: o.to_dict(), - sort_keys=True, - ) + analyzer_results_dict = {} + for a in analyzer_results: + recognizer_results = [] + for r in a.recognizer_results: + recognizer_results.append([o.to_dict() for o in r]) + analyzer_results_dict[a.key] = { + "value": a.value, + "recognizer_results": recognizer_results + } - return jsonify(resp), 200 + return jsonify(analyzer_results_dict), 200 except Exception as e: self.logger.error( f"A fatal error occurred during execution of " diff --git a/tests/app_test.py b/tests/app_test.py new file mode 100644 index 0000000..7101b09 --- /dev/null +++ b/tests/app_test.py @@ -0,0 +1,42 @@ +import pytest +import json + +from app import Server + +@pytest.fixture() +def app(): + app = Server().app + app.config.update({ + "TESTING": True, + }) + + yield app + + +@pytest.fixture() +def client(app): + return app.test_client() + + +def test_health(client): + response = client.get("/health") + assert response.status_code == 200 + + +def test_analyze_csv_file(client): + expected_response_id = {'value': ['1', '2', '3'], 'recognizer_results': [[], [], []]} + + response = client.post("/analyze", data={ + "file": open('./tests/analyzer_engine/sample_data.csv', 'rb'), + }) + + assert response.status_code == 200 + data = json.loads(response.get_data(as_text=True)) + # No PII in id + assert data['id'] == expected_response_id + # first row has no PII + assert data['comments']['recognizer_results'][0] == [] + # second row has PII + assert data['comments']['recognizer_results'][1][0]['entity_type'] == 'US_DRIVER_LICENSE' + assert data['comments']['recognizer_results'][1][0]['start'] == 34 + assert data['comments']['recognizer_results'][1][0]['end'] == 42