From 10610c54b1eb25184859da0f42b28c5ae5ab8c5d Mon Sep 17 00:00:00 2001 From: "Ed (ODSC)" Date: Wed, 11 Oct 2023 12:03:32 +0100 Subject: [PATCH 1/4] consistency_checks.py: Output errors to json file --- consistency_checks.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/consistency_checks.py b/consistency_checks.py index 2c7c8fc..17e9d0a 100644 --- a/consistency_checks.py +++ b/consistency_checks.py @@ -1,3 +1,4 @@ +import datetime import json import gzip import random @@ -203,6 +204,28 @@ def _skip_errors(self, stats): else: return True + def _output_errors(self, stats): + """Output errors to json file""" + out = stats.copy() + referencing = [] + duplicates = [] + missing = [] + for error in self.error_log: + if error.startswith("BODS referencing error"): + statement_id = error.split("Statement")[-1].split("not found")[0].strip() + referencing.append(statement_id) + if error.startswith("BODS duplicate error"): + statement_id = error.split("(")[-1].split(")")[0].strip() + duplicates.append(statement_id) + if error.startswith("BODS duplicate error"): + statement_id = error.split(":")[-1].split(")")[0].strip() + missing.append(statement_id) + out["ref_errors"] = referencing + out["dup_errors"] = duplicates + out["mis_errors"] = missing + with open(f"errors-{datetime.date.today().strftime('%d%m%y')}.json", "w") as out_file: + json.dump(out, out_file, indent = 4) + def _process_errors(self): """Check for any errors in log""" for error in self.error_log[:self.error_limit]: @@ -211,6 +234,7 @@ def _process_errors(self): output_text(self.console, f"{len(self.error_log)} errors: truncated at {self.error_limit}", "red") if len(self.error_log) > 0: stats = self._error_stats() + self._output_errors(stats) if not self._skip_errors(stats): estats = [] for e in stats: From 373a8fdd5cdb011c7ad3ea989dd4de6b7f5a584b Mon Sep 17 00:00:00 2001 From: "Ed (ODSC)" Date: Wed, 11 Oct 2023 12:34:45 +0100 Subject: [PATCH 2/4] tests/test_stages.py: Fix issue with test_json_zip --- tests/test_stages.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/test_stages.py b/tests/test_stages.py index 4a47763..9a3542d 100644 --- a/tests/test_stages.py +++ b/tests/test_stages.py @@ -139,7 +139,10 @@ def test_json_zip(self, temp_dir, output_dir, source_dir): data = output_file.readlines() print(data) assert len(data) == 20 - assert json.loads(data[0].strip())['interestedParty']['describedByPersonStatement'] == '14105856581894595060' + for d in data: + json_data = json.loads(d.strip()) + if json_data["statementID"] == "8359172029532323967": + assert json_data['interestedParty']['describedByPersonStatement'] == '14105856581894595060' def test_sqlite_zip(self, temp_dir, output_dir, source_dir): """Test creation of output sqlite.db.gz file""" From d5a500c9ae46e8f2998b8f2d7407f2cfaaf6b3ba Mon Sep 17 00:00:00 2001 From: "Ed (ODSC)" Date: Wed, 11 Oct 2023 12:51:07 +0100 Subject: [PATCH 3/4] tests/test_stages.py: Improve test_json_zip test --- tests/test_stages.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/test_stages.py b/tests/test_stages.py index 9a3542d..b1541e3 100644 --- a/tests/test_stages.py +++ b/tests/test_stages.py @@ -139,10 +139,14 @@ def test_json_zip(self, temp_dir, output_dir, source_dir): data = output_file.readlines() print(data) assert len(data) == 20 + count = 0 for d in data: json_data = json.loads(d.strip()) if json_data["statementID"] == "8359172029532323967": + count += 1 + print(json_data) assert json_data['interestedParty']['describedByPersonStatement'] == '14105856581894595060' + assert count == 2 def test_sqlite_zip(self, temp_dir, output_dir, source_dir): """Test creation of output sqlite.db.gz file""" From a5e65a87b18b08f87a16dc226edc3f2def85dd8e Mon Sep 17 00:00:00 2001 From: "Ed (ODSC)" Date: Wed, 11 Oct 2023 14:49:20 +0100 Subject: [PATCH 4/4] setup.py: Pin Werkzeug to version that works with Frozen-Flask --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index fd32240..db341a9 100644 --- a/setup.py +++ b/setup.py @@ -23,6 +23,7 @@ "jsonref", "ipython", "flatterer", + "Werkzeug<3", "frozen-flask", "bootstrap-flask", "markdown",