From ea2d81846b7a02854d9ce2d57d6d41cd60d7fa52 Mon Sep 17 00:00:00 2001
From: Shiva Menta
Date: Wed, 16 Oct 2024 18:51:01 -0400
Subject: [PATCH] Update Parsing Scripts

import_review now records (via stat) and skips review bits whose value
is None or the string "null"; import_ratings_row treats "null" detail
values the same way as None. The SQL dump date parser now dispatches on
the format string and supports 'DD-MON-RR' in addition to
'MM/DD/YYYY HH24:MI:SS', raising ValueError for anything else.

---
 backend/review/import_utils/import_to_db.py |  9 ++++-
 backend/review/import_utils/parse_sql.py    | 41 +++++++++++++++++++--
 2 files changed, 44 insertions(+), 6 deletions(-)

diff --git a/backend/review/import_utils/import_to_db.py b/backend/review/import_utils/import_to_db.py
index 33b345d0d..bb9a3e56d 100644
--- a/backend/review/import_utils/import_to_db.py
+++ b/backend/review/import_utils/import_to_db.py
@@ -80,7 +80,12 @@ def import_review(section, instructor, enrollment, responses, form_type, bits, s
     )
     if not created:
         stat("duplicate_review")
-    review_bits = [ReviewBit(review=review, field=k, average=v) for k, v in bits.items()]
+    review_bits = []
+    for key, value in bits.items():
+        if value is None or value == "null":
+            stat(f"null value for {key}")
+            continue
+        review_bits.append(ReviewBit(review=review, field=key, average=value))
     # This saves us a bunch of database calls per row, since reviews have > 10 bits.
     ReviewBit.objects.bulk_create(review_bits, ignore_conflicts=True)
 
@@ -195,7 +200,7 @@ def import_ratings_row(row, stat):
     }
 
     for key, val in details.items():
-        if val is None:
+        if val is None or val == "null":
             stat(f"null value for {key}")
             return
 
diff --git a/backend/review/import_utils/parse_sql.py b/backend/review/import_utils/parse_sql.py
index af9371e20..f6ace78a0 100644
--- a/backend/review/import_utils/parse_sql.py
+++ b/backend/review/import_utils/parse_sql.py
@@ -98,14 +98,47 @@ def number(self, n):
         (n,) = n
         return float(n)
 
+    def _convert_dd_mon_rr_format(self, date_str: str) -> datetime:
+        """
+        Parse a 'DD-MON-RR' date string, expanding the two-digit RR year
+        into a four-digit year relative to the current year:
+        - RR in 00-49: current century, or the next century when the
+          current two-digit year is in 50-99.
+        - RR in 50-99: current century, or the previous century when the
+          current two-digit year is in 00-49.
+        """
+        datetime_elems = date_str.split("-")
+        arg_year_last_digits = int(datetime_elems[-1])
+
+        current_year = datetime.now().year
+        current_year_first_digits = current_year // 100
+        current_year_last_digits = current_year % 100
+
+        arg_year_first_digits = current_year_first_digits
+        if arg_year_last_digits <= 49:
+            if current_year_last_digits > 49:
+                arg_year_first_digits += 1
+        else:
+            if current_year_last_digits <= 49:
+                arg_year_first_digits -= 1
+
+        datetime_elems[-1] = str(arg_year_first_digits * 100 + arg_year_last_digits)
+        return datetime.strptime("-".join(datetime_elems), "%d-%b-%Y")
+
     def date(self, items):
         """
         The dump includes the format (parsed at items[1] in this function),
-        but it's not the same parse tokens that Python uses. From observation,
-        all the dates are in the same format. If that changes, and dates start
-        being off, here's a good place to look.
+        but it's not the same parse tokens that Python uses. We've previously
+        seen dates in two different formats, but if you encounter a format
+        not of type '%m/%d/%Y %H:%M:%S' or 'DD-MON-RR', then this function
+        will need to be modified.
         """
-        return datetime.strptime(items[0], "%m/%d/%Y %H:%M:%S")
+        if items[1] == "MM/DD/YYYY HH24:MI:SS":
+            return datetime.strptime(items[0], "%m/%d/%Y %H:%M:%S")
+        elif items[1] == "DD-MON-RR":
+            return self._convert_dd_mon_rr_format(items[0])
+
+        raise ValueError("Received invalid date format.")
 
 
 class TypeTransformer(SQLDumpTransformer):