Merge branch 'master' into pcp-shared-schedules-fixes

pennlabs · Oct 29, 2023 · 1d2cec7 · 1d2cec7
2 parents 89a1c2b + 47a0d1e
commit 1d2cec7
Show file tree

Hide file tree

Showing 12 changed files with 2,565 additions and 1,378 deletions.
diff --git a/Pipfile b/Pipfile
@@ -0,0 +1,11 @@
+[[source]]
+url = "https://pypi.org/simple"
+verify_ssl = true
+name = "pypi"
+
+[packages]
+
+[dev-packages]
+
+[requires]
+python_version = "3.10"
diff --git a/backend/Pipfile.lock b/backend/Pipfile.lock
diff --git a/backend/README.md b/backend/README.md
@@ -60,6 +60,13 @@ pasted in this public README). If you are not in Penn Labs, see the "Loading Cou
 
 NOTE: when using `pipenv`, environment variables are only refreshed when you exit your shell and rerun `pipenv shell` (this is a common source of confusing behavior, so it's good to know about).
 
+## Linting
+
+We use `black`, `flake8`, and 'isort' to lint our code. Once you are in the `backend` directory, you can run the following commands to lint:
+1. `pipenv run black`
+2. `pipenv run isort`
+3. `pipenv run flake8`
+
 ## Loading Courses Data 
 
 ### Via Database Dump (Penn Labs members)
@@ -193,10 +200,3 @@ If you don't want to use docker alone, you can also set up and run the dev envir
 7. Running tests
    - Run `python manage.py test` to run our test suite.
    - To run a specific test, you can use the format `python manage.py test tests.review.test_api.OneReviewTestCase.test_course` (also note that in this example, you can use any prefix of that path to run a larger set of tests).
-
-## Linting
-```
-pipenv run black -l100 .
-pipenv run isort .
-pipenv run flake8 .
-```
diff --git a/backend/review/import_utils/import_to_db.py b/backend/review/import_utils/import_to_db.py
@@ -270,9 +270,9 @@ def stat(key, amt=1):
     return stat
 
 
-def import_summary_rows(summaries, show_progress_bar=True):
+def import_summary_rows(summaries: iter, show_progress_bar=True):
     """
-    Imports summary rows given a summaries list.
+    Imports summary rows given a summaries iterable.
     """
     stats = dict()
     stat = gen_stat(stats)

diff --git a/backend/review/import_utils/parse_sql.py b/backend/review/import_utils/parse_sql.py
@@ -5,6 +5,8 @@
 from lark import Lark, Transformer
 from tqdm import tqdm
 
+from courses.util import semester_suffix_map_inv, translate_semester_inv
+
 
 """
 This file contains functionality for parsing the SQL dump files that we get from ISC.
@@ -72,7 +74,14 @@ def row(self, items):
         to generate a row in a quasi-JSON format. Who needs MongoDB?
         """
         _, col_names, values = items
-        return dict(zip(col_names, values))
+        row_dict = dict(zip(col_names, values))
+
+        # Convert new OpenData API semesters to internal semesters
+        if "TERM" in row_dict:
+            stripped_term = row_dict["TERM"].strip()
+            if stripped_term[-2:] in semester_suffix_map_inv:
+                row_dict["TERM"] = translate_semester_inv(stripped_term)
+        return row_dict
 
     def TOKEN(self, items):
         """

diff --git a/backend/review/management/commands/iscimport.py b/backend/review/management/commands/iscimport.py
@@ -27,6 +27,16 @@
 ISC_DESC_TABLE = "TEST_PCR_COURSE_DESC_V"
 
 
+def assert_semesters_not_current(semesters):
+    current_semester = get_current_semester()
+    for semester in semesters:
+        if semester == current_semester:
+            raise ValueError(
+                f"You cannot import reviews for the current semester ({current_semester}). "
+                f"Did you forget to update the SEMESTER option in the Django admin console?"
+            )
+
+
 class Command(BaseCommand):
     help = """
     Import course review data from the zip of mysqldump files that we get from ISC every semester.
@@ -150,7 +160,7 @@ def handle(self, *args, **kwargs):
         import_all = kwargs["import_all"]
         s3_bucket = kwargs["s3_bucket"]
         is_zip_file = kwargs["zip"] or s3_bucket is not None
-        summary_file = kwargs["summary_file"]
+        summary_file = kwargs["summary_file"]  # either summary table or summary hist table
         import_details = kwargs["import_details"]
         import_descriptions = kwargs["import_descriptions"]
         show_progress_bar = kwargs["show_progress_bar"]
@@ -164,13 +174,7 @@ def handle(self, *args, **kwargs):
                 "Must define semester with (-s) or explicitly import all semesters with (-a)."
             )
         if semesters is not None:
-            current_semester = get_current_semester()
-            for semester in semesters:
-                if semester == current_semester:
-                    raise ValueError(
-                        f"You cannot import reviews for the current semester ({current_semester}). "
-                        f"Did you forget to update the SEMESTER option in the Django admin console?"
-                    )
+            assert_semesters_not_current(semesters)
 
         if s3_bucket is not None:
             fp = "/tmp/pcrdump.zip"
@@ -185,63 +189,66 @@ def handle(self, *args, **kwargs):
             "modified if the whole script succeeds."
         )
 
-        with transaction.atomic():  # Only commit changes if the whole script succeeds
-            # TODO: When we import details and crosslistings, get their data here too.
-            tables_to_get = [summary_file]
-            idx = 1
-            detail_idx = -1
-            if import_details:
-                tables_to_get.append(ISC_RATING_TABLE)
-                detail_idx = idx
-                idx += 1
-
-            description_idx = -1
-            if import_descriptions:
-                tables_to_get.append(ISC_DESC_TABLE)
-                description_idx = idx
-                idx += 1
-
-            files = self.get_files(src, is_zip_file, tables_to_get)
-
-            summary_fo = files[0]
-            print("Loading summary file...")
-            summary_rows = load_sql_dump(summary_fo, progress=show_progress_bar, lazy=False)
+        tables_to_get = [summary_file]
+        idx = 1
+        detail_idx = -1
+        if import_details:
+            tables_to_get.append(ISC_RATING_TABLE)
+            detail_idx = idx
+            idx += 1
+
+        description_idx = -1
+        if import_descriptions:
+            tables_to_get.append(ISC_DESC_TABLE)
+            description_idx = idx
+            idx += 1
+
+        files = self.get_files(src, is_zip_file, tables_to_get)
+        summary_fo = files[0]
+
+        print("Loading summary file...")
+        summary_rows = load_sql_dump(summary_fo, progress=show_progress_bar, lazy=False)
+        gc.collect()
+        print("SQL parsed and loaded!")
+        if not import_all:
+            full_len = len(summary_rows)
+            summary_rows = [r for r in summary_rows if r["TERM"] in semesters]
             gc.collect()
-            print("SQL parsed and loaded!")
-
-            if not import_all:
-                full_len = len(summary_rows)
-                summary_rows = [r for r in summary_rows if r["TERM"] in semesters]
-                gc.collect()
-                filtered_len = len(summary_rows)
-                print(f"Filtered {full_len} rows down to {filtered_len} rows.")
-
-            semesters = sorted(list({r["TERM"] for r in summary_rows}))
-            gc.collect()
-            to_delete = Review.objects.filter(section__course__semester__in=semesters)
-            delete_count = to_delete.count()
-
-            if delete_count > 0:
-                if not force:
-                    prompt = input(
-                        f"This import will overwrite {delete_count} rows that have already been"
-                        + "imported. Continue? (y/N) "
+            filtered_len = len(summary_rows)
+            print(f"Filtered {full_len} rows down to {filtered_len} rows.")
+        semesters = sorted(list({r["TERM"] for r in summary_rows}))
+        gc.collect()
+
+        for semester in semesters:
+            print(f"Loading {semester}...")
+            with transaction.atomic():  # Commit changes if all imports for the semester succeed
+                to_delete = Review.objects.filter(section__course__semester=semester)
+                delete_count = to_delete.count()
+                if delete_count > 0:
+                    if not force:
+                        prompt = input(
+                            f"This import will overwrite {delete_count} rows that have already been"
+                            + "imported. Continue? (y/N) "
+                        )
+                        if prompt.strip().upper() != "Y":
+                            print("Aborting...")
+                            return 0
+
+                    print(
+                        f"Deleting {delete_count} existing reviews for {semester} "
+                        "from the database..."
                     )
-                    if prompt.strip().upper() != "Y":
-                        print("Aborting...")
-                        return 0
+                    to_delete.delete()
 
-                print(
-                    f"Deleting {delete_count} existing reviews for semesters from the database..."
+                print(f"Importing reviews for semester {semester}")
+                stats = import_summary_rows(
+                    (r for r in summary_rows if r["TERM"] == semester), show_progress_bar
                 )
-                to_delete.delete()
-
-            print(f"Importing reviews for semester(s) {', '.join(semesters)}")
-            stats = import_summary_rows(summary_rows, show_progress_bar)
-            print(stats)
+                print(stats)
 
-            gc.collect()
+                gc.collect()
 
+        with transaction.atomic():
             if import_details:
                 print("Loading details file...")
                 stats = import_ratings_rows(
@@ -260,16 +267,16 @@ def handle(self, *args, **kwargs):
                 )
                 print(stats)
 
-            self.close_files(files)
-            # invalidate cached views
-            print("Invalidating cache...")
-            del_count = clear_cache()
-            print(f"{del_count if del_count >=0 else 'all'} cache entries removed.")
+        self.close_files(files)
+        # invalidate cached views
+        print("Invalidating cache...")
+        del_count = clear_cache()
+        print(f"{del_count if del_count >=0 else 'all'} cache entries removed.")
 
-            gc.collect()
+        gc.collect()
 
-            print("Recomputing Section.has_reviews...")
-            recompute_has_reviews()
+        print("Recomputing Section.has_reviews...")
+        recompute_has_reviews()
 
         print("Done.")
         return 0
diff --git a/backend/tests/review/test_import.py b/backend/tests/review/test_import.py
@@ -116,6 +116,15 @@ def test_parse_descriptions(self):
         self.assertIsInstance(parse, dict)
         self.assertDictEqual(expected, parse)
 
+    def test_semester_transformation(self):
+        query = """
+        INSERT into PCRDEV.TEST_PCR_COURSE_DESC_V
+        (TERM) Values ('202230');
+        """
+        parse = parse_row(query)
+        expected = {"TERM": "2022C"}
+        self.assertDictEqual(parse, expected)
+
 
 class ReviewImportTestCase(TestCase):
     def setUp(self):