Skip to content

Commit

Permalink
Merge branch 'master' into pcp-shared-schedules-fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
mureytasroc committed Oct 29, 2023
2 parents 89a1c2b + 47a0d1e commit 1d2cec7
Show file tree
Hide file tree
Showing 12 changed files with 2,565 additions and 1,378 deletions.
11 changes: 11 additions & 0 deletions Pipfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
[[source]]
url = "https://pypi.org/simple"
verify_ssl = true
name = "pypi"

[packages]

[dev-packages]

[requires]
python_version = "3.10"
1,189 changes: 651 additions & 538 deletions backend/Pipfile.lock

Large diffs are not rendered by default.

14 changes: 7 additions & 7 deletions backend/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,13 @@ pasted in this public README). If you are not in Penn Labs, see the "Loading Cou

NOTE: when using `pipenv`, environment variables are only refreshed when you exit your shell and rerun `pipenv shell` (this is a common source of confusing behavior, so it's good to know about).

## Linting

We use `black`, `flake8`, and 'isort' to lint our code. Once you are in the `backend` directory, you can run the following commands to lint:
1. `pipenv run black`
2. `pipenv run isort`
3. `pipenv run flake8`

## Loading Courses Data

### Via Database Dump (Penn Labs members)
Expand Down Expand Up @@ -193,10 +200,3 @@ If you don't want to use docker alone, you can also set up and run the dev envir
7. Running tests
- Run `python manage.py test` to run our test suite.
- To run a specific test, you can use the format `python manage.py test tests.review.test_api.OneReviewTestCase.test_course` (also note that in this example, you can use any prefix of that path to run a larger set of tests).

## Linting
```
pipenv run black -l100 .
pipenv run isort .
pipenv run flake8 .
```
4 changes: 2 additions & 2 deletions backend/review/import_utils/import_to_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,9 +270,9 @@ def stat(key, amt=1):
return stat


def import_summary_rows(summaries, show_progress_bar=True):
def import_summary_rows(summaries: iter, show_progress_bar=True):
"""
Imports summary rows given a summaries list.
Imports summary rows given a summaries iterable.
"""
stats = dict()
stat = gen_stat(stats)
Expand Down
11 changes: 10 additions & 1 deletion backend/review/import_utils/parse_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from lark import Lark, Transformer
from tqdm import tqdm

from courses.util import semester_suffix_map_inv, translate_semester_inv


"""
This file contains functionality for parsing the SQL dump files that we get from ISC.
Expand Down Expand Up @@ -72,7 +74,14 @@ def row(self, items):
to generate a row in a quasi-JSON format. Who needs MongoDB?
"""
_, col_names, values = items
return dict(zip(col_names, values))
row_dict = dict(zip(col_names, values))

# Convert new OpenData API semesters to internal semesters
if "TERM" in row_dict:
stripped_term = row_dict["TERM"].strip()
if stripped_term[-2:] in semester_suffix_map_inv:
row_dict["TERM"] = translate_semester_inv(stripped_term)
return row_dict

def TOKEN(self, items):
"""
Expand Down
141 changes: 74 additions & 67 deletions backend/review/management/commands/iscimport.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,16 @@
ISC_DESC_TABLE = "TEST_PCR_COURSE_DESC_V"


def assert_semesters_not_current(semesters):
current_semester = get_current_semester()
for semester in semesters:
if semester == current_semester:
raise ValueError(
f"You cannot import reviews for the current semester ({current_semester}). "
f"Did you forget to update the SEMESTER option in the Django admin console?"
)


class Command(BaseCommand):
help = """
Import course review data from the zip of mysqldump files that we get from ISC every semester.
Expand Down Expand Up @@ -150,7 +160,7 @@ def handle(self, *args, **kwargs):
import_all = kwargs["import_all"]
s3_bucket = kwargs["s3_bucket"]
is_zip_file = kwargs["zip"] or s3_bucket is not None
summary_file = kwargs["summary_file"]
summary_file = kwargs["summary_file"] # either summary table or summary hist table
import_details = kwargs["import_details"]
import_descriptions = kwargs["import_descriptions"]
show_progress_bar = kwargs["show_progress_bar"]
Expand All @@ -164,13 +174,7 @@ def handle(self, *args, **kwargs):
"Must define semester with (-s) or explicitly import all semesters with (-a)."
)
if semesters is not None:
current_semester = get_current_semester()
for semester in semesters:
if semester == current_semester:
raise ValueError(
f"You cannot import reviews for the current semester ({current_semester}). "
f"Did you forget to update the SEMESTER option in the Django admin console?"
)
assert_semesters_not_current(semesters)

if s3_bucket is not None:
fp = "/tmp/pcrdump.zip"
Expand All @@ -185,63 +189,66 @@ def handle(self, *args, **kwargs):
"modified if the whole script succeeds."
)

with transaction.atomic(): # Only commit changes if the whole script succeeds
# TODO: When we import details and crosslistings, get their data here too.
tables_to_get = [summary_file]
idx = 1
detail_idx = -1
if import_details:
tables_to_get.append(ISC_RATING_TABLE)
detail_idx = idx
idx += 1

description_idx = -1
if import_descriptions:
tables_to_get.append(ISC_DESC_TABLE)
description_idx = idx
idx += 1

files = self.get_files(src, is_zip_file, tables_to_get)

summary_fo = files[0]
print("Loading summary file...")
summary_rows = load_sql_dump(summary_fo, progress=show_progress_bar, lazy=False)
tables_to_get = [summary_file]
idx = 1
detail_idx = -1
if import_details:
tables_to_get.append(ISC_RATING_TABLE)
detail_idx = idx
idx += 1

description_idx = -1
if import_descriptions:
tables_to_get.append(ISC_DESC_TABLE)
description_idx = idx
idx += 1

files = self.get_files(src, is_zip_file, tables_to_get)
summary_fo = files[0]

print("Loading summary file...")
summary_rows = load_sql_dump(summary_fo, progress=show_progress_bar, lazy=False)
gc.collect()
print("SQL parsed and loaded!")
if not import_all:
full_len = len(summary_rows)
summary_rows = [r for r in summary_rows if r["TERM"] in semesters]
gc.collect()
print("SQL parsed and loaded!")

if not import_all:
full_len = len(summary_rows)
summary_rows = [r for r in summary_rows if r["TERM"] in semesters]
gc.collect()
filtered_len = len(summary_rows)
print(f"Filtered {full_len} rows down to {filtered_len} rows.")

semesters = sorted(list({r["TERM"] for r in summary_rows}))
gc.collect()
to_delete = Review.objects.filter(section__course__semester__in=semesters)
delete_count = to_delete.count()

if delete_count > 0:
if not force:
prompt = input(
f"This import will overwrite {delete_count} rows that have already been"
+ "imported. Continue? (y/N) "
filtered_len = len(summary_rows)
print(f"Filtered {full_len} rows down to {filtered_len} rows.")
semesters = sorted(list({r["TERM"] for r in summary_rows}))
gc.collect()

for semester in semesters:
print(f"Loading {semester}...")
with transaction.atomic(): # Commit changes if all imports for the semester succeed
to_delete = Review.objects.filter(section__course__semester=semester)
delete_count = to_delete.count()
if delete_count > 0:
if not force:
prompt = input(
f"This import will overwrite {delete_count} rows that have already been"
+ "imported. Continue? (y/N) "
)
if prompt.strip().upper() != "Y":
print("Aborting...")
return 0

print(
f"Deleting {delete_count} existing reviews for {semester} "
"from the database..."
)
if prompt.strip().upper() != "Y":
print("Aborting...")
return 0
to_delete.delete()

print(
f"Deleting {delete_count} existing reviews for semesters from the database..."
print(f"Importing reviews for semester {semester}")
stats = import_summary_rows(
(r for r in summary_rows if r["TERM"] == semester), show_progress_bar
)
to_delete.delete()

print(f"Importing reviews for semester(s) {', '.join(semesters)}")
stats = import_summary_rows(summary_rows, show_progress_bar)
print(stats)
print(stats)

gc.collect()
gc.collect()

with transaction.atomic():
if import_details:
print("Loading details file...")
stats = import_ratings_rows(
Expand All @@ -260,16 +267,16 @@ def handle(self, *args, **kwargs):
)
print(stats)

self.close_files(files)
# invalidate cached views
print("Invalidating cache...")
del_count = clear_cache()
print(f"{del_count if del_count >=0 else 'all'} cache entries removed.")
self.close_files(files)
# invalidate cached views
print("Invalidating cache...")
del_count = clear_cache()
print(f"{del_count if del_count >=0 else 'all'} cache entries removed.")

gc.collect()
gc.collect()

print("Recomputing Section.has_reviews...")
recompute_has_reviews()
print("Recomputing Section.has_reviews...")
recompute_has_reviews()

print("Done.")
return 0
9 changes: 9 additions & 0 deletions backend/tests/review/test_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,15 @@ def test_parse_descriptions(self):
self.assertIsInstance(parse, dict)
self.assertDictEqual(expected, parse)

def test_semester_transformation(self):
query = """
INSERT into PCRDEV.TEST_PCR_COURSE_DESC_V
(TERM) Values ('202230');
"""
parse = parse_row(query)
expected = {"TERM": "2022C"}
self.assertDictEqual(parse, expected)


class ReviewImportTestCase(TestCase):
def setUp(self):
Expand Down
Loading

0 comments on commit 1d2cec7

Please sign in to comment.