openedx · iloveagent57 · Mar 11, 2024 · Mar 22, 2024
diff --git a/scripts/__init__.py b/scripts/__init__.py
@@ -0,0 +1 @@
+# boilerplate to treat as module
diff --git a/scripts/assignment_validation.py b/scripts/assignment_validation.py
@@ -14,10 +14,10 @@
 """
 import csv
 from collections import defaultdict, Counter
-from email.utils import parseaddr
-
 import click
 
+from utils import is_valid_email
+
 INPUT_FIELDNAMES = ['university_name', 'email']
 
 
@@ -59,26 +59,34 @@ def print_plan_counts(input_file):
         print(plan, count)
 
 
-def is_valid_email(email):
-    _, address = parseaddr(email)
-    if not address:
-        return False
-    return True
-
-
 @click.command()
 @click.option(
     '--input-file',
     help='Path of local file containing email addresses to assign.',
 )
-def validate_emails(input_file):
+@click.option(
+    '--output-file',
+    help='Path of local file containing invalid email addresses store in a CSV.',
+)
+def validate_emails(input_file, output_file):
     invalid_emails = Counter()
-    for row in _iterate_csv(input_file):
-        if not is_valid_email(row['email']):
-            invalid_emails[row['email']] += 1
-
-    print(f'There were {sum(invalid_emails.values())} invalid emails')
-    print(invalid_emails)
+    # Start at 0 so an empty input file reports size 0 instead of raising
+    # NameError on a loop variable that was never bound.
+    row_count = 0
+    for row_count, row in enumerate(_iterate_csv(input_file), start=1):
+        email = row['email']
+        uni_name = row['university_name']
+        if not is_valid_email(email):
+            # Key on (university, email) so duplicates are counted per university.
+            invalid_emails[(uni_name, email)] += 1
+
+    print(f'There were a total of {sum(invalid_emails.values())} invalid emails for input of size {row_count}')
+
+    # Append one CSV row per distinct invalid (university, email) pair with its count.
+    with open(output_file, 'a+', encoding='latin-1') as f_out:
+        writer = csv.writer(f_out, delimiter=',')
+        for (uni_name, email), occurrences in invalid_emails.items():
+            writer.writerow([uni_name, email, occurrences])
 
 
 @click.group()

diff --git a/scripts/local_assignment_multi.py b/scripts/local_assignment_multi.py
@@ -40,14 +40,14 @@
 import csv
 import json
 import os
-import re
 import time
-from email.utils import parseaddr
 from pprint import pprint
 
 import click
 import requests
 
+from utils import is_valid_email
+
 
 DEFAULT_CHUNK_SIZE = 100
 
@@ -66,7 +66,7 @@
 }
 
 OUTPUT_FIELDNAMES = ['chunk_id', 'subscription_plan_uuid', 'email', 'license_uuid']
-INPUT_FIELDNAMES = ['email', 'university_name']
+INPUT_FIELDNAMES = ['university_name', 'email']
 PLANS_BY_NAME_FIELDNAMES = ['university_name', 'subscription_plan_uuid']
 
 
@@ -80,7 +80,6 @@ def _get_jwt(fetch_jwt=False, environment='local'):
             'client_secret': client_secret,
             'grant_type': 'client_credentials',
             'token_type': 'jwt',
-            'scope': 'user_id email profile read write',
         }
         # we want to sent with a Content-Type of 'application/x-www-form-urlencoded'
         # so send in the `data` param instead of `json`.
@@ -101,7 +100,7 @@ def get_already_processed_emails(results_file):
     and returns a dictionary mapping already processed emails to their chunk_id.
     """
     already_processed_emails = {}
-    with open(results_file, 'a+') as f_in:
+    with open(results_file, 'a+', encoding='latin-1') as f_in:
         f_in.seek(0)
         reader = csv.DictReader(f_in, fieldnames=OUTPUT_FIELDNAMES, delimiter=',')
 
@@ -123,7 +122,7 @@ def get_already_processed_emails(results_file):
 
 def get_plan_uuids_by_name(plans_by_name_file):
     plans_by_name = {}
-    with open(plans_by_name_file, 'a+') as f_in:
+    with open(plans_by_name_file, 'a+', encoding='latin-1') as f_in:
         f_in.seek(0)
         reader = csv.DictReader(f_in, fieldnames=PLANS_BY_NAME_FIELDNAMES, delimiter=',')
 
@@ -148,11 +147,6 @@ def get_plan_uuids_by_name(plans_by_name_file):
     return plans_by_name
 
 
-def is_valid_email(email):
-    _, address = parseaddr(email)
-    return bool(address)
-
-
 def get_email_chunks(input_file_path, plans_by_name, chunk_size=DEFAULT_CHUNK_SIZE):
     """
     Yield chunks of (chunk_id, subscription_plan, email) from the given input file.  
@@ -182,7 +176,9 @@ def get_email_chunks(input_file_path, plans_by_name, chunk_size=DEFAULT_CHUNK_SI
                 continue
 
             university_name = row['university_name']
-            subscription_plan_uuid = plans_by_name[university_name]
+            subscription_plan_uuid = plans_by_name.get(university_name)
+            if not subscription_plan_uuid:
+                print(f'No plan matches the given name: {university_name}')
 
             # This should only happen on the very first row we process
             if not current_subscription_plan_uuid:
@@ -210,7 +206,9 @@ def get_email_chunks(input_file_path, plans_by_name, chunk_size=DEFAULT_CHUNK_SI
         yield chunk_id, current_subscription_plan_uuid, current_chunk
 
 
-def _post_assignments(subscription_plan_uuid, emails_for_chunk, environment='local', fetch_jwt=False):
+def _post_assignments(
+    subscription_plan_uuid, emails_for_chunk, environment='local', fetch_jwt=False, notify_users=False,
+):
     """
     Make the POST request to assign licenses.
     """
@@ -219,7 +217,7 @@ def _post_assignments(subscription_plan_uuid, emails_for_chunk, environment='loc
 
     payload = {
         'user_emails': emails_for_chunk,
-        'notify_users': False,
+        'notify_users': notify_users,
     }
     headers = {
         "Authorization": "JWT {}".format(_get_jwt(fetch_jwt, environment=environment)),
@@ -245,12 +243,13 @@ def request_assignments(subscription_plan_uuid, chunk_id, emails_for_chunk, envi
         response.raise_for_status()
     except requests.exceptions.HTTPError:
         # if it's a 401, try refetching the JWT and re-try the request
+        print(response.content)
         if response.status_code == 401:
             print('EXPIRED JWT, REFETCHING...')
             response = _post_assignments(subscription_plan_uuid, emails_for_chunk, environment, fetch_jwt)
             response.raise_for_status()
         else:
-            raise
+            print('Continuing past this exception.')
 
     response_data = response.json()
 
@@ -273,7 +272,7 @@ def request_assignments(subscription_plan_uuid, chunk_id, emails_for_chunk, envi
 
 def do_assignment_for_chunk(
     subscription_plan_uuid, chunk_id, email_chunk,
-    already_processed, results_file, environment='local', fetch_jwt=False, sleep_interval=DEFAULT_SLEEP_INTERVAL
+    already_processed, results_file, environment='local', fetch_jwt=False, sleep_interval=DEFAULT_SLEEP_INTERVAL,
 ):
     """
     Given a "chunk" list emails for which assignments should be requested, checks if the given
@@ -290,9 +289,14 @@ def do_assignment_for_chunk(
 
     results_for_chunk = []
     if payload_for_chunk:
-        results_for_chunk = request_assignments(
-            subscription_plan_uuid, chunk_id, payload_for_chunk, environment, fetch_jwt,
-        )
+        try:
+            results_for_chunk = request_assignments(
+                subscription_plan_uuid, chunk_id, payload_for_chunk, environment, fetch_jwt,
+            )
+        except Exception as exc:
+            print(exc)
+            print('continuing on...')
+            return
         with open(results_file, 'a+') as f_out:
             writer = csv.writer(f_out, delimiter=',')
             writer.writerows(results_for_chunk)
@@ -341,8 +345,13 @@ def do_assignment_for_chunk(
     help='Whether to fetch JWT based on stored client id and secret.',
     is_flag=True,
 )
+@click.option(
+    '--dry-run',
+    help='Just prints what emails would be assigned to plan if true.',
+    is_flag=True,
+)
 
-def run(input_file, plans_by_name_file, output_file, chunk_size, environment, sleep_interval, fetch_jwt):
+def run(input_file, plans_by_name_file, output_file, chunk_size, environment, sleep_interval, fetch_jwt, dry_run):
     """
     Entry-point for this script.
     """
@@ -353,10 +362,13 @@ def run(input_file, plans_by_name_file, output_file, chunk_size, environment, sl
     plan_uuids_by_name = get_plan_uuids_by_name(plans_by_name_file)
 
     for chunk_id, subscription_plan_uuid, email_chunk in get_email_chunks(input_file, plan_uuids_by_name, chunk_size):
-        do_assignment_for_chunk(
-            subscription_plan_uuid, chunk_id, email_chunk,
-            already_processed, output_file, environment, fetch_jwt, sleep_interval,
-        )
+        if dry_run:
+            print(f'DRY RUN: chunk_id={chunk_id} would assign to plan {subscription_plan_uuid} emails: {email_chunk}')
+        else:
+            do_assignment_for_chunk(
+                subscription_plan_uuid, chunk_id, email_chunk,
+                already_processed, output_file, environment, fetch_jwt, sleep_interval,
+            )
 
 if __name__ == '__main__':
     run()
diff --git a/scripts/local_assignment_requirements.txt b/scripts/local_assignment_requirements.txt
@@ -1,2 +1,3 @@
 click
 requests
+django-rest-framework
diff --git a/scripts/utils.py b/scripts/utils.py
@@ -0,0 +1,19 @@
+# needed for email validation
+import django
+from rest_framework import serializers
+
+EMAIL_FIELD = serializers.EmailField()
+
+
+def is_valid_email(email):
+    """
+    Return True if ``email`` passes the validators of a DRF ``EmailField``.
+
+    Any exception raised while running the validators is treated as
+    "invalid" and results in False.
+    """
+    try:
+        EMAIL_FIELD.run_validators(email)
+    except Exception:
+        return False
+    return True