Skip to content

Commit

Permalink
fix: more assign script fixes, valid emails with DRF EmailField, noti…
Browse files Browse the repository at this point in the history
…fy_users=True
  • Loading branch information
iloveagent57 committed Mar 13, 2024
1 parent 3978df4 commit cad2486
Show file tree
Hide file tree
Showing 5 changed files with 60 additions and 36 deletions.
1 change: 1 addition & 0 deletions scripts/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# boilerplate to treat as module
35 changes: 19 additions & 16 deletions scripts/assignment_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@
"""
import csv
from collections import defaultdict, Counter
from email.utils import parseaddr

import click

from utils import is_valid_email

INPUT_FIELDNAMES = ['university_name', 'email']


Expand Down Expand Up @@ -59,26 +59,29 @@ def print_plan_counts(input_file):
print(plan, count)


def is_valid_email(email):
    """
    Return True when ``parseaddr`` extracts a non-empty address part
    from ``email``, and False otherwise.

    Only the presence of an address component is checked here.
    """
    # parseaddr returns a (realname, address) pair; only the address matters.
    return bool(parseaddr(email)[1])


@click.command()
@click.option(
'--input-file',
help='Path of local file containing email addresses to assign.',
)
def validate_emails(input_file):
@click.option(
'--output-file',
help='Path of local file containing invalid email addresses store in a CSV.',
)
def validate_emails(input_file, output_file):
invalid_emails = Counter()
for row in _iterate_csv(input_file):
if not is_valid_email(row['email']):
invalid_emails[row['email']] += 1

print(f'There were {sum(invalid_emails.values())} invalid emails')
print(invalid_emails)
for index, row in enumerate(_iterate_csv(input_file)):
email = row['email']
uni_name = row['university_name']
if not is_valid_email(email):
invalid_emails[(uni_name, email)] += 1

print(f'There were a total of {sum(invalid_emails.values())} invalid emails for input of size {index + 1}')

with open(output_file, 'a+', encoding='latin-1') as f_out:
writer = csv.writer(f_out, delimiter=',')
for (uni_name, email), occurrences in invalid_emails.items():
writer.writerow([uni_name, email, occurrences])


@click.group()
Expand Down
46 changes: 26 additions & 20 deletions scripts/local_assignment_multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,14 +40,14 @@
import csv
import json
import os
import re
import time
from email.utils import parseaddr
from pprint import pprint

import click
import requests

from utils import is_valid_email


DEFAULT_CHUNK_SIZE = 100

Expand All @@ -66,7 +66,7 @@
}

OUTPUT_FIELDNAMES = ['chunk_id', 'subscription_plan_uuid', 'email', 'license_uuid']
INPUT_FIELDNAMES = ['email', 'university_name']
INPUT_FIELDNAMES = ['university_name', 'email']
PLANS_BY_NAME_FIELDNAMES = ['university_name', 'subscription_plan_uuid']


Expand All @@ -80,7 +80,6 @@ def _get_jwt(fetch_jwt=False, environment='local'):
'client_secret': client_secret,
'grant_type': 'client_credentials',
'token_type': 'jwt',
'scope': 'user_id email profile read write',
}
# we want to send with a Content-Type of 'application/x-www-form-urlencoded'
# so send in the `data` param instead of `json`.
Expand All @@ -101,7 +100,7 @@ def get_already_processed_emails(results_file):
and returns a dictionary mapping already processed emails to their chunk_id.
"""
already_processed_emails = {}
with open(results_file, 'a+') as f_in:
with open(results_file, 'a+', encoding='latin-1') as f_in:
f_in.seek(0)
reader = csv.DictReader(f_in, fieldnames=OUTPUT_FIELDNAMES, delimiter=',')

Expand All @@ -123,7 +122,7 @@ def get_already_processed_emails(results_file):

def get_plan_uuids_by_name(plans_by_name_file):
plans_by_name = {}
with open(plans_by_name_file, 'a+') as f_in:
with open(plans_by_name_file, 'a+', encoding='latin-1') as f_in:
f_in.seek(0)
reader = csv.DictReader(f_in, fieldnames=PLANS_BY_NAME_FIELDNAMES, delimiter=',')

Expand All @@ -148,11 +147,6 @@ def get_plan_uuids_by_name(plans_by_name_file):
return plans_by_name


def is_valid_email(email):
    """
    Report whether ``parseaddr`` finds a non-empty address part in ``email``.

    Returns True if an address component was parsed, False if it is empty.
    """
    parsed_address = parseaddr(email)[1]
    if parsed_address:
        return True
    return False


def get_email_chunks(input_file_path, plans_by_name, chunk_size=DEFAULT_CHUNK_SIZE):
"""
Yield chunks of (chunk_id, subscription_plan, email) from the given input file.
Expand Down Expand Up @@ -182,7 +176,9 @@ def get_email_chunks(input_file_path, plans_by_name, chunk_size=DEFAULT_CHUNK_SI
continue

university_name = row['university_name']
subscription_plan_uuid = plans_by_name[university_name]
subscription_plan_uuid = plans_by_name.get(university_name)
if not subscription_plan_uuid:
print(f'No plan matches the given name: {university_name}')

# This should only happen on the very first row we process
if not current_subscription_plan_uuid:
Expand Down Expand Up @@ -210,7 +206,9 @@ def get_email_chunks(input_file_path, plans_by_name, chunk_size=DEFAULT_CHUNK_SI
yield chunk_id, current_subscription_plan_uuid, current_chunk


def _post_assignments(subscription_plan_uuid, emails_for_chunk, environment='local', fetch_jwt=False):
def _post_assignments(
subscription_plan_uuid, emails_for_chunk, environment='local', fetch_jwt=False, notify_users=True
):
"""
Make the POST request to assign licenses.
"""
Expand All @@ -219,7 +217,7 @@ def _post_assignments(subscription_plan_uuid, emails_for_chunk, environment='loc

payload = {
'user_emails': emails_for_chunk,
'notify_users': False,
'notify_users': notify_users,
}
headers = {
"Authorization": "JWT {}".format(_get_jwt(fetch_jwt, environment=environment)),
Expand Down Expand Up @@ -273,7 +271,7 @@ def request_assignments(subscription_plan_uuid, chunk_id, emails_for_chunk, envi

def do_assignment_for_chunk(
subscription_plan_uuid, chunk_id, email_chunk,
already_processed, results_file, environment='local', fetch_jwt=False, sleep_interval=DEFAULT_SLEEP_INTERVAL
already_processed, results_file, environment='local', fetch_jwt=False, sleep_interval=DEFAULT_SLEEP_INTERVAL,
):
"""
Given a "chunk" list emails for which assignments should be requested, checks if the given
Expand Down Expand Up @@ -341,8 +339,13 @@ def do_assignment_for_chunk(
help='Whether to fetch JWT based on stored client id and secret.',
is_flag=True,
)
@click.option(
'--dry-run',
help='Just prints what emails would be assigned to plan if true.',
is_flag=True,
)

def run(input_file, plans_by_name_file, output_file, chunk_size, environment, sleep_interval, fetch_jwt):
def run(input_file, plans_by_name_file, output_file, chunk_size, environment, sleep_interval, fetch_jwt, dry_run):
"""
Entry-point for this script.
"""
Expand All @@ -353,10 +356,13 @@ def run(input_file, plans_by_name_file, output_file, chunk_size, environment, sl
plan_uuids_by_name = get_plan_uuids_by_name(plans_by_name_file)

for chunk_id, subscription_plan_uuid, email_chunk in get_email_chunks(input_file, plan_uuids_by_name, chunk_size):
do_assignment_for_chunk(
subscription_plan_uuid, chunk_id, email_chunk,
already_processed, output_file, environment, fetch_jwt, sleep_interval,
)
if dry_run:
print(f'DRY RUN: chunk_id={chunk_id} would assign to plan {subscription_plan_uuid} emails: {email_chunk}')
else:
do_assignment_for_chunk(
subscription_plan_uuid, chunk_id, email_chunk,
already_processed, output_file, environment, fetch_jwt, sleep_interval,
)

if __name__ == '__main__':
run()
1 change: 1 addition & 0 deletions scripts/local_assignment_requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
click
requests
django-rest-framework
13 changes: 13 additions & 0 deletions scripts/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# needed for email validation
import django
from rest_framework import serializers

EMAIL_FIELD = serializers.EmailField()


def is_valid_email(email):
    """
    Return True if ``email`` passes the validators of the module-level
    ``EMAIL_FIELD``, and False if running them raises any exception.
    """
    try:
        EMAIL_FIELD.run_validators(email)
    except Exception:
        # Any failure while validating is reported as "not valid".
        return False
    else:
        return True

0 comments on commit cad2486

Please sign in to comment.