Merge pull request #1579 from cisagov/za/patch-agency-info

Ticket #1513: Fix blank values for federal_agency information
cisagov · Jan 11, 2024 · c362b1a · c362b1a
2 parents 3175ffe + e595349
commit c362b1a
Show file tree

Hide file tree

Showing 6 changed files with 519 additions and 2 deletions.
diff --git a/docs/operations/data_migration.md b/docs/operations/data_migration.md
@@ -524,3 +524,37 @@ Example: `cf ssh getgov-za`
 | 2 | **debug**                  | Increases logging detail. Defaults to False.                                |
 | 3 | **limitParse**             | Determines how many domains to parse. Defaults to all.                      |
 | 4 | **disableIdempotentCheck** | Boolean that determines if we should check for idempotence or not. Compares the proposed extension date to the value in TransitionDomains. Defaults to False. |
+
+
+## Patch Federal Agency Info
+This section outlines how to use `patch_federal_agency_info.py`, which fills in blank `federal_agency` values on DomainInformation records.
+
+### Running on sandboxes
+
+#### Step 1: Grab the latest `current-full.csv` file from the dotgov-data repo
+Download the csv from [here](https://github.com/cisagov/dotgov-data/blob/main/current-full.csv) and place this file under the `src/migrationdata/` directory.
+
+#### Step 2: Transfer the `current-full.csv` file to your sandbox
+[Click here to go to the section about transferring data to sandboxes](#step-1-transfer-data-to-sandboxes)
+
+#### Step 3: Login to CloudFoundry
+```cf login -a api.fr.cloud.gov --sso```
+
+#### Step 4: SSH into your environment
+```cf ssh getgov-{space}```
+
+Example: `cf ssh getgov-za`
+
+#### Step 5: Create a shell instance
+```/tmp/lifecycle/shell```
+
+#### Step 6: Patch agency info
+```./manage.py patch_federal_agency_info migrationdata/current-full.csv --debug```
+
+### Running locally
+```docker-compose exec app ./manage.py patch_federal_agency_info migrationdata/current-full.csv --debug```
+
+##### Optional parameters
+|   | Parameter                  | Description                                                                 |
+|:-:|:-------------------------- |:----------------------------------------------------------------------------|
+| 1 | **debug**                  | Increases logging detail. Defaults to False.                                |
+| 2 | **sep**                    | Delimiter character used to parse the csv file. Defaults to `,`.            |
diff --git a/src/registrar/management/commands/patch_federal_agency_info.py b/src/registrar/management/commands/patch_federal_agency_info.py
@@ -0,0 +1,262 @@
+"""Loops through each valid DomainInformation object and updates its agency value"""
+import argparse
+import csv
+import logging
+import os
+from typing import List
+
+from django.core.management import BaseCommand
+from registrar.management.commands.utility.terminal_helper import TerminalColors, TerminalHelper
+from registrar.models.domain_information import DomainInformation
+from django.db.models import Q
+
+from registrar.models.transition_domain import TransitionDomain
+
+logger = logging.getLogger(__name__)
+
+
+class Command(BaseCommand):
+    help = "Loops through each valid DomainInformation object and updates its agency value"
+
+    def __init__(self, *args, **kwargs):
+        """Sets up the bookkeeping lists used to report on a run.
+
+        Any arguments are forwarded untouched to the parent constructor, so
+        callers that build the command with explicit options keep working.
+        """
+        super().__init__(*args, **kwargs)
+        # DomainInformation objects queued for the next bulk_update
+        self.di_to_update: List[DomainInformation] = []
+        # DomainInformation objects that could not be given an agency value
+        self.di_failed_to_update: List[DomainInformation] = []
+        # DomainInformation objects with no usable TransitionDomain agency
+        self.di_skipped: List[DomainInformation] = []
+
+    def add_arguments(self, parser):
+        """Adds command line arguments.
+
+        current_full_filepath: positional path to the current-full.csv file.
+        --debug / --no-debug: optional flag, read by handle() as "debug".
+        --sep: delimiter used when parsing the csv file, defaults to ",".
+        """
+        parser.add_argument(
+            "current_full_filepath",
+            help="Path to the current-full.csv file, used to patch records skipped in the first pass",
+        )
+        parser.add_argument(
+            "--debug",
+            action=argparse.BooleanOptionalAction,
+            help="Increases logging detail",
+        )
+        parser.add_argument("--sep", default=",", help="Delimiter character")
+
+    def handle(self, current_full_filepath, **kwargs):
+        """Loops through each valid DomainInformation object and updates its agency value.
+
+        The first pass (patch_agency_info) copies federal_agency from the matching
+        TransitionDomain. Anything skipped in that pass is then looked up in the
+        current-full.csv file, but only when the first pass reported no failures.
+
+        Raises argparse.ArgumentTypeError if current_full_filepath is not an existing file.
+        """
+        debug = kwargs.get("debug")
+        separator = kwargs.get("sep")
+
+        # Check if the provided file path is valid
+        if not os.path.isfile(current_full_filepath):
+            raise argparse.ArgumentTypeError(f"Invalid file path '{current_full_filepath}'")
+
+        # === Update the "federal_agency" field === #
+        was_success = self.patch_agency_info(debug)
+
+        if not was_success:
+            # This code should never execute. This can only occur if bulk_update somehow fails,
+            # which may indicate some sort of data corruption.
+            # We should only correct skipped records if the previous step was successful,
+            # otherwise we risk corrupting data, so stop here.
+            logger.error(
+                f"{TerminalColors.FAIL}"
+                "Could not automatically patch skipped records. The initial update failed. "
+                "An error was encountered when running this script, please inspect the following "
+                f"records for accuracy and completeness: {self.di_failed_to_update}"
+                f"{TerminalColors.ENDC}"
+            )
+            return
+
+        # === Try to process anything that was skipped === #
+        if self.di_skipped:
+            # Flush out the list of DomainInformations to update
+            self.di_to_update.clear()
+            self.process_skipped_records(current_full_filepath, separator, debug)
+
+            # Clear the old skipped list, and log the run summary
+            self.di_skipped.clear()
+            self.log_script_run_summary(debug)
+
+    def patch_agency_info(self, debug):
+        """
+        Updates the federal_agency field of each valid DomainInformation object based on the corresponding
+        TransitionDomain object. Skips the update if no TransitionDomain with a non-blank
+        federal_agency exists for the domain. Logs the update and skip actions if debug mode is on.
+        After all updates, logs a summary of the results.
+
+        The user is asked to confirm through TerminalHelper.prompt_for_execution before
+        anything is written.
+
+        Returns True when no record ended up in di_failed_to_update, False otherwise.
+        """
+
+        # Grab all DomainInformation objects (and their associated TransitionDomains)
+        # that need to be updated. Both None and "" count as a blank agency.
+        empty_agency_query = Q(federal_agency=None) | Q(federal_agency="")
+        domain_info_to_fix = DomainInformation.objects.filter(empty_agency_query)
+
+        domain_names = domain_info_to_fix.values_list("domain__name", flat=True)
+        transition_domains = TransitionDomain.objects.filter(domain_name__in=domain_names).exclude(empty_agency_query)
+
+        # Get the domain names from TransitionDomain
+        td_agencies = transition_domains.values_list("domain_name", "federal_agency").distinct()
+
+        human_readable_domain_names = list(domain_names)
+        # Code execution will stop here if the user prompts "N"
+        TerminalHelper.prompt_for_execution(
+            system_exit_on_terminate=True,
+            info_to_inspect=f"""
+            ==Proposed Changes==
+            Number of DomainInformation objects to change: {len(human_readable_domain_names)}
+            The following DomainInformation objects will be modified: {human_readable_domain_names}
+            """,
+            prompt_title="Do you wish to patch federal_agency data?",
+        )
+        logger.info("Updating...")
+
+        # Create a dictionary mapping of domain_name to federal_agency
+        td_dict = dict(td_agencies)
+
+        for di in domain_info_to_fix:
+            domain_name = di.domain.name
+
+            # If agency exists on a TransitionDomain, update the related DomainInformation object
+            if domain_name in td_dict:
+                di.federal_agency = td_dict[domain_name]
+                self.di_to_update.append(di)
+                log_message = f"{TerminalColors.OKCYAN}Updated {di}{TerminalColors.ENDC}"
+            else:
+                self.di_skipped.append(di)
+                log_message = f"{TerminalColors.YELLOW}Skipping update for {di}{TerminalColors.ENDC}"
+
+            # Log the action if debug mode is on
+            if debug:
+                logger.info(log_message)
+
+        # Bulk update the federal agency field in DomainInformation objects
+        DomainInformation.objects.bulk_update(self.di_to_update, ["federal_agency"])
+
+        # Get a list of each domain we changed
+        # NOTE(review): domain_names is a lazy queryset, so this is presumably run as a
+        # subquery against the post-update table rather than the earlier snapshot - confirm.
+        corrected_domains = DomainInformation.objects.filter(domain__name__in=domain_names)
+
+        # After the update has happened, do a sweep of what we get back.
+        # If the fields we expect to update are still blank (None or ""), then something is wrong.
+        # Skipped records are expected to be blank, so they are matched by primary key and ignored.
+        skipped_pks = {skipped.pk for skipped in self.di_skipped}
+        for di in corrected_domains:
+            if di.pk not in skipped_pks and not di.federal_agency:
+                logger.error(f"{TerminalColors.FAIL}Failed to update {di}{TerminalColors.ENDC}")
+                self.di_failed_to_update.append(di)
+
+        # === Log results and return data === #
+        self.log_script_run_summary(debug)
+        # Tracks if this script was successful. If any errors are found, something went very wrong.
+        return len(self.di_failed_to_update) == 0
+
+    def process_skipped_records(self, file_path, separator, debug):
+        """If we encounter any DomainInformation records that do not have data in the associated
+        TransitionDomain record, then check the associated current-full.csv file for this
+        information.
+
+        Records whose csv row is missing, or whose "agency" value is absent or blank, are
+        added to di_failed_to_update rather than being overwritten with an empty value.
+        Everything else is queued in di_to_update and written with a single bulk_update.
+        """
+
+        # Code execution will stop here if the user prompts "N"
+        TerminalHelper.prompt_for_execution(
+            system_exit_on_terminate=True,
+            info_to_inspect=f"""
+            ==File location==
+            current-full.csv filepath: {file_path}
+
+            ==Proposed Changes==
+            Number of DomainInformation objects to change: {len(self.di_skipped)}
+            The following DomainInformation objects will be modified if agency data exists in file: {self.di_skipped}
+            """,
+            prompt_title="Do you wish to patch skipped records?",
+        )
+        logger.info("Updating...")
+
+        file_data = self.read_current_full(file_path, separator)
+        for di in self.di_skipped:
+            domain_name = di.domain.name
+            row = file_data.get(domain_name)
+            fed_agency = row.get("agency") if row is not None else None
+
+            # Determine if we should update this record or not.
+            # An empty csv cell is read as "", which would leave the record as blank
+            # as it was before, so only real text counts as data.
+            has_agency = isinstance(fed_agency, str) and fed_agency.strip() != ""
+            if has_agency:
+                di.federal_agency = fed_agency
+                self.di_to_update.append(di)
+                if debug:
+                    logger.info(f"{TerminalColors.OKCYAN}" f"Updating {di}" f"{TerminalColors.ENDC}")
+            else:
+                # If we don't get any data back, something went wrong.
+                self.di_failed_to_update.append(di)
+                logger.error(
+                    f"{TerminalColors.FAIL}" f"Could not update {di}. No information found." f"{TerminalColors.ENDC}"
+                )
+
+        # Bulk update the federal agency field in DomainInformation objects
+        DomainInformation.objects.bulk_update(self.di_to_update, ["federal_agency"])
+
+    def read_current_full(self, file_path, separator):
+        """Reads the current-full.csv file and stores it in a dictionary.
+
+        Returns a dict keyed by the lowercased, whitespace-trimmed domain name. Each value
+        is that domain's row as produced by lowercase_fieldnames. Rows without a domain
+        name are left out.
+        """
+        # newline="" is what the csv module asks for when it is handed a file object, and
+        # utf-8-sig drops a leading byte order mark so the first header still reads "domain name".
+        with open(file_path, "r", newline="", encoding="utf-8-sig") as requested_file:
+            old_reader = csv.DictReader(requested_file, delimiter=separator)
+            # Some variants of current-full.csv have key casing differences for fields
+            # such as "Domain name" or "Domain Name". This corrects that.
+            reader = self.lowercase_fieldnames(old_reader)
+            # Return a dictionary with the domain name as the key,
+            # and the row information as the value
+            dict_data = {}
+            for row in reader:
+                domain_name = (row.get("domain name") or "").strip().lower()
+                if domain_name:
+                    dict_data[domain_name] = row
+
+            return dict_data
+
+    def lowercase_fieldnames(self, reader):
+        """Lowercases all field keys in a dictreader to account for potential casing differences.
+
+        Yields one new dict per row. Keys that are not strings are passed through
+        unchanged: csv.DictReader files surplus values under the key None when a row
+        has more fields than the header, and None has no lower() method.
+        """
+        for row in reader:
+            yield {(k.lower() if isinstance(k, str) else k): v for k, v in row.items()}
+
+    def log_script_run_summary(self, debug):
+        """Reports how the run went.
+
+        A listing of the updated, skipped and failed objects is handed to
+        TerminalHelper.print_conditional together with the debug flag. The totals
+        are then logged at info, warning or error level depending on whether
+        anything was skipped or failed.
+        """
+        num_updated = len(self.di_to_update)
+        num_failed = len(self.di_failed_to_update)
+        num_skipped = len(self.di_skipped)
+
+        # Only mention the categories that actually contain something
+        details = []
+        if num_updated > 0:
+            details.append(f"{TerminalColors.OKCYAN}Updated: {self.di_to_update}{TerminalColors.ENDC}\n")
+        if num_skipped > 0:
+            details.append(f"{TerminalColors.YELLOW}Skipped: {self.di_skipped}{TerminalColors.ENDC}\n")
+        if num_failed > 0:
+            details.append(f"{TerminalColors.FAIL}Failed: {self.di_failed_to_update}{TerminalColors.ENDC}\n")
+        TerminalHelper.print_conditional(debug, "".join(details))
+
+        # Worst outcome first: failures, then skips, then a clean run
+        if num_failed > 0:
+            logger.error(
+                f"""{TerminalColors.FAIL}
+                ============= FINISHED ===============
+                Updated {num_updated} DomainInformation entries
+
+                ----- UPDATE FAILED -----
+                Failed to update {num_failed} DomainInformation entries,
+                Skipped updating {num_skipped} DomainInformation entries
+                {TerminalColors.ENDC}
+                """
+            )
+        elif num_skipped > 0:
+            logger.warning(
+                f"""{TerminalColors.YELLOW}
+                ============= FINISHED ===============
+                Updated {num_updated} DomainInformation entries
+
+                ----- SOME AGENCY DATA WAS NONE (WILL BE PATCHED AUTOMATICALLY) -----
+                Skipped updating {num_skipped} DomainInformation entries
+                {TerminalColors.ENDC}
+                """
+            )
+        else:
+            logger.info(
+                f"""{TerminalColors.OKGREEN}
+                ============= FINISHED ===============
+                Updated {num_updated} DomainInformation entries
+                {TerminalColors.ENDC}
+                """
+            )
diff --git a/src/registrar/tests/common.py b/src/registrar/tests/common.py
@@ -743,6 +743,25 @@ def dummyInfoContactResultData(
         ],
     )
 
+    mockVerisignDataInfoContact = mockDataInfoDomain.dummyInfoContactResultData(
+        "defaultVeri", "[email protected]", datetime.datetime(2023, 5, 25, 19, 45, 35), "lastPw"
+    )
+    InfoDomainWithVerisignSecurityContact = fakedEppObject(
+        "fakepw",
+        cr_date=datetime.datetime(2023, 5, 25, 19, 45, 35),
+        contacts=[
+            common.DomainContact(
+                contact="defaultVeri",
+                type=PublicContact.ContactTypeChoices.SECURITY,
+            )
+        ],
+        hosts=["fake.host.com"],
+        statuses=[
+            common.Status(state="serverTransferProhibited", description="", lang="en"),
+            common.Status(state="inactive", description="", lang="en"),
+        ],
+    )
+
     InfoDomainWithDefaultTechnicalContact = fakedEppObject(
         "fakepw",
         cr_date=datetime.datetime(2023, 5, 25, 19, 45, 35),
@@ -1058,6 +1077,7 @@ def mockInfoDomainCommands(self, _request, cleaned):
             "freeman.gov": (self.InfoDomainWithContacts, None),
             "threenameserversDomain.gov": (self.infoDomainThreeHosts, None),
             "defaultsecurity.gov": (self.InfoDomainWithDefaultSecurityContact, None),
+            "adomain2.gov": (self.InfoDomainWithVerisignSecurityContact, None),
             "defaulttechnical.gov": (self.InfoDomainWithDefaultTechnicalContact, None),
             "justnameserver.com": (self.justNameserver, None),
         }
@@ -1087,6 +1107,8 @@ def mockInfoContactCommands(self, _request, cleaned):
                 mocked_result = self.mockDefaultSecurityContact
             case "defaultTech":
                 mocked_result = self.mockDefaultTechnicalContact
+            case "defaultVeri":
+                mocked_result = self.mockVerisignDataInfoContact
             case _:
                 # Default contact return
                 mocked_result = self.mockDataInfoContact