datacommonsorg · kurus21 · Nov 6, 2024 · Nov 6, 2024 · Nov 13, 2024 · Nov 13, 2024
diff --git a/scripts/us_census/pep/us_pep_sex/README.md b/scripts/us_census/pep/us_pep_sex/README.md
@@ -1,7 +1,7 @@
 # US Census PEP: Population Estimate By Sex
 
 ## About the Dataset
-This dataset has Population Estimates for the National, State and County geographic levels in United States from the year 1900 to 2021 on a yearly basis.        
+This dataset has Population Estimates for the National, State and County geographic levels in the United States from the year 1900 to the latest available year, on a yearly basis.
 
 ### Download URL
 The data in txt/csv/xls formats are downloadable from within https://www2.census.gov/programs-surveys/popest/tables and https://www2.census.gov/programs-surveys/popest/datasets. The actual URLs are listed in download.py.
@@ -45,4 +45,17 @@ The below script will download the data.
 
 The below script will clean the data, Also generate final csv, mcf and tmcf files.
 
-`/bin/python3 scripts/us_census/pep/us_pep_sex/process.py`
+`/bin/python3 scripts/us_census/pep/us_pep_sex/process.py`
+
+### Automation Refresh
+The script process.py accepts a parameter 'mode' with the values 'download' and 'process'.
+
+When the file 'process.py' is run with the flag --mode=download, it will only download the files and put them in the input_files directory.
+i.e. python3 process.py --mode=download
+
+When the file 'process.py' is run with the flag --mode=process, it will process the downloaded files and put the results in the output directory.
+i.e. python3 process.py --mode=process
+
+When the file 'process.py' is run without any flag, it will download and process the files and keep them in the respective directories as mentioned above.
+i.e. python3 process.py
+
diff --git a/scripts/us_census/pep/us_pep_sex/download.py b/scripts/us_census/pep/us_pep_sex/download.py
@@ -32,16 +32,20 @@ def download_files() -> None:
     Returns:
         None
     """
-    with open("scripts/us_census/pep/us_pep_sex/input_urls.txt",
-              "r") as url_files:
+    with open("input_urls.txt", "r") as url_files:
         input_urls = url_files.readlines()
     if not os.path.exists(_DOWNLOAD_PATH):
         os.mkdir(_DOWNLOAD_PATH)
     os.chdir(_DOWNLOAD_PATH)
 
     for file in input_urls:
-        file_name = file.split("/")[-1]
-        urllib.request.urlretrieve(file, file_name)
+        f = file.split("/")
+        file_name = f[-1].replace("\n", "")
+        print(file, file_name)
+        try:
+            urllib.request.urlretrieve(file, file_name)
+        except Exception as e:
+            print(e)
 
 
 if __name__ == '__main__':

diff --git a/scripts/us_census/pep/us_pep_sex/input_url.json b/scripts/us_census/pep/us_pep_sex/input_url.json
diff --git a/scripts/us_census/pep/us_pep_sex/input_urls.txt b/scripts/us_census/pep/us_pep_sex/input_urls.txt
diff --git a/scripts/us_census/pep/us_pep_sex/manifest.json b/scripts/us_census/pep/us_pep_sex/manifest.json
@@ -0,0 +1,19 @@
+{
+  "import_specifications": [
+    {
+      "import_name": "USCensusPEP_Sex",
+      "curator_emails": ["[email protected]"],
+      "provenance_url": "https://www2.census.gov/programs-surveys/popest/tables",
+      "provenance_description": "US Census Population Estimates Program (PEP).",
+      "scripts": ["process.py"],
+      "import_inputs": [
+        {
+          "template_mcf": "output/population_estimate_sex.tmcf",
+          "cleaned_csv": "output/population_estimate_sex.csv"
+        }
+      ],
+      "cron_schedule": "0 10 * * 1"
+    }
+  ]
+}
+