import os
import warnings
from pathlib import Path

import pandas as pd

# s3fs is an optional dependency: when it is missing the S3 branch of
# retrieve_puf is skipped and only the local-file fallbacks are tried.
try:
    from s3fs import S3FileSystem
except ImportError:
    S3FileSystem = None

# Defaults used by retrieve_puf when the caller passes nothing.
AWS_ACCESS_KEY_ID = os.environ.get("AWS_ACCESS_KEY_ID", None)
AWS_SECRET_ACCESS_KEY = os.environ.get("AWS_SECRET_ACCESS_KEY", None)
PUF_S3_FILE_LOCATION = os.environ.get(
    "PUF_S3_LOCATION", "s3://ospc-data-files/puf.20210720.csv.gz"
)


def retrieve_puf(
    puf_s3_file_location=PUF_S3_FILE_LOCATION,
    aws_access_key_id=AWS_ACCESS_KEY_ID,
    aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
):
    """
    Load the PUF as a DataFrame, from S3 if possible, else from disk.

    Sources are tried in this order:

    1. ``puf_s3_file_location`` on S3 (gzip-compressed CSV), when a
       location and both credentials are given and ``s3fs`` is
       importable.
    2. ``puf.csv.gz`` in the current working directory.
    3. ``puf.csv`` in the current working directory.

    Args:
        puf_s3_file_location (str): S3 URL of the gzip-compressed PUF
            CSV file.
        aws_access_key_id (str): AWS access key id; ``None`` or ``""``
            means "no credentials".
        aws_secret_access_key (str): AWS secret access key; ``None`` or
            ``""`` means "no credentials".

    Returns:
        pandas.DataFrame or None: the PUF data, or ``None`` (after
        emitting a warning) when no source is available.
    """
    s3_reader_installed = S3FileSystem is not None
    # Truthiness rather than ``is not None``: callers may pass "" when
    # the AWS environment variables are unset, and an empty string is
    # not a usable credential.
    has_credentials = bool(aws_access_key_id and aws_secret_access_key)

    if puf_s3_file_location and has_credentials and s3_reader_installed:
        print("Reading puf from S3 bucket.", puf_s3_file_location)
        # Use the arguments, not the module-level defaults, so that
        # values supplied by the caller are honoured and the location
        # printed above is the one actually read.
        fs = S3FileSystem(
            key=aws_access_key_id,
            secret=aws_secret_access_key,
        )
        with fs.open(puf_s3_file_location) as f:
            puf_df = pd.read_csv(f, compression="gzip")
        return puf_df
    elif Path("puf.csv.gz").exists():
        print("Reading puf from puf.csv.gz.")
        return pd.read_csv("puf.csv.gz", compression="gzip")
    elif Path("puf.csv").exists():
        print("Reading puf from puf.csv.")
        return pd.read_csv("puf.csv")
    else:
        warnings.warn(
            f"PUF file not available (has_credentials={has_credentials}, "
            f"s3_reader_installed={s3_reader_installed})"
        )
        return None
-conda config --add channels PSLmodels -conda config --add channels conda-forge -conda install scipy mkl dask matplotlib PSLmodels::taxcalc conda-forge::paramtools +# install packages needed by CS, but not in ogusa-dev env +pip install s3fs +# install OG-USA from source pip install -e . diff --git a/ogusa/calibrate.py b/ogusa/calibrate.py index 7a4ef0fe..8c42c5d5 100644 --- a/ogusa/calibrate.py +++ b/ogusa/calibrate.py @@ -17,6 +17,7 @@ def __init__( estimate_tax_functions=False, estimate_beta=False, estimate_chi_n=False, + estimate_pop=False, tax_func_path=None, iit_reform={}, guid="", @@ -27,6 +28,7 @@ def __init__( self.estimate_tax_functions = estimate_tax_functions self.estimate_beta = estimate_beta self.estimate_chi_n = estimate_chi_n + self.estimate_pop = estimate_pop if estimate_tax_functions: if tax_func_path is not None: run_micro = False @@ -42,7 +44,7 @@ def __init__( run_micro=run_micro, tax_func_path=tax_func_path, ) - if estimate_beta: + if self.estimate_beta: self.beta_j = estimate_beta_j.beta_estimate(self) # if estimate_chi_n: # chi_n = self.get_chi_n() @@ -57,35 +59,44 @@ def __init__( self.zeta = bequest_transmission.get_bequest_matrix(p.J, p.lambdas) # demographics - self.demographic_params = demographics.get_pop_objs( - p.E, - p.S, - p.T, - 0, - 99, - initial_data_year=p.start_year - 1, - final_data_year=p.start_year, - ) + if estimate_pop: + self.demographic_params = demographics.get_pop_objs( + p.E, + p.S, + p.T, + 0, + 99, + initial_data_year=p.start_year - 1, + final_data_year=p.start_year, + ) - # demographics for 80 period lives (needed for getting e below) - demog80 = demographics.get_pop_objs( - 20, - 80, - p.T, - 0, - 99, - initial_data_year=p.start_year - 1, - final_data_year=p.start_year, - ) + # demographics for 80 period lives (needed for getting e below) + demog80 = demographics.get_pop_objs( + 20, + 80, + p.T, + 0, + 99, + initial_data_year=p.start_year - 1, + final_data_year=p.start_year, + ) - # earnings profiles - self.e = 
income.get_e_interp( - p.S, - self.demographic_params["omega_SS"], - demog80["omega_SS"], - p.lambdas, - plot=False, - ) + # earnings profiles + self.e = income.get_e_interp( + p.S, + self.demographic_params["omega_SS"], + demog80["omega_SS"], + p.lambdas, + plot=False, + ) + else: + self.e = income.get_e_interp( + p.S, + p.omega_SS, + p.omega_SS, + p.lambdas, + plot=False, + ) # Tax Functions def get_tax_function_parameters( @@ -334,6 +345,7 @@ def get_dict(self): dict["zeta"] = self.zeta dict.update(self.macro_params) dict["e"] = self.e - dict.update(self.demographic_params) + if self.estimate_pop: + dict.update(self.demographic_params) return dict