Skip to content

Commit

Permalink
Create function and sub functions
Browse files Browse the repository at this point in the history
  • Loading branch information
ldavies99 committed Oct 31, 2024
1 parent ba419c9 commit d42bef9
Show file tree
Hide file tree
Showing 2 changed files with 118 additions and 18 deletions.
18 changes: 0 additions & 18 deletions mbs_results/csw_to_spp_converter.py

This file was deleted.

118 changes: 118 additions & 0 deletions mbs_results/utilities/csw_to_spp_converter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
import fnmatch
from os import listdir
from os.path import isfile, join
import pandas as pd

from utils import convert_column_to_datetime

def get_patern_df(
    filepath: str,
    pattern: str
) -> pd.DataFrame:
    """Load every csv file in a folder whose name matches a pattern.

    Parameters
    ----------
    filepath : str
        Filepath to folder containing desired files.
    pattern : str
        Shell-style wildcard pattern (as understood by ``fnmatch``, e.g.
        ``"qv*.csv"``) used to select files in the folder by name. This is
        not a regular expression.

    Returns
    -------
    pd.DataFrame
        Dataframe containing the rows of all selected files, concatenated in
        sorted filename order with a fresh index.

    Raises
    ------
    ValueError
        If no file in ``filepath`` matches ``pattern``.
    """
    # Keep regular files only; sub-directories are ignored even if their
    # name matches the pattern.
    filenames = [
        filename for filename in listdir(filepath) if isfile(join(filepath, filename))
    ]
    # listdir returns entries in arbitrary order; sort so that the row order
    # of the result is reproducible between runs and machines.
    filenames = sorted(fnmatch.filter(filenames, pattern))

    if not filenames:
        # pd.concat would fail with an opaque "No objects to concatenate";
        # raise the same exception type with an actionable message instead.
        raise ValueError(
            f"No files matching pattern '{pattern}' found in '{filepath}'"
        )

    df_list = [pd.read_csv(join(filepath, filename)) for filename in filenames]

    return pd.concat(df_list, ignore_index=True)

def get_qv_and_cp_data(
    cp_path: str,
    qv_path: str,
) -> pd.DataFrame:
    """Load the qv and cp csv files and left-join cp onto qv.

    Parameters
    ----------
    cp_path : str
        Filepath to folder containing cp data (files named ``cp*.csv``).
    qv_path : str
        Filepath to folder containing qv data (files named ``qv*.csv``).

    Returns
    -------
    pd.DataFrame
        Every qv row, with the cp columns attached where ``period`` and
        ``reference`` match.
    """
    qv_frame = get_patern_df(qv_path, "qv*.csv")
    cp_frame = get_patern_df(cp_path, "cp*.csv")

    # Left join: qv rows are kept even when there is no matching cp row.
    return qv_frame.merge(cp_frame, how="left", on=["period", "reference"])

def csw_to_spp(
    cp_path: str,
    qv_path: str,
    output_path: str,
    column_map: dict,
    period: str,
    period_range: int
) -> None:
    """Combine cp and qv files, filter and rename columns, and save as json.

    The combined data is restricted to the ``period_range`` months ending at
    (and including) ``period``, reduced to the columns named in
    ``column_map``, renamed, and written to
    ``<output_path>_<YYYYMM>_<period_range>.json``.

    Parameters
    ----------
    cp_path : str
        Filepath to folder containing cp data.
    qv_path : str
        Filepath to folder containing qv data.
    output_path : str
        Filepath prefix for the json file; the period (YYYYMM), the period
        range and the ``.json`` extension are appended to it.
    column_map : dict
        Dictionary containing desired columns from qv and cp data as keys and
        their desired names as values.
    period : str
        Date to filter output on (YYYY-MM-DD).
    period_range : int
        Number of months from the period and previous to include in the
        output.
    """
    qv_and_cp = get_qv_and_cp_data(cp_path, qv_path)

    # NOTE(review): convert_column_to_datetime comes from the project's utils
    # module; the code below needs it to return datetime-like values (it uses
    # the .dt accessor and compares against a Timestamp) - confirm.
    qv_and_cp["period"] = convert_column_to_datetime(qv_and_cp["period"])

    period = pd.Timestamp(period)
    window_start = period - pd.DateOffset(months=period_range)

    # Half-open window: strictly after window_start, up to and including
    # period.
    in_window = (qv_and_cp["period"] > window_start) & (qv_and_cp["period"] <= period)

    # Take an explicit copy: assigning a column on a filtered slice is
    # chained assignment, which triggers SettingWithCopyWarning and is not
    # guaranteed to behave consistently across pandas versions.
    qv_and_cp = qv_and_cp[in_window].copy()

    qv_and_cp["period"] = qv_and_cp["period"].dt.strftime("%Y%m")

    # Select with an explicit list of labels rather than a dict_keys view.
    qv_and_cp = qv_and_cp[list(column_map)].rename(columns=column_map)

    qv_and_cp.to_json(f"{output_path}_{period.strftime('%Y%m')}_{period_range}.json")

# Ad-hoc manual run of csw_to_spp.
# NOTE(review): everything below executes at import time and reads/writes
# hard-coded local paths; consider moving it under an
# `if __name__ == "__main__":` guard or into a test.

# Source column name -> output column name, passed to csw_to_spp as column_map.
col_mapping = {
"reference": "reference",
"period": "period",
"error_mkr": "status",
"question_no": "questioncode",
"returned_value": "response",
"adjusted_value": "adjustedresponse",
}

# The same folder is used below for both the cp and the qv files.
filepath = "C:/Users/daviel9/Office for National Statistics/Legacy Uplift - MBS/MBS_Anonymised-Adjusted_Responses-Disclosive_Contributor_List_Applied-20240813T1530Z"

# Convert the 3 months up to and including 2023-03-01; given the filename
# format used by csw_to_spp this writes D:/test_202303_3.json.
csw_to_spp(filepath, filepath, "D:/test", col_mapping, "2023-03-01", 3)

# Read the file just written back in and print its first and last rows as a
# quick visual check.
df = pd.read_json("D:/test_202303_3.json")
print(df.head())
print(df.tail())

0 comments on commit d42bef9

Please sign in to comment.