Skip to content

Commit

Permalink
fix: Replace applymap with map for pd.DataFrame
Browse files Browse the repository at this point in the history
  • Loading branch information
esloch committed Dec 7, 2023
1 parent 43a7f55 commit 91ce992
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 6 deletions.
7 changes: 4 additions & 3 deletions pysus/data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,8 @@ def dbf_to_parquet(dbf: str, _pbar=None) -> str:
_pbar.update(chunk_size)

chunk_df = pd.DataFrame(chunk)
- table = pa.Table.from_pandas(chunk_df.applymap(decode_column))
+ #breakpoint()
+ table = pa.Table.from_pandas(chunk_df.map(decode_column))
pq.write_to_dataset(table, root_path=str(parquet))
except struct.error as err:
if _pbar:
Expand All @@ -135,7 +136,7 @@ def parse_dftypes(df: pd.DataFrame) -> pd.DataFrame:
def map_column_func(column_names: list[str], func):
# Maps a function to each value in each column
columns = [c for c in df.columns if c in column_names]
- df[columns] = df[columns].applymap(func)
+ df[columns] = df[columns].map(func)

def str_to_int(string: str):
# If removing spaces, all characters are int,
Expand All @@ -157,7 +158,7 @@ def str_to_date(string: str):
map_column_func(["DT_NOTIFIC", "DT_SIN_PRI"], str_to_date)
map_column_func(["CODMUNRES", "SEXO"], str_to_int)

- df = df.applymap(
+ df = df.map(
lambda x: "" if str(x).isspace() else x
) # Remove all space values

Expand Down
6 changes: 6 additions & 0 deletions pysus/online_data/IBGE.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,12 @@ def get_sidra_table(
query += f'/h/{headers}'

url = base_url + query

+ #data = requests.get(url)
+ #jsondata = data.json()
+ #df = pd.DataFrame.from_dict(jsondata)
+ #breakpoint()

print(f'Requesting data from {url}')
try:
with (get_legacy_session() as s, s.get(url) as response):
Expand Down
6 changes: 3 additions & 3 deletions pysus/online_data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def _parse_dftypes(df: pd.DataFrame) -> pd.DataFrame:
def map_column_func(column_names: list[str], func):
# Maps a function to each value in each column
columns = [c for c in df.columns if c in column_names]
- df[columns] = df[columns].applymap(func)
+ df[columns] = df[columns].map(func)

def str_to_int(string: str) -> Union[int, float]:
# If removing spaces, all characters are int,
Expand All @@ -128,7 +128,7 @@ def str_to_date(string: str) -> datetime.date:
map_column_func(["DT_NOTIFIC", "DT_SIN_PRI"], str_to_date)
map_column_func(["CODMUNRES", "SEXO"], str_to_int)

- df = df.applymap(
+ df = df.map(
lambda x: "" if str(x).isspace() else x
) # Remove all space values

Expand Down Expand Up @@ -631,7 +631,7 @@ def decode_column(value):
for d in self._stream_DBF(DBF(fpath, encoding="iso-8859-1", raw=True)):
try:
df = pd.DataFrame(d)
- table = pa.Table.from_pandas(df.applymap(decode_column))
+ table = pa.Table.from_pandas(df.map(decode_column))
pq.write_to_dataset(table, root_path=parquet_dir)

except Exception as e:
Expand Down

0 comments on commit 91ce992

Please sign in to comment.