From 7b160b18b4d470d2b6962281ec1cc3fd5d7405d0 Mon Sep 17 00:00:00 2001 From: Alex Parsons Date: Wed, 21 Aug 2024 19:12:25 +0000 Subject: [PATCH] typing fixes --- src/data_common/dataset/resource_management.py | 10 +++++----- src/data_common/dataset/table_management.py | 2 +- src/data_common/pandas/df_extensions/common.py | 2 +- src/data_common/pandas/df_extensions/la.py | 2 +- src/data_common/pandas/df_extensions/space.py | 9 ++++----- 5 files changed, 12 insertions(+), 13 deletions(-) diff --git a/src/data_common/dataset/resource_management.py b/src/data_common/dataset/resource_management.py index b534059..ae5c458 100644 --- a/src/data_common/dataset/resource_management.py +++ b/src/data_common/dataset/resource_management.py @@ -1042,9 +1042,9 @@ def build_coversheet( desc = self.get_datapackage() settings = get_settings() - bold = writer.book.add_format({"bold": True}) + bold = writer.book.add_format({"bold": True}) # type: ignore - ws = writer.book.add_worksheet("package_description") + ws = writer.book.add_worksheet("package_description") # type: ignore ws.set_column(2, 2, 40) ws.set_column(3, 3, 30) ws.write(2, 2, "Dataset", bold) @@ -1200,7 +1200,7 @@ def build_excel(self, is_geodata: bool = False): writer = pd.ExcelWriter(excel_path) writer = self.build_coversheet(writer, allowed_sheets=allowed_resource_slugs) - text_wrap = writer.book.add_format({"text_wrap": True}) + text_wrap = writer.book.add_format({"text_wrap": True}) # type: ignore for sheet_name, df in sheets.items(): short_sheet_name = sheet_name[-31:] # only allow 31 characters @@ -1210,7 +1210,7 @@ def build_excel(self, is_geodata: bool = False): df.to_excel(writer, sheet_name=short_sheet_name, index=False) for column in df: - column_length = max(df[column].astype(str).map(len).max(), len(column)) + column_length = max(df[column].astype(str).map(len).max(), len(column)) # type: ignore column_length += 4 col_idx = df.columns.get_loc(column) @@ -1223,7 +1223,7 @@ def build_excel(self, is_geodata: bool = False): col_idx, col_idx, 50, text_wrap ) - writer.save() + writer.save() # type: ignore def build_sqlite(self, is_geodata: bool = False): """ diff --git a/src/data_common/dataset/table_management.py b/src/data_common/dataset/table_management.py index 657d27d..05595b6 100644 --- a/src/data_common/dataset/table_management.py +++ b/src/data_common/dataset/table_management.py @@ -111,7 +111,7 @@ def get_table_schema( Produce a table data schema for the dataframe https://specs.frictionlessdata.io/table-schema/ """ - data: SchemaValidator = build_table_schema(df, index=False, version=False) + data: SchemaValidator = build_table_schema(df, index=False, version=False) # type: ignore data["fields"] = [ cls.enhance_field(df, field, descriptions, enums) for field in data["fields"] diff --git a/src/data_common/pandas/df_extensions/common.py b/src/data_common/pandas/df_extensions/common.py index d4fa08a..ba36948 100644 --- a/src/data_common/pandas/df_extensions/common.py +++ b/src/data_common/pandas/df_extensions/common.py @@ -12,7 +12,7 @@ def __init__(self, pandas_obj: pd.DataFrame): self._obj = pandas_obj def update_from_map(self, map: dict) -> pd.Series: - return self._obj.apply(lambda x: map.get(x, x)) + return self._obj.apply(lambda x: map.get(x, x)) # type:ignore @pd_api.extensions.register_dataframe_accessor("common") diff --git a/src/data_common/pandas/df_extensions/la.py b/src/data_common/pandas/df_extensions/la.py index 7f76973..44ef469 100644 --- a/src/data_common/pandas/df_extensions/la.py +++ b/src/data_common/pandas/df_extensions/la.py @@ -180,7 +180,7 @@ def get_council_info( adf = get_la_df(include_historical=include_historical, as_of_date=as_of_date) if items: adf = adf[["local-authority-code"] + items] - return df.merge(adf, how=merge_type) + return df.merge(adf, how=merge_type) # type: ignore def just_lower_tier(self) -> pd.DataFrame: """ diff --git a/src/data_common/pandas/df_extensions/space.py b/src/data_common/pandas/df_extensions/space.py index 750a9da..1143cf2 100644 --- a/src/data_common/pandas/df_extensions/space.py +++ b/src/data_common/pandas/df_extensions/space.py @@ -14,7 +14,6 @@ from IPython.display import display from ipywidgets import interactive from matplotlib.colors import Colormap -from numpy.typing import ArrayLike from scipy.spatial.distance import pdist, squareform from sklearn.cluster import KMeans from sklearn.metrics import silhouette_score @@ -198,7 +197,7 @@ def map_from_anchor(self, anchor: pd.DataFrame | Path) -> dict[int, int]: # create mapping of the new (and possibly random numbers) to the ones we've previously assigned labels. - return mapping.to_dict() + return mapping.to_dict() # type: ignore def get_label_options(self) -> list: return [self.get_label_name(x) for x in range(1, self.k + 1)] @@ -209,7 +208,7 @@ def get_cluster_label_ids(self) -> pd.Series: labels = labels.map(self.cluster_no_mapping) return labels - def get_cluster_labels(self, include_short=True) -> ArrayLike: + def get_cluster_labels(self, include_short=True) -> np.ndarray: labels = self.get_cluster_label_ids() def f(x): @@ -220,7 +219,7 @@ def f(x): label_array = get_cluster_labels - def get_cluster_descs(self) -> ArrayLike: + def get_cluster_descs(self) -> np.ndarray: labels = self.get_cluster_label_ids() labels = labels.apply(lambda x: self.get_label_desc(n=x)) return np.array(labels) @@ -350,7 +349,7 @@ def s_score(kmeans): df = pd.DataFrame({"n": range(start, stop, step)}) df["k_means"] = df["n"].apply(self.get_clusters) df["sum_squares"] = df["k_means"].apply(lambda x: x.inertia_) - df["silhouette"] = df["k_means"].apply(s_score) + df["silhouette"] = df["k_means"].apply(s_score) # type: ignore plt.rcParams["figure.figsize"] = (10, 5) # type: ignore plt.subplot(1, 2, 1)