Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🐝 Update pandas to 2.1 #2089

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions etl/datadiff.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,10 +325,10 @@ def cli(
ds_a = _match_dataset(path_to_ds_a, path)
ds_b = _match_dataset(path_to_ds_b, path)

if ds_a and ds_b and ds_a.metadata.source_checksum == ds_b.metadata.source_checksum:
# skip if they have the same source checksum, note that we're not comparing checksum of actual data
# to improve performance. Source checksum should be enough
continue
# if ds_a and ds_b and ds_a.metadata.source_checksum == ds_b.metadata.source_checksum:
# # skip if they have the same source checksum, note that we're not comparing checksum of actual data
# # to improve performance. Source checksum should be enough
# continue

lines = []

Expand Down Expand Up @@ -389,8 +389,8 @@ def _index_equals(table_a: pd.DataFrame, table_b: pd.DataFrame, sample: int = 10

def _dict_diff(dict_a: Dict[str, Any], dict_b: Dict[str, Any], tabs: int = 0, **kwargs) -> str:
"""Convert dictionaries into YAML and compare them using difflib. Return colored diff as a string."""
meta_a = yaml_dump(dict_a, **kwargs)
meta_b = yaml_dump(dict_b, **kwargs)
meta_a = yaml_dump(dict_a, sort_keys=True, **kwargs)
meta_b = yaml_dump(dict_b, sort_keys=True, **kwargs)

lines = difflib.ndiff(meta_a.splitlines(keepends=True), meta_b.splitlines(keepends=True)) # type: ignore
# do not print lines that are identical
Expand Down
3 changes: 2 additions & 1 deletion etl/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,14 +156,15 @@ def yaml_dump(
strip_lines: bool = True,
replace_confusing_ascii: bool = False,
width: int = 120,
sort_keys: bool = False,
) -> Optional[str]:
"""Alternative to yaml.dump which produces good looking multi-line strings and perserves ordering
of keys. If strip_lines is True, all lines in the string will be stripped and all tabs will be
replaced by two spaces."""
# strip lines, otherwise YAML won't output strings in literal format
if strip_lines:
d = _strip_lines_in_dict(d)
s = yaml.dump(d, stream=stream, sort_keys=False, allow_unicode=True, Dumper=_MyDumper, width=width)
s = yaml.dump(d, stream=stream, sort_keys=sort_keys, allow_unicode=True, Dumper=_MyDumper, width=width)
if replace_confusing_ascii:
assert s, "replace_confusing_ascii does not work for streams"
s = (
Expand Down
5 changes: 0 additions & 5 deletions lib/catalog/owid/catalog/tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
import structlog
from pandas._typing import FilePath, ReadCsvBuffer, Scalar # type: ignore
from pandas.core.series import Series
from pandas.util._decorators import rewrite_axis_style_signature

from owid.repack import repack_frame

Expand Down Expand Up @@ -425,10 +424,6 @@ def equals_table(self, table: "Table") -> bool:
and self._fields == table._fields
)

@rewrite_axis_style_signature(
"mapper",
[("copy", True), ("inplace", False), ("level", None), ("errors", "ignore")],
)
def rename(self, *args: Any, **kwargs: Any) -> Optional["Table"]:
"""Rename columns while keeping their metadata."""
inplace = kwargs.get("inplace")
Expand Down
2 changes: 1 addition & 1 deletion lib/catalog/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ homepage = "https://github.com/owid/owid-grapher-py"

[tool.poetry.dependencies]
python = ">=3.8.1, <3.12"
pandas = ">=1.3.3,<2.0"
pandas = ">=2.1.0,<2.2.0"
jsonschema = ">=3.2.0"
pyarrow = ">=10.0.1"
ipdb = ">=0.13.9"
Expand Down
Loading
Loading