Skip to content

Commit

Permalink
WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
FBruzzesi committed Nov 6, 2024
1 parent 0aeb884 commit 1e0d4ae
Show file tree
Hide file tree
Showing 8 changed files with 197 additions and 0 deletions.
1 change: 1 addition & 0 deletions docs/api-reference/expr.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
- over
- pipe
- quantile
- rank
- round
- sample
- shift
Expand Down
1 change: 1 addition & 0 deletions docs/api-reference/series.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
- null_count
- pipe
- quantile
- rank
- rename
- round
- sample
Expand Down
10 changes: 10 additions & 0 deletions narwhals/_arrow/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,16 @@ def func(df: ArrowDataFrame) -> list[ArrowSeries]:
def mode(self: Self) -> Self:
    """Build the `mode` expression via `reuse_series_implementation`."""
    mode_expr = reuse_series_implementation(self, "mode")
    return mode_expr

def rank(
    self: Self,
    method: Literal["average", "min", "max", "dense", "ordinal"],
    *,
    descending: bool,
) -> Self:
    """Build the `rank` expression via `reuse_series_implementation`.

    Arguments:
        method: Strategy used to rank tied elements; forwarded unchanged.
        descending: Whether to rank in descending order; forwarded unchanged.

    Returns:
        Whatever `reuse_series_implementation` produces for the `"rank"` call.
    """
    rank_kwargs = {"method": method, "descending": descending}
    return reuse_series_implementation(self, "rank", **rank_kwargs)

@property
def dt(self: Self) -> ArrowExprDateTimeNamespace:
    """Return an `ArrowExprDateTimeNamespace` built from this expression."""
    namespace = ArrowExprDateTimeNamespace(self)
    return namespace
Expand Down
19 changes: 19 additions & 0 deletions narwhals/_arrow/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -721,6 +721,25 @@ def mode(self: Self) -> ArrowSeries:
plx.col(col_token) == plx.col(col_token).max()
)[self.name]

def rank(
    self: Self,
    method: Literal["average", "min", "max", "dense", "ordinal"],
    *,
    descending: bool,
) -> Self:
    """Assign ranks to the values of the native pyarrow series.

    Arguments:
        method: Strategy used to rank tied elements. `"ordinal"` is passed to
            pyarrow as the `"first"` tiebreaker; `"min"`, `"max"` and `"dense"`
            are passed through as-is. `"average"` is not supported.
        descending: Rank in descending order when True, ascending otherwise.

    Returns:
        A new series holding the ranks, with null wherever the input was null.

    Raises:
        ValueError: If `method` is `"average"`, which `pyarrow.compute.rank`
            has no tiebreaker for.
    """
    # Fail early with an actionable message instead of letting pyarrow reject
    # the unknown tiebreaker with a generic error.
    if method == "average":
        msg = (
            "`rank` with `method='average'` is not supported for pyarrow backend. "
            "The available methods are {'min', 'max', 'dense', 'ordinal'}."
        )
        raise ValueError(msg)

    import pyarrow as pa  # ignore-banned-import
    import pyarrow.compute as pc  # ignore-banned-import

    sort_keys = "descending" if descending else "ascending"
    tiebreaker = "first" if method == "ordinal" else method
    native_series = self._native_series
    null_mask = pc.is_null(native_series)

    rank = pc.rank(native_series, sort_keys=sort_keys, tiebreaker=tiebreaker)

    # Positions that were null in the input are forced back to null in the result.
    result = pc.if_else(null_mask, pa.scalar(None), rank)
    return self._from_native_series(result)

def __iter__(self: Self) -> Iterator[Any]:
    """Yield the elements produced by the native series' own iterator."""
    native_iterator = self._native_series.__iter__()
    yield from native_iterator

Expand Down
10 changes: 10 additions & 0 deletions narwhals/_pandas_like/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -387,6 +387,16 @@ def gather_every(self: Self, n: int, offset: int = 0) -> Self:
def mode(self: Self) -> Self:
    """Build the `mode` expression via `reuse_series_implementation`."""
    mode_expr = reuse_series_implementation(self, "mode")
    return mode_expr

def rank(
    self: Self,
    method: Literal["average", "min", "max", "dense", "ordinal"],
    *,
    descending: bool,
) -> Self:
    """Build the `rank` expression via `reuse_series_implementation`.

    Arguments:
        method: Strategy used to rank tied elements; forwarded unchanged.
        descending: Whether to rank in descending order; forwarded unchanged.

    Returns:
        Whatever `reuse_series_implementation` produces for the `"rank"` call.
    """
    rank_kwargs = {"method": method, "descending": descending}
    return reuse_series_implementation(self, "rank", **rank_kwargs)

@property
def str(self: Self) -> PandasLikeExprStringNamespace:
    """Return a `PandasLikeExprStringNamespace` built from this expression."""
    namespace = PandasLikeExprStringNamespace(self)
    return namespace
Expand Down
14 changes: 14 additions & 0 deletions narwhals/_pandas_like/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -682,6 +682,20 @@ def mode(self: Self) -> Self:
def __iter__(self: Self) -> Iterator[Any]:
    """Yield the elements produced by the native series' own iterator."""
    native_iterator = self._native_series.__iter__()
    yield from native_iterator

def rank(
    self: Self,
    method: Literal["average", "min", "max", "dense", "ordinal"],
    *,
    descending: bool,
) -> Self:
    """Assign ranks to the values of the native pandas-like series.

    Arguments:
        method: Strategy used to rank tied elements. `"ordinal"` is passed to
            the native `rank` as `"first"`; other values are passed as-is.
        descending: Rank in descending order when True, ascending otherwise.

    Returns:
        A new series wrapping the result of the native `rank` call.
    """
    # The native API names the "ordinal" strategy "first".
    native_method = method if method != "ordinal" else "first"
    ranked = self._native_series.rank(
        method=native_method,
        na_option="keep",  # missing values stay missing in the output
        ascending=not descending,
        pct=False,
    )
    return self._from_native_series(ranked)

@property
def str(self) -> PandasLikeSeriesStringNamespace:
    """Return a `PandasLikeSeriesStringNamespace` built from this series."""
    namespace = PandasLikeSeriesStringNamespace(self)
    return namespace
Expand Down
91 changes: 91 additions & 0 deletions narwhals/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -2310,6 +2310,97 @@ def mode(self: Self) -> Self:
"""
return self.__class__(lambda plx: self._call(plx).mode())

def rank(
    self: Self,
    method: Literal["average", "min", "max", "dense", "ordinal"] = "average",
    *,
    descending: bool = False,
) -> Self:
    """
    Assign ranks to data, dealing with ties appropriately.

    Arguments:
        method: The method used to assign ranks to tied elements.
            The following methods are available (default is 'average'):

            - 'average' : The average of the ranks that would have been assigned to
              all the tied values is assigned to each value.
            - 'min' : The minimum of the ranks that would have been assigned to all
              the tied values is assigned to each value. (This is also referred to
              as "competition" ranking.)
            - 'max' : The maximum of the ranks that would have been assigned to all
              the tied values is assigned to each value.
            - 'dense' : Like 'min', but the rank of the next highest element is
              assigned the rank immediately after those assigned to the tied
              elements.
            - 'ordinal' : All values are given a distinct rank, corresponding to the
              order that the values occur in the Series.

        descending: Rank in descending order.

    Returns:
        A new expression that calls `rank` with the given arguments.

    Raises:
        ValueError: If `method` is not one of the methods listed above.

    Examples:
        The 'average' method:

        >>> import polars as pl
        >>> df = pl.DataFrame({"a": [3, 6, 1, 1, 6]})
        >>> df.select(pl.col("a").rank())
        shape: (5, 1)
        ┌─────┐
        │ a   │
        │ --- │
        │ f64 │
        ╞═════╡
        │ 3.0 │
        │ 4.5 │
        │ 1.5 │
        │ 1.5 │
        │ 4.5 │
        └─────┘

        The 'ordinal' method:

        >>> df = pl.DataFrame({"a": [3, 6, 1, 1, 6]})
        >>> df.select(pl.col("a").rank("ordinal"))
        shape: (5, 1)
        ┌─────┐
        │ a   │
        │ --- │
        │ u32 │
        ╞═════╡
        │ 3   │
        │ 4   │
        │ 1   │
        │ 2   │
        │ 5   │
        └─────┘

        Use 'rank' with 'over' to rank within groups:

        >>> df = pl.DataFrame({"a": [1, 1, 2, 2, 2], "b": [6, 7, 5, 14, 11]})
        >>> df.with_columns(pl.col("b").rank().over("a").alias("rank"))
        shape: (5, 3)
        ┌─────┬─────┬──────┐
        │ a   ┆ b   ┆ rank │
        │ --- ┆ --- ┆ ---  │
        │ i64 ┆ i64 ┆ f64  │
        ╞═════╪═════╪══════╡
        │ 1   ┆ 6   ┆ 1.0  │
        │ 1   ┆ 7   ┆ 2.0  │
        │ 2   ┆ 5   ┆ 1.0  │
        │ 2   ┆ 14  ┆ 3.0  │
        │ 2   ┆ 11  ┆ 2.0  │
        └─────┴─────┴──────┘
    """
    # 'ordinal' is advertised by the signature and the docstring, so it must
    # pass validation like the other methods.
    supported_rank_methods = {"average", "min", "max", "dense", "ordinal"}
    if method not in supported_rank_methods:
        msg = f"Ranking method must be one of {supported_rank_methods}. Found '{method}'"
        raise ValueError(msg)

    return self.__class__(
        lambda plx: self._call(plx).rank(method=method, descending=descending)
    )

@property
def str(self: Self) -> ExprStringNamespace[Self]:
    """Return an `ExprStringNamespace` built from this expression."""
    namespace = ExprStringNamespace(self)
    return namespace
Expand Down
51 changes: 51 additions & 0 deletions narwhals/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2525,6 +2525,57 @@ def mode(self: Self) -> Self:
def __iter__(self: Self) -> Iterator[Any]:
    """Yield the elements produced by the compliant series' own iterator."""
    compliant_iterator = self._compliant_series.__iter__()
    yield from compliant_iterator

def rank(
    self: Self,
    method: Literal["average", "min", "max", "dense", "ordinal"] = "average",
    *,
    descending: bool = False,
) -> Self:
    """
    Assign ranks to data, dealing with ties appropriately.

    Arguments:
        method: The method used to assign ranks to tied elements.
            The following methods are available (default is 'average'):

            - 'average' : The average of the ranks that would have been assigned to
              all the tied values is assigned to each value.
            - 'min' : The minimum of the ranks that would have been assigned to all
              the tied values is assigned to each value. (This is also referred to
              as "competition" ranking.)
            - 'max' : The maximum of the ranks that would have been assigned to all
              the tied values is assigned to each value.
            - 'dense' : Like 'min', but the rank of the next highest element is
              assigned the rank immediately after those assigned to the tied
              elements.
            - 'ordinal' : All values are given a distinct rank, corresponding to the
              order that the values occur in the Series.

        descending: Rank in descending order.

    Returns:
        A new series wrapping the result of the compliant series' `rank`.

    Raises:
        ValueError: If `method` is not one of the methods listed above.

    Examples:
        >>> import polars as pl
        >>> s = pl.Series("a", [3, 6, 1, 1, 6])
        >>> s.rank()
        shape: (5,)
        Series: 'a' [f64]
        [
            3.0
            4.5
            1.5
            1.5
            4.5
        ]
    """
    # 'ordinal' is advertised by the signature and the docstring, so it must
    # pass validation like the other methods.
    supported_rank_methods = {"average", "min", "max", "dense", "ordinal"}
    if method not in supported_rank_methods:
        msg = f"Ranking method must be one of {supported_rank_methods}. Found '{method}'"
        raise ValueError(msg)

    return self._from_compliant_series(
        self._compliant_series.rank(method=method, descending=descending)
    )

@property
def str(self: Self) -> SeriesStringNamespace[Self]:
    """Return a `SeriesStringNamespace` built from this series."""
    namespace = SeriesStringNamespace(self)
    return namespace
Expand Down

0 comments on commit 1e0d4ae

Please sign in to comment.