diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 4eca8ac29..f967d925a 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -23,6 +23,28 @@ repos:
       files: \.(py|rst|md)$
       args: [--ignore-words-list=ser]
       exclude: ^docs/api-completeness.md$
+-   repo: https://github.com/pycqa/flake8
+    rev: '7.1.1'  # todo: remove once https://github.com/astral-sh/ruff/issues/458 is addressed
+    hooks:
+    -   id: flake8
+        additional_dependencies: [darglint==1.8.1, Flake8-pyproject]
+        entry: flake8 --select DAR --ignore DAR402,DAR401
+        exclude: |
+            (?x)^(
+                tests/.*|
+                # TODO: gradually enable
+                narwhals/series\.py$|
+                # TODO: gradually enable
+                narwhals/dataframe\.py$|
+                # TODO: gradually enable
+                narwhals/dependencies\.py$|
+                # some false positives in this one
+                narwhals/translate\.py$|
+                # some false positives in this one
+                narwhals/stable/v1/__init__\.py$|
+                # private, so less urgent to document too well
+                narwhals/_.*
+            )$
 - repo: local
   hooks:
     - id: check-api-reference
diff --git a/narwhals/dtypes.py b/narwhals/dtypes.py
index 278e7d71a..719bd989f 100644
--- a/narwhals/dtypes.py
+++ b/narwhals/dtypes.py
@@ -551,7 +551,11 @@ def __repr__(self) -> str:
         return f"{class_name}({dict(self)})"
 
     def to_schema(self) -> OrderedDict[str, DType | type[DType]]:
-        """Return Struct dtype as a schema dict."""
+        """Return Struct dtype as a schema dict.
+
+        Returns:
+            Mapping from column name to dtype.
+        """
         return OrderedDict(self)
 
 
diff --git a/narwhals/expr.py b/narwhals/expr.py
index fce4dc5c4..bc90721d7 100644
--- a/narwhals/expr.py
+++ b/narwhals/expr.py
@@ -93,6 +93,11 @@ def alias(self, name: str) -> Self:
     def pipe(self, function: Callable[[Any], Self], *args: Any, **kwargs: Any) -> Self:
         """Pipe function call.
 
+        Arguments:
+            function: Function to apply.
+            args: Positional arguments to pass to function.
+            kwargs: Keyword arguments to pass to function.
+
         Returns:
             A new expression.
 
@@ -1075,6 +1080,9 @@ def diff(self) -> Self:
     def shift(self, n: int) -> Self:
         """Shift values by `n` positions.
 
+        Arguments:
+            n: Number of positions to shift values by.
+
         Returns:
             A new expression.
 
@@ -1310,9 +1318,7 @@ def is_between(
 
         Arguments:
             lower_bound: Lower bound value.
-
             upper_bound: Upper bound value.
-
             closed: Define which sides of the interval are closed (inclusive).
 
         Returns:
@@ -1427,6 +1433,9 @@ def is_in(self, other: Any) -> Self:
     def filter(self, *predicates: Any) -> Self:
         """Filters elements based on a condition, returning a new expression.
 
+        Arguments:
+            predicates: Conditions to filter by (which get ANDed together).
+
         Returns:
             A new expression.
 
@@ -1607,9 +1616,7 @@ def fill_null(
 
         Arguments:
             value: Value used to fill null values.
-
             strategy: Strategy used to fill null values.
-
             limit: Number of consecutive null values to fill when using the 'forward' or 'backward' strategy.
 
         Returns:
diff --git a/narwhals/functions.py b/narwhals/functions.py
index bfc7c7595..e3f3751e3 100644
--- a/narwhals/functions.py
+++ b/narwhals/functions.py
@@ -46,12 +46,11 @@ def concat(
 
     Arguments:
         items: DataFrames, LazyFrames to concatenate.
-
         how: {'vertical', 'horizontal'}
-            * vertical: Stacks Series from DataFrames vertically and fills with `null`
-              if the lengths don't match.
-            * horizontal: Stacks Series from DataFrames horizontally and fills with `null`
-              if the lengths don't match.
+            - vertical: Stacks Series from DataFrames vertically and fills with `null`
+                if the lengths don't match.
+            - horizontal: Stacks Series from DataFrames horizontally and fills with `null`
+                if the lengths don't match.
 
     Returns:
         A new DataFrame, Lazyframe resulting from the concatenation.
@@ -534,6 +533,8 @@ def _get_sys_info() -> dict[str, str]:
 
     Copied from sklearn
 
+    Returns:
+        Dictionary with system info.
     """
     python = sys.version.replace("\n", " ")
 
@@ -556,6 +557,8 @@ def _get_deps_info() -> dict[str, str]:
 
     This function and show_versions were copied from sklearn and adapted
 
+    Returns:
+        Mapping from dependency to version.
     """
     deps = (
         "pandas",
@@ -607,9 +610,13 @@ def get_level(
 ) -> Literal["full", "interchange"]:
     """Level of support Narwhals has for current object.
 
-    This can be one of:
+    Arguments:
+        obj: Dataframe or Series.
+
+    Returns:
+        This can be one of:
 
-    - 'full': full Narwhals API support
-    - 'metadata': only metadata operations are supported (`df.schema`)
+            - 'full': full Narwhals API support
+            - 'interchange': only metadata operations are supported (`df.schema`)
     """
     return obj._level
diff --git a/narwhals/group_by.py b/narwhals/group_by.py
index 82054da60..13e625e92 100644
--- a/narwhals/group_by.py
+++ b/narwhals/group_by.py
@@ -35,9 +35,11 @@ def agg(
         Arguments:
             aggs: Aggregations to compute for each group of the group by operation,
                 specified as positional arguments.
-
             named_aggs: Additional aggregations, specified as keyword arguments.
 
+        Returns:
+            A new Dataframe.
+
         Examples:
             Group by one column or by multiple columns and call `agg` to compute
             the grouped sum of another column.
diff --git a/narwhals/schema.py b/narwhals/schema.py
index 0d760c8ac..1d357b766 100644
--- a/narwhals/schema.py
+++ b/narwhals/schema.py
@@ -60,13 +60,25 @@ def __init__(
         super().__init__(schema)
 
     def names(self) -> list[str]:
-        """Get the column names of the schema."""
+        """Get the column names of the schema.
+
+        Returns:
+            Column names.
+        """
         return list(self.keys())
 
     def dtypes(self) -> list[DType]:
-        """Get the data types of the schema."""
+        """Get the data types of the schema.
+
+        Returns:
+            Data types of schema.
+        """
         return list(self.values())
 
     def len(self) -> int:
-        """Get the number of columns in the schema."""
+        """Get the number of columns in the schema.
+
+        Returns:
+            Number of columns.
+        """
         return len(self)
diff --git a/narwhals/selectors.py b/narwhals/selectors.py
index b9c484a37..ab23ed27e 100644
--- a/narwhals/selectors.py
+++ b/narwhals/selectors.py
@@ -15,6 +15,9 @@ def by_dtype(*dtypes: Any) -> Expr:
     Arguments:
         dtypes: one or data types to select
 
+    Returns:
+        A new expression.
+
     Examples:
         >>> import narwhals as nw
         >>> import narwhals.selectors as ncs
@@ -55,6 +58,9 @@ def by_dtype(*dtypes: Any) -> Expr:
 def numeric() -> Expr:
     """Select numeric columns.
 
+    Returns:
+        A new expression.
+
     Examples:
         >>> import narwhals as nw
         >>> import narwhals.selectors as ncs
@@ -95,6 +101,9 @@ def numeric() -> Expr:
 def boolean() -> Expr:
     """Select boolean columns.
 
+    Returns:
+        A new expression.
+
     Examples:
         >>> import narwhals as nw
         >>> import narwhals.selectors as ncs
@@ -135,6 +144,9 @@ def boolean() -> Expr:
 def string() -> Expr:
     """Select string columns.
 
+    Returns:
+        A new expression.
+
     Examples:
         >>> import narwhals as nw
         >>> import narwhals.selectors as ncs
@@ -175,6 +187,9 @@ def string() -> Expr:
 def categorical() -> Expr:
     """Select categorical columns.
 
+    Returns:
+        A new expression.
+
     Examples:
         >>> import narwhals as nw
         >>> import narwhals.selectors as ncs
@@ -215,6 +230,9 @@ def categorical() -> Expr:
 def all() -> Expr:
     """Select all columns.
 
+    Returns:
+        A new expression.
+
     Examples:
         >>> import narwhals as nw
         >>> import narwhals.selectors as ncs
diff --git a/narwhals/stable/v1/__init__.py b/narwhals/stable/v1/__init__.py
index 3d97fa3d8..e53eb2f92 100644
--- a/narwhals/stable/v1/__init__.py
+++ b/narwhals/stable/v1/__init__.py
@@ -1090,58 +1090,25 @@ def narwhalify(
 ) -> Callable[..., Any]:
     """Decorate function so it becomes dataframe-agnostic.
 
-    `narwhalify` will try to convert any dataframe/series-like object into the narwhal
+    This will try to convert any dataframe/series-like object into the Narwhals
     respective DataFrame/Series, while leaving the other parameters as they are.
-
-    Similarly, if the output of the function is a narwhals DataFrame or Series, it will be
+    Similarly, if the output of the function is a Narwhals DataFrame or Series, it will be
     converted back to the original dataframe/series type, while if the output is another
     type it will be left as is.
-
     By setting `pass_through=False`, then every input and every output will be required to be a
     dataframe/series-like object.
 
-    Instead of writing
-
-    ```python
-    import narwhals as nw
-
-
-    def func(df):
-        df = nw.from_native(df, pass_through=True)
-        df = df.group_by("a").agg(nw.col("b").sum())
-        return nw.to_native(df)
-    ```
-
-    you can just write
-
-    ```python
-    import narwhals as nw
-
-
-    @nw.narwhalify
-    def func(df):
-        return df.group_by("a").agg(nw.col("b").sum())
-    ```
-
-    You can also pass in extra arguments, e.g.
-
-    ```python
-    @nw.narwhalify(eager_only=True)
-    ```
-
-    that will get passed down to `nw.from_native`.
-
     Arguments:
         func: Function to wrap in a `from_native`-`to_native` block.
-        strict: Determine what happens if the object isn't supported by Narwhals:
+        strict: **Deprecated** (v1.13.0):
+            Please use `pass_through` instead. Note that `strict` is still available
+            (and won't emit a deprecation warning) if you use `narwhals.stable.v1`,
+            see [perfect backwards compatibility policy](https://narwhals-dev.github.io/narwhals/backcompat/).
+
+            Determine what happens if the object isn't supported by Narwhals:
 
             - `True` (default): raise an error
             - `False`: pass object through as-is
-
-            **Deprecated** (v1.13.0):
-                Please use `pass_through` instead. Note that `strict` is still available
-                (and won't emit a deprecation warning) if you use `narwhals.stable.v1`,
-                see [perfect backwards compatibility policy](https://narwhals-dev.github.io/narwhals/backcompat/).
         pass_through: Determine what happens if the object isn't supported by Narwhals:
 
             - `False` (default): raise an error
@@ -1151,6 +1118,24 @@ def func(df):
             implement the Dataframe Interchange Protocol.
         series_only: Whether to only allow series.
         allow_series: Whether to allow series (default is only dataframe / lazyframe).
+
+    Returns:
+        Decorated function.
+
+    Examples:
+        Instead of writing
+
+        >>> import narwhals as nw
+        >>> def func(df):
+        ...     df = nw.from_native(df, pass_through=True)
+        ...     df = df.group_by("a").agg(nw.col("b").sum())
+        ...     return nw.to_native(df)
+
+        you can just write
+
+        >>> @nw.narwhalify
+        ... def func(df):
+        ...     return df.group_by("a").agg(nw.col("b").sum())
     """
     pass_through = validate_strict_and_pass_though(
         strict, pass_through, pass_through_default=True, emit_deprecation_warning=False
@@ -2102,12 +2087,11 @@ def concat(
 
     Arguments:
         items: DataFrames, LazyFrames to concatenate.
-
         how: {'vertical', 'horizontal'}
-            * vertical: Stacks Series from DataFrames vertically and fills with `null`
-              if the lengths don't match.
-            * horizontal: Stacks Series from DataFrames horizontally and fills with `null`
-              if the lengths don't match.
+            - vertical: Stacks Series from DataFrames vertically and fills with `null`
+                if the lengths don't match.
+            - horizontal: Stacks Series from DataFrames horizontally and fills with `null`
+                if the lengths don't match.
 
     Returns:
         A new DataFrame, Lazyframe resulting from the concatenation.
diff --git a/narwhals/translate.py b/narwhals/translate.py
index 54653b1e7..64dd10593 100644
--- a/narwhals/translate.py
+++ b/narwhals/translate.py
@@ -713,9 +713,15 @@ def _from_native_impl(  # noqa: PLR0915
     return native_object
 
 
-def get_native_namespace(obj: Any) -> Any:
+def get_native_namespace(obj: DataFrame[Any] | LazyFrame[Any] | Series) -> Any:
     """Get native namespace from object.
 
+    Arguments:
+        obj: Dataframe, Lazyframe, or Series.
+
+    Returns:
+        Native module.
+
     Examples:
         >>> import polars as pl
         >>> import pandas as pd
@@ -742,58 +748,25 @@ def narwhalify(
 ) -> Callable[..., Any]:
     """Decorate function so it becomes dataframe-agnostic.
 
-    `narwhalify` will try to convert any dataframe/series-like object into the narwhal
+    This will try to convert any dataframe/series-like object into the Narwhals
     respective DataFrame/Series, while leaving the other parameters as they are.
-
-    Similarly, if the output of the function is a narwhals DataFrame or Series, it will be
+    Similarly, if the output of the function is a Narwhals DataFrame or Series, it will be
     converted back to the original dataframe/series type, while if the output is another
     type it will be left as is.
-
     By setting `pass_through=False`, then every input and every output will be required to be a
     dataframe/series-like object.
 
-    Instead of writing
-
-    ```python
-    import narwhals as nw
-
-
-    def func(df):
-        df = nw.from_native(df, pass_through=True)
-        df = df.group_by("a").agg(nw.col("b").sum())
-        return nw.to_native(df)
-    ```
-
-    you can just write
-
-    ```python
-    import narwhals as nw
-
-
-    @nw.narwhalify
-    def func(df):
-        return df.group_by("a").agg(nw.col("b").sum())
-    ```
-
-    You can also pass in extra arguments, e.g.
-
-    ```python
-    @nw.narwhalify(eager_only=True)
-    ```
-
-    that will get passed down to `nw.from_native`.
-
     Arguments:
         func: Function to wrap in a `from_native`-`to_native` block.
-        strict: Determine what happens if the object isn't supported by Narwhals:
+        strict: **Deprecated** (v1.13.0):
+            Please use `pass_through` instead. Note that `strict` is still available
+            (and won't emit a deprecation warning) if you use `narwhals.stable.v1`,
+            see [perfect backwards compatibility policy](https://narwhals-dev.github.io/narwhals/backcompat/).
+
+            Determine what happens if the object isn't supported by Narwhals:
 
             - `True` (default): raise an error
             - `False`: pass object through as-is
-
-            **Deprecated** (v1.13.0):
-                Please use `pass_through` instead. Note that `strict` is still available
-                (and won't emit a deprecation warning) if you use `narwhals.stable.v1`,
-                see [perfect backwards compatibility policy](https://narwhals-dev.github.io/narwhals/backcompat/).
         pass_through: Determine what happens if the object isn't supported by Narwhals:
 
             - `False` (default): raise an error
@@ -803,6 +776,24 @@ def func(df):
             implement the Dataframe Interchange Protocol.
         series_only: Whether to only allow series.
         allow_series: Whether to allow series (default is only dataframe / lazyframe).
+
+    Returns:
+        Decorated function.
+
+    Examples:
+        Instead of writing
+
+        >>> import narwhals as nw
+        >>> def func(df):
+        ...     df = nw.from_native(df, pass_through=True)
+        ...     df = df.group_by("a").agg(nw.col("b").sum())
+        ...     return nw.to_native(df)
+
+        you can just write
+
+        >>> @nw.narwhalify
+        ... def func(df):
+        ...     return df.group_by("a").agg(nw.col("b").sum())
     """
     from narwhals.utils import validate_strict_and_pass_though
 
@@ -863,6 +854,12 @@ def wrapper(*args: Any, **kwargs: Any) -> Any:
 def to_py_scalar(scalar_like: Any) -> Any:
     """If a scalar is not Python native, converts it to Python native.
 
+    Arguments:
+        scalar_like: Scalar-like value.
+
+    Returns:
+        Python scalar.
+
     Raises:
         ValueError: If the object is not convertible to a scalar.
 
diff --git a/narwhals/utils.py b/narwhals/utils.py
index ee5225948..7f5e1b4ce 100644
--- a/narwhals/utils.py
+++ b/narwhals/utils.py
@@ -56,7 +56,14 @@ class Implementation(Enum):
     def from_native_namespace(
         cls: type[Self], native_namespace: ModuleType
     ) -> Implementation:  # pragma: no cover
-        """Instantiate Implementation object from a native namespace module."""
+        """Instantiate Implementation object from a native namespace module.
+
+        Arguments:
+            native_namespace: Native namespace.
+
+        Returns:
+            Implementation.
+        """
         mapping = {
             get_pandas(): Implementation.PANDAS,
             get_modin(): Implementation.MODIN,
@@ -68,7 +75,11 @@ def from_native_namespace(
         return mapping.get(native_namespace, Implementation.UNKNOWN)
 
     def to_native_namespace(self: Self) -> ModuleType:
-        """Return the native namespace module corresponding to Implementation."""
+        """Return the native namespace module corresponding to Implementation.
+
+        Returns:
+            Native module.
+        """
         mapping = {
             Implementation.PANDAS: get_pandas(),
             Implementation.MODIN: get_modin(),
@@ -127,7 +138,14 @@ def _is_iterable(arg: Any | Iterable[Any]) -> bool:
 
 
 def parse_version(version: Sequence[str | int]) -> tuple[int, ...]:
-    """Simple version parser; split into a tuple of ints for comparison."""
+    """Simple version parser; split into a tuple of ints for comparison.
+
+    Arguments:
+        version: Version string (or sequence of version components) to parse.
+
+    Returns:
+        Parsed version number.
+    """
     # lifted from Polars
     if isinstance(version, str):  # pragma: no cover
         version = version.split(".")
@@ -157,6 +175,13 @@ def validate_laziness(items: Iterable[Any]) -> None:
 def maybe_align_index(lhs: T, rhs: Series | BaseFrame[Any]) -> T:
     """Align `lhs` to the Index of `rhs`, if they're both pandas-like.
 
+    Arguments:
+        lhs: Dataframe or Series.
+        rhs: Dataframe or Series to align with.
+
+    Returns:
+        Same type as `lhs`.
+
     Notes:
         This is only really intended for backwards-compatibility purposes,
         for example if your library already aligns indices for users.
@@ -245,6 +270,12 @@ def _validate_index(index: Any) -> None:
 def maybe_get_index(obj: T) -> Any | None:
     """Get the index of a DataFrame or a Series, if it's pandas-like.
 
+    Arguments:
+        obj: Dataframe or Series.
+
+    Returns:
+        Index of `obj` if it is pandas-like, otherwise `None`.
+
     Notes:
         This is only really intended for backwards-compatibility purposes,
         for example if your library already aligns indices for users.
@@ -289,6 +320,9 @@ def maybe_set_index(
             `ValueError` is raised.
         index: series or list of series to set as index.
 
+    Returns:
+        Same type as input.
+
     Raises:
         ValueError: If one of the following condition happens:
 
@@ -363,6 +397,12 @@ def maybe_set_index(
 def maybe_reset_index(obj: T) -> T:
     """Reset the index to the default integer index of a DataFrame or a Series, if it's pandas-like.
 
+    Arguments:
+        obj: Dataframe or Series.
+
+    Returns:
+        Same type as input.
+
     Notes:
         This is only really intended for backwards-compatibility purposes,
         for example if your library already resets the index for users.
@@ -426,6 +466,9 @@ def maybe_convert_dtypes(obj: T, *args: bool, **kwargs: bool | str) -> T:
         *args: Additional arguments which gets passed through.
         **kwargs: Additional arguments which gets passed through.
 
+    Returns:
+        Same type as input.
+
     Notes:
         For non-pandas-like inputs, this is a no-op.
         Also, `args` and `kwargs` just get passed down to the underlying library as-is.
@@ -479,6 +522,12 @@ def is_ordered_categorical(series: Series) -> bool:
     - For PyArrow table:
       - Categoricals are ordered if `dtype.type.ordered == True`.
 
+    Arguments:
+        series: Input Series.
+
+    Returns:
+        Whether the Series is an ordered categorical.
+
     Examples:
         >>> import narwhals as nw
         >>> import pandas as pd
@@ -601,6 +650,9 @@ def is_sequence_but_not_str(sequence: Any) -> TypeGuard[Sequence[Any]]:
 def find_stacklevel() -> int:
     """Find the first place in the stack that is not inside narwhals.
 
+    Returns:
+        Stacklevel.
+
     Taken from:
     https://github.com/pandas-dev/pandas/blob/ab89c53f48df67709a533b6a95ce3d911871a0a8/pandas/util/_exceptions.py#L30-L51
     """
@@ -640,13 +692,10 @@ def find_stacklevel() -> int:
 def issue_deprecation_warning(message: str, _version: str) -> None:
     """Issue a deprecation warning.
 
-    Parameters
-    ----------
-    message
-        The message associated with the warning.
-    version
-        Narwhals version when the warning was introduced. Just used for internal
-        bookkeeping.
+    Arguments:
+        message: The message associated with the warning.
+        _version: Narwhals version when the warning was introduced. Just used for internal
+            bookkeeping.
     """
     warn(message=message, category=DeprecationWarning, stacklevel=find_stacklevel())
 
diff --git a/tests/expr_and_series/cum_max_test.py b/tests/expr_and_series/cum_max_test.py
index 2a9c634f0..054537d34 100644
--- a/tests/expr_and_series/cum_max_test.py
+++ b/tests/expr_and_series/cum_max_test.py
@@ -27,7 +27,9 @@ def test_cum_max_expr(
     if PYARROW_VERSION < (13, 0, 0) and "pyarrow_table" in str(constructor):
         request.applymarker(pytest.mark.xfail)
 
-    if PANDAS_VERSION < (2, 1) and "pandas_pyarrow" in str(constructor):
+    if (PANDAS_VERSION < (2, 1) or PYARROW_VERSION < (13,)) and "pandas_pyarrow" in str(
+        constructor
+    ):
         request.applymarker(pytest.mark.xfail)
 
     name = "reverse_cum_max" if reverse else "cum_max"
@@ -45,7 +47,9 @@ def test_cum_max_series(
     if PYARROW_VERSION < (13, 0, 0) and "pyarrow_table" in str(constructor_eager):
         request.applymarker(pytest.mark.xfail)
 
-    if PANDAS_VERSION < (2, 1) and "pandas_pyarrow" in str(constructor_eager):
+    if (PANDAS_VERSION < (2, 1) or PYARROW_VERSION < (13,)) and "pandas_pyarrow" in str(
+        constructor_eager
+    ):
         request.applymarker(pytest.mark.xfail)
 
     df = nw.from_native(constructor_eager(data), eager_only=True)
diff --git a/tests/expr_and_series/cum_min_test.py b/tests/expr_and_series/cum_min_test.py
index c5a2dbf99..bb92f5b9d 100644
--- a/tests/expr_and_series/cum_min_test.py
+++ b/tests/expr_and_series/cum_min_test.py
@@ -27,7 +27,9 @@ def test_cum_min_expr(
     if PYARROW_VERSION < (13, 0, 0) and "pyarrow_table" in str(constructor):
         request.applymarker(pytest.mark.xfail)
 
-    if PANDAS_VERSION < (2, 1) and "pandas_pyarrow" in str(constructor):
+    if (PANDAS_VERSION < (2, 1) or PYARROW_VERSION < (13,)) and "pandas_pyarrow" in str(
+        constructor
+    ):
         request.applymarker(pytest.mark.xfail)
 
     name = "reverse_cum_min" if reverse else "cum_min"
@@ -45,7 +47,9 @@ def test_cum_min_series(
     if PYARROW_VERSION < (13, 0, 0) and "pyarrow_table" in str(constructor_eager):
         request.applymarker(pytest.mark.xfail)
 
-    if PANDAS_VERSION < (2, 1) and "pandas_pyarrow" in str(constructor_eager):
+    if (PANDAS_VERSION < (2, 1) or PYARROW_VERSION < (13,)) and "pandas_pyarrow" in str(
+        constructor_eager
+    ):
         request.applymarker(pytest.mark.xfail)
 
     df = nw.from_native(constructor_eager(data), eager_only=True)
diff --git a/tests/expr_and_series/cum_prod_test.py b/tests/expr_and_series/cum_prod_test.py
index 78e5a036a..1d5816ff2 100644
--- a/tests/expr_and_series/cum_prod_test.py
+++ b/tests/expr_and_series/cum_prod_test.py
@@ -27,7 +27,9 @@ def test_cum_prod_expr(
     if PYARROW_VERSION < (13, 0, 0) and "pyarrow_table" in str(constructor):
         request.applymarker(pytest.mark.xfail)
 
-    if PANDAS_VERSION < (2, 1) and "pandas_pyarrow" in str(constructor):
+    if (PANDAS_VERSION < (2, 1) or PYARROW_VERSION < (13,)) and "pandas_pyarrow" in str(
+        constructor
+    ):
         request.applymarker(pytest.mark.xfail)
 
     name = "reverse_cum_prod" if reverse else "cum_prod"
@@ -45,7 +47,9 @@ def test_cum_prod_series(
     if PYARROW_VERSION < (13, 0, 0) and "pyarrow_table" in str(constructor_eager):
         request.applymarker(pytest.mark.xfail)
 
-    if PANDAS_VERSION < (2, 1) and "pandas_pyarrow" in str(constructor_eager):
+    if (PANDAS_VERSION < (2, 1) or PYARROW_VERSION < (13,)) and "pandas_pyarrow" in str(
+        constructor_eager
+    ):
         request.applymarker(pytest.mark.xfail)
 
     df = nw.from_native(constructor_eager(data), eager_only=True)