From 0e2099089406c6d5616bf9e8872154fee4960ea7 Mon Sep 17 00:00:00 2001
From: Xiao Yuan
Date: Wed, 20 Nov 2024 22:47:33 +0800
Subject: [PATCH 1/5] BUG: fix to_datetime with np.datetime64[ps] giving wrong
 conversion (#60342)

---
 doc/source/whatsnew/v3.0.0.rst                       |  1 +
 .../_libs/src/vendored/numpy/datetime/np_datetime.c  | 11 ++++++-----
 pandas/tests/tools/test_to_datetime.py               |  9 +++++++++
 3 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 7da2f968b900b..5f7aed8ed9786 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -626,6 +626,7 @@ Datetimelike
 - Bug in :meth:`Series.dt.microsecond` producing incorrect results for pyarrow backed :class:`Series`. (:issue:`59154`)
 - Bug in :meth:`to_datetime` not respecting dayfirst if an uncommon date string was passed. (:issue:`58859`)
 - Bug in :meth:`to_datetime` reports incorrect index in case of any failure scenario. (:issue:`58298`)
+- Bug in :meth:`to_datetime` wrongly converting when ``arg`` is a ``np.datetime64`` object with unit ``ps`` (:issue:`60341`)
 - Bug in setting scalar values with mismatched resolution into arrays with non-nanosecond ``datetime64``, ``timedelta64`` or :class:`DatetimeTZDtype` incorrectly truncating those scalars (:issue:`56410`)

 Timedelta
diff --git a/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c b/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c
index cc65f34d6b6fe..9a022095feee9 100644
--- a/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c
+++ b/pandas/_libs/src/vendored/numpy/datetime/np_datetime.c
@@ -660,11 +660,12 @@ void pandas_datetime_to_datetimestruct(npy_datetime dt, NPY_DATETIMEUNIT base,
     perday = 24LL * 60 * 60 * 1000 * 1000 * 1000 * 1000;

     set_datetimestruct_days(extract_unit(&dt, perday), out);
-    out->hour = (npy_int32)extract_unit(&dt, 1000LL * 1000 * 1000 * 60 * 60);
-    out->min = (npy_int32)extract_unit(&dt, 1000LL * 1000 * 1000 * 60);
-    out->sec = (npy_int32)extract_unit(&dt, 1000LL * 1000 * 1000);
-    out->us = (npy_int32)extract_unit(&dt, 1000LL);
-    out->ps = (npy_int32)(dt * 1000);
+    out->hour =
+        (npy_int32)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 * 60 * 60);
+    out->min = (npy_int32)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 * 60);
+    out->sec = (npy_int32)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000);
+    out->us = (npy_int32)extract_unit(&dt, 1000LL * 1000);
+    out->ps = (npy_int32)(dt);
     break;

   case NPY_FR_fs:
diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py
index a9d3c235f63f6..b73839f406a29 100644
--- a/pandas/tests/tools/test_to_datetime.py
+++ b/pandas/tests/tools/test_to_datetime.py
@@ -3668,3 +3668,12 @@ def test_to_datetime_mixed_awareness_mixed_types(aware_val, naive_val, naive_fir
         to_datetime(vec, format="mixed")
     with pytest.raises(ValueError, match=msg):
         DatetimeIndex(vec)
+
+
+def test_to_datetime_wrapped_datetime64_ps():
+    # GH#60341
+    result = to_datetime([np.datetime64(1901901901901, "ps")])
+    expected = DatetimeIndex(
+        ["1970-01-01 00:00:01.901901901"], dtype="datetime64[ns]", freq=None
+    )
+    tm.assert_index_equal(result, expected)

From ff53ca1486dd10b0f2883987f082a79f3a55c409 Mon Sep 17 00:00:00 2001
From: Tuhin Sharma
Date: Thu, 21 Nov 2024 00:21:30 +0530
Subject: [PATCH 2/5] DOC: fix SA01 for pandas.errors.AttributeConflictWarning
 (#60367)

* DOC: fix SA01 for pandas.errors.AttributeConflictWarning

* DOC: fix SA01 for pandas.errors.AttributeConflictWarning
---
 ci/code_checks.sh         | 1 -
 pandas/errors/__init__.py | 6 ++++++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 53690e9b78b8a..fe45ce02d5e44 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -114,7 +114,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.core.resample.Resampler.std SA01" \
         -i "pandas.core.resample.Resampler.transform PR01,RT03,SA01" \
         -i "pandas.core.resample.Resampler.var SA01" \
-        -i "pandas.errors.AttributeConflictWarning SA01" \
         -i "pandas.errors.ChainedAssignmentError SA01" \
         -i "pandas.errors.DuplicateLabelError SA01" \
         -i "pandas.errors.IntCastingNaNError SA01" \
diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py
index cacbfb49c311f..84f7239c6549d 100644
--- a/pandas/errors/__init__.py
+++ b/pandas/errors/__init__.py
@@ -672,6 +672,12 @@ class AttributeConflictWarning(Warning):
     name than the existing index on an HDFStore or attempting to append an index
     with a different frequency than the existing index on an HDFStore.

+    See Also
+    --------
+    HDFStore : Dict-like IO interface for storing pandas objects in PyTables.
+    DataFrame.to_hdf : Write the contained data to an HDF5 file using HDFStore.
+    read_hdf : Read from an HDF5 file into a DataFrame.
+
     Examples
     --------
     >>> idx1 = pd.Index(["a", "b"], name="name1")

From 24df015ad4ada9f58e6874b54737e579a62a7a53 Mon Sep 17 00:00:00 2001
From: ensalada-de-pechuga <127701043+ensalada-de-pechuga@users.noreply.github.com>
Date: Thu, 21 Nov 2024 03:55:02 +0900
Subject: [PATCH 3/5] DOC: Fix docstrings for SeriesGroupBy monotonic and nth
 (#60375)

* fix docstrings and remove from code_checks.sh

* fix SeriesGroupBy.is_monotonic_decreasing See Also section (decreasing -> increasing)

* remove DataFrameGroupBy.nth from code_checks.sh

---------

Co-authored-by: root
---
 ci/code_checks.sh              |  4 ----
 pandas/core/groupby/generic.py | 10 ++++++++++
 pandas/core/groupby/groupby.py | 13 -------------
 3 files changed, 10 insertions(+), 17 deletions(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index fe45ce02d5e44..633d767c63037 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -92,15 +92,11 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.core.groupby.DataFrameGroupBy.boxplot PR07,RT03,SA01" \
         -i "pandas.core.groupby.DataFrameGroupBy.get_group RT03,SA01" \
         -i "pandas.core.groupby.DataFrameGroupBy.indices SA01" \
-        -i "pandas.core.groupby.DataFrameGroupBy.nth PR02" \
         -i "pandas.core.groupby.DataFrameGroupBy.nunique SA01" \
         -i "pandas.core.groupby.DataFrameGroupBy.plot PR02" \
         -i "pandas.core.groupby.DataFrameGroupBy.sem SA01" \
         -i "pandas.core.groupby.SeriesGroupBy.get_group RT03,SA01" \
         -i "pandas.core.groupby.SeriesGroupBy.indices SA01" \
-        -i "pandas.core.groupby.SeriesGroupBy.is_monotonic_decreasing SA01" \
-        -i "pandas.core.groupby.SeriesGroupBy.is_monotonic_increasing SA01" \
-        -i "pandas.core.groupby.SeriesGroupBy.nth PR02" \
         -i "pandas.core.groupby.SeriesGroupBy.plot PR02" \
         -i "pandas.core.groupby.SeriesGroupBy.sem SA01" \
         -i "pandas.core.resample.Resampler.get_group RT03,SA01" \
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 5ba382bf66bb7..35ec09892ede6 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -1443,6 +1443,11 @@ def is_monotonic_increasing(self) -> Series:
         -------
         Series

+        See Also
+        --------
+        SeriesGroupBy.is_monotonic_decreasing : Return whether each group's values
+            are monotonically decreasing.
+
         Examples
         --------
         >>> s = pd.Series([2, 1, 3, 4], index=["Falcon", "Falcon", "Parrot", "Parrot"])
@@ -1462,6 +1467,11 @@ def is_monotonic_decreasing(self) -> Series:
         -------
         Series

+        See Also
+        --------
+        SeriesGroupBy.is_monotonic_increasing : Return whether each group's values
+            are monotonically increasing.
+
         Examples
         --------
         >>> s = pd.Series([2, 1, 3, 4], index=["Falcon", "Falcon", "Parrot", "Parrot"])
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 9c30132347111..ad23127ad449f 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -3983,19 +3983,6 @@ def nth(self) -> GroupByNthSelector:
         'all' or 'any'; this is equivalent to calling dropna(how=dropna) before the
         groupby.

-        Parameters
-        ----------
-        n : int, slice or list of ints and slices
-            A single nth value for the row or a list of nth values or slices.
-
-            .. versionchanged:: 1.4.0
-                Added slice and lists containing slices.
-                Added index notation.
-
-        dropna : {'any', 'all', None}, default None
-            Apply the specified dropna operation before counting which row is
-            the nth row. Only supported if n is an int.
-
         Returns
         -------
         Series or DataFrame

From 72ab3fdc7a3530b885a466db88bbb38de8d5c6b9 Mon Sep 17 00:00:00 2001
From: Ivruix <52746744+Ivruix@users.noreply.github.com>
Date: Wed, 20 Nov 2024 22:00:08 +0300
Subject: [PATCH 4/5] DOC: fix docstring validation errors for
 pandas.Series.dt.freq (#60377)

* Added docs for Series.dt.freq and removed from ci/code_checks.sh

* Fix code style
---
 ci/code_checks.sh                |  1 -
 pandas/core/indexes/accessors.py | 22 ++++++++++++++++++++++
 2 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 633d767c63037..379f7cb5f037d 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -73,7 +73,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Period.freq GL08" \
         -i "pandas.Period.ordinal GL08" \
         -i "pandas.RangeIndex.from_range PR01,SA01" \
-        -i "pandas.Series.dt.freq GL08" \
         -i "pandas.Series.dt.unit GL08" \
         -i "pandas.Series.pad PR01,SA01" \
         -i "pandas.Timedelta.max PR02" \
diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py
index e2dc71f68a65b..c404323a1168c 100644
--- a/pandas/core/indexes/accessors.py
+++ b/pandas/core/indexes/accessors.py
@@ -373,6 +373,28 @@ def to_pydatetime(self) -> Series:

     @property
     def freq(self):
+        """
+        Tries to return a string representing a frequency generated by infer_freq.
+
+        Returns None if it can't autodetect the frequency.
+
+        See Also
+        --------
+        Series.dt.to_period : Cast to PeriodArray/PeriodIndex at a particular
+            frequency.
+
+        Examples
+        --------
+        >>> ser = pd.Series(["2024-01-01", "2024-01-02", "2024-01-03", "2024-01-04"])
+        >>> ser = pd.to_datetime(ser)
+        >>> ser.dt.freq
+        'D'
+
+        >>> ser = pd.Series(["2022-01-01", "2024-01-01", "2026-01-01", "2028-01-01"])
+        >>> ser = pd.to_datetime(ser)
+        >>> ser.dt.freq
+        '2YS-JAN'
+        """
         return self._get_values().inferred_freq

     def isocalendar(self) -> DataFrame:

From 1c986d6213904fd7d9acc5622dc91d029d3f1218 Mon Sep 17 00:00:00 2001
From: Joseph Kleinhenz
Date: Wed, 20 Nov 2024 23:52:11 -0800
Subject: [PATCH 5/5] ENH: expose `to_pandas_kwargs` in `read_parquet` with
 pyarrow backend (#59654)

Co-authored-by: Joseph Kleinhenz
Co-authored-by: Xiao Yuan
Co-authored-by: Joris Van den Bossche
---
 doc/source/whatsnew/v3.0.0.rst  |  1 +
 pandas/io/_util.py              |  5 ++++-
 pandas/io/parquet.py            | 22 ++++++++++++++++++++--
 pandas/tests/io/test_parquet.py | 14 ++++++++++++++
 4 files changed, 39 insertions(+), 3 deletions(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 5f7aed8ed9786..fbf2bed550c85 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -54,6 +54,7 @@ Other enhancements
 - :meth:`Series.cummin` and :meth:`Series.cummax` now supports :class:`CategoricalDtype` (:issue:`52335`)
 - :meth:`Series.plot` now correctly handle the ``ylabel`` parameter for pie charts, allowing for explicit control over the y-axis label (:issue:`58239`)
 - :meth:`DataFrame.plot.scatter` argument ``c`` now accepts a column of strings, where rows with the same string are colored identically (:issue:`16827` and :issue:`16485`)
+- :func:`read_parquet` accepts ``to_pandas_kwargs``, which are forwarded to :meth:`pyarrow.Table.to_pandas`, enabling additional keywords to customize the conversion to pandas, such as ``maps_as_pydicts`` to read the Parquet map data type as Python dictionaries (:issue:`56842`)
 - :meth:`DataFrameGroupBy.transform`, :meth:`SeriesGroupBy.transform`, :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, :meth:`RollingGroupby.apply`, :meth:`ExpandingGroupby.apply`, :meth:`Rolling.apply`, :meth:`Expanding.apply`, :meth:`DataFrame.apply` with ``engine="numba"`` now supports positional arguments passed as kwargs (:issue:`58995`)
 - :meth:`Series.map` can now accept kwargs to pass on to func (:issue:`59814`)
 - :meth:`pandas.concat` will raise a ``ValueError`` when ``ignore_index=True`` and ``keys`` is not ``None`` (:issue:`59274`)
diff --git a/pandas/io/_util.py b/pandas/io/_util.py
index 21203ad036fc6..9778a404e23e0 100644
--- a/pandas/io/_util.py
+++ b/pandas/io/_util.py
@@ -60,9 +60,12 @@ def arrow_table_to_pandas(
     table: pyarrow.Table,
     dtype_backend: DtypeBackend | Literal["numpy"] | lib.NoDefault = lib.no_default,
     null_to_int64: bool = False,
+    to_pandas_kwargs: dict | None = None,
 ) -> pd.DataFrame:
     pa = import_optional_dependency("pyarrow")

+    to_pandas_kwargs = {} if to_pandas_kwargs is None else to_pandas_kwargs
+
     types_mapper: type[pd.ArrowDtype] | None | Callable
     if dtype_backend == "numpy_nullable":
         mapping = _arrow_dtype_mapping()
@@ -80,5 +83,5 @@ def arrow_table_to_pandas(
     else:
         raise NotImplementedError

-    df = table.to_pandas(types_mapper=types_mapper)
+    df = table.to_pandas(types_mapper=types_mapper, **to_pandas_kwargs)
     return df
diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py
index 116f228faca93..6a5a83088e986 100644
--- a/pandas/io/parquet.py
+++ b/pandas/io/parquet.py
@@ -242,6 +242,7 @@ def read(
         dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
         storage_options: StorageOptions | None = None,
         filesystem=None,
+        to_pandas_kwargs: dict[str, Any] | None = None,
         **kwargs,
     ) -> DataFrame:
         kwargs["use_pandas_metadata"] = True
@@ -266,7 +267,11 @@ def read(
                 "make_block is deprecated",
                 DeprecationWarning,
             )
-            result = arrow_table_to_pandas(pa_table, dtype_backend=dtype_backend)
+            result = arrow_table_to_pandas(
+                pa_table,
+                dtype_backend=dtype_backend,
+                to_pandas_kwargs=to_pandas_kwargs,
+            )

             if pa_table.schema.metadata:
                 if b"PANDAS_ATTRS" in pa_table.schema.metadata:
@@ -347,6 +352,7 @@ def read(
         filters=None,
         storage_options: StorageOptions | None = None,
         filesystem=None,
+        to_pandas_kwargs: dict | None = None,
         **kwargs,
     ) -> DataFrame:
         parquet_kwargs: dict[str, Any] = {}
@@ -362,6 +368,10 @@ def read(
             raise NotImplementedError(
                 "filesystem is not implemented for the fastparquet engine."
             )
+        if to_pandas_kwargs is not None:
+            raise NotImplementedError(
+                "to_pandas_kwargs is not implemented for the fastparquet engine."
+            )
         path = stringify_path(path)
         handles = None
         if is_fsspec_url(path):
@@ -452,7 +462,7 @@ def to_parquet(
         .. versionadded:: 2.1.0

     kwargs
-        Additional keyword arguments passed to the engine
+        Additional keyword arguments passed to the engine.

     Returns
     -------
@@ -491,6 +501,7 @@ def read_parquet(
     dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
     filesystem: Any = None,
     filters: list[tuple] | list[list[tuple]] | None = None,
+    to_pandas_kwargs: dict | None = None,
     **kwargs,
 ) -> DataFrame:
     """
@@ -564,6 +575,12 @@ def read_parquet(

         .. versionadded:: 2.1.0

+    to_pandas_kwargs : dict | None, default None
+        Keyword arguments to pass through to :meth:`pyarrow.Table.to_pandas`
+        when ``engine="pyarrow"``.
+
+        .. versionadded:: 3.0.0
+
     **kwargs
         Any additional kwargs are passed to the engine.

@@ -636,5 +653,6 @@ def read_parquet(
         storage_options=storage_options,
         dtype_backend=dtype_backend,
         filesystem=filesystem,
+        to_pandas_kwargs=to_pandas_kwargs,
         **kwargs,
     )
diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
index 31cdb6626d237..7919bb956dc7a 100644
--- a/pandas/tests/io/test_parquet.py
+++ b/pandas/tests/io/test_parquet.py
@@ -1172,6 +1172,20 @@ def test_non_nanosecond_timestamps(self, temp_file):
         )
         tm.assert_frame_equal(result, expected)

+    def test_maps_as_pydicts(self, pa):
+        pyarrow = pytest.importorskip("pyarrow", "13.0.0")
+
+        schema = pyarrow.schema(
+            [("foo", pyarrow.map_(pyarrow.string(), pyarrow.int64()))]
+        )
+        df = pd.DataFrame([{"foo": {"A": 1}}, {"foo": {"B": 2}}])
+        check_round_trip(
+            df,
+            pa,
+            write_kwargs={"schema": schema},
+            read_kwargs={"to_pandas_kwargs": {"maps_as_pydicts": "strict"}},
+        )
+

 class TestParquetFastParquet(Base):
     def test_basic(self, fp, df_full, request):
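
For illustration, a minimal usage sketch of the ``to_pandas_kwargs`` parameter
introduced in PATCH 5/5. The file path and the map-typed column are hypothetical,
and pyarrow >= 13.0.0 is assumed for ``maps_as_pydicts``, matching the
``importorskip`` pin in the test above:

    import pandas as pd

    # The dict is forwarded verbatim to pyarrow.Table.to_pandas(); here
    # "strict" converts Parquet map columns to plain Python dicts and raises
    # if duplicate keys would make the conversion lossy.
    df = pd.read_parquet(
        "data.parquet",  # hypothetical input file, for illustration only
        engine="pyarrow",  # the fastparquet engine raises NotImplementedError
        to_pandas_kwargs={"maps_as_pydicts": "strict"},
    )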