From 824a2738dc0b2cdd8dce2d4256c9dc34bb589e6b Mon Sep 17 00:00:00 2001
From: Hedeer El Showk <144284759+hedeershowk@users.noreply.github.com>
Date: Wed, 27 Sep 2023 12:50:45 -0400
Subject: [PATCH] BUG: add pyarrow autogenerated prefix (#55115)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* add pyarrow autogenerated prefix

* whats new bug fix

* test with no header and pyarrow

* only test pyarrow

* BUG: This fixes #55009 (`raw=True` caused `apply` method of `DataFrame` to ignore passed arguments) (#55089)

* fixes #55009

* update documentation

* write documentation

* add test

* change formatting

* cite DataFrame directly in docs

Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>

* PR review feedback

* Update doc/source/whatsnew/v2.2.0.rst

Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>

* alphabetical whatsnew

---------

Co-authored-by: Martin Šícho <sichom@vscht.cz>
Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 doc/source/whatsnew/v2.2.0.rst            |  1 +
 pandas/io/parsers/arrow_parser_wrapper.py |  6 ++++++
 pandas/tests/io/parser/test_header.py     | 18 ++++++++++++++++++
 3 files changed, 25 insertions(+)

diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst
index 0760840f9950a..445b93705cde5 100644
--- a/doc/source/whatsnew/v2.2.0.rst
+++ b/doc/source/whatsnew/v2.2.0.rst
@@ -314,6 +314,7 @@ MultiIndex
 I/O
 ^^^
 - Bug in :func:`read_csv` where ``on_bad_lines="warn"`` would write to ``stderr`` instead of raise a Python warning. This now yields a :class:`.errors.ParserWarning` (:issue:`54296`)
+- Bug in :func:`read_csv` with ``engine="pyarrow"`` where ``usecols`` did not work for a CSV file with no header row (:issue:`54459`)
 - Bug in :func:`read_excel`, with ``engine="xlrd"`` (``xls`` files) erroring when file contains NaNs/Infs (:issue:`54564`)
 - Bug in :func:`to_excel`, with ``OdsWriter`` (``ods`` files) writing boolean/string value (:issue:`54994`)
 
diff --git a/pandas/io/parsers/arrow_parser_wrapper.py b/pandas/io/parsers/arrow_parser_wrapper.py
index 765a4ffcd2cb9..35965c90ee7fb 100644
--- a/pandas/io/parsers/arrow_parser_wrapper.py
+++ b/pandas/io/parsers/arrow_parser_wrapper.py
@@ -130,6 +130,12 @@ def handle_warning(invalid_row):
             )
         }
         self.convert_options["strings_can_be_null"] = "" in self.kwds["null_values"]
+        # autogenerated column names are prefixed with 'f' in pyarrow.csv
+        if self.header is None and "include_columns" in self.convert_options:
+            self.convert_options["include_columns"] = [
+                f"f{n}" for n in self.convert_options["include_columns"]
+            ]
+
         self.read_options = {
             "autogenerate_column_names": self.header is None,
             "skip_rows": self.header
diff --git a/pandas/tests/io/parser/test_header.py b/pandas/tests/io/parser/test_header.py
index d72174c40478e..d6eab59074dd6 100644
--- a/pandas/tests/io/parser/test_header.py
+++ b/pandas/tests/io/parser/test_header.py
@@ -684,3 +684,21 @@ def test_header_delim_whitespace(all_parsers):
     result = parser.read_csv(StringIO(data), delim_whitespace=True)
     expected = DataFrame({"a,b": ["1,2", "3,4"]})
     tm.assert_frame_equal(result, expected)
+
+
+def test_usecols_no_header_pyarrow(pyarrow_parser_only):
+    parser = pyarrow_parser_only
+    data = """
+a,i,x
+b,j,y
+"""
+    result = parser.read_csv(
+        StringIO(data),
+        header=None,
+        usecols=[0, 1],
+        dtype="string[pyarrow]",
+        dtype_backend="pyarrow",
+        engine="pyarrow",
+    )
+    expected = DataFrame([["a", "i"], ["b", "j"]], dtype="string[pyarrow]")
+    tm.assert_frame_equal(result, expected)