pandas-dev · ldlin1 · Dec 7, 2024 · Dec 8, 2024 · Dec 8, 2024 · Dec 8, 2024
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -651,6 +651,8 @@ Conversion
 - Bug in :meth:`DataFrame.update` bool dtype being converted to object (:issue:`55509`)
 - Bug in :meth:`Series.astype` might modify read-only array inplace when casting to a string dtype (:issue:`57212`)
 - Bug in :meth:`Series.reindex` not maintaining ``float32`` type when a ``reindex`` introduces a missing value (:issue:`45857`)
+- Bug in logical operations (``&``, ``|``, ``^``) between a boolean :class:`Series` and a :class:`Series` with ``string[python]`` dtype not casting the string values to boolean (:issue:`60234`)
+- Bug in logical operations (``&``, ``|``, ``^``) between a boolean :class:`Series` and a :class:`Series` with ``string[pyarrow]`` dtype not casting the string values to boolean (:issue:`60234`)
 
 Strings
 ^^^^^^^

diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
@@ -90,12 +90,12 @@
     }
 
     ARROW_LOGICAL_FUNCS = {
-        "and_": pc.and_kleene,
-        "rand_": lambda x, y: pc.and_kleene(y, x),
-        "or_": pc.or_kleene,
-        "ror_": lambda x, y: pc.or_kleene(y, x),
-        "xor": pc.xor,
-        "rxor": lambda x, y: pc.xor(y, x),
+        "and_": lambda lhs, rhs: pc.and_kleene(*cast_for_logical(lhs, rhs)),
+        "rand_": lambda lhs, rhs: pc.and_kleene(*cast_for_logical(rhs, lhs)),
+        "or_": lambda lhs, rhs: pc.or_kleene(*cast_for_logical(lhs, rhs)),
+        "ror_": lambda lhs, rhs: pc.or_kleene(*cast_for_logical(rhs, lhs)),
+        "xor": lambda lhs, rhs: pc.xor(*cast_for_logical(lhs, rhs)),
+        "rxor": lambda lhs, rhs: pc.xor(*cast_for_logical(rhs, lhs)),
     }
 
     ARROW_BIT_WISE_FUNCS = {
@@ -107,6 +107,20 @@
         "rxor": lambda x, y: pc.bit_wise_xor(y, x),
     }
 
+    def convert_string_to_boolean_array(arr):
+        """Cast a (large_)string array to bool by emptiness; null -> False."""
+        if pa.types.is_string(arr.type) or pa.types.is_large_string(arr.type):
+            arr = pc.fill_null(pc.not_equal(pc.utf8_length(arr), 0), False)
+        return arr
+
+    def cast_for_logical(x, y):
+        """Return ``(x, y)``, casting strings to bool if exactly one is bool."""
+        # Operands are only coerced when a boolean is paired with a non-boolean;
+        # two booleans or two non-booleans pass through unchanged.
+        if pa.types.is_boolean(x.type) == pa.types.is_boolean(y.type):
+            return x, y
+        return convert_string_to_boolean_array(x), convert_string_to_boolean_array(y)
+
     def cast_for_truediv(
         arrow_array: pa.ChunkedArray, pa_object: pa.Array | pa.Scalar
     ) -> tuple[pa.ChunkedArray, pa.Array | pa.Scalar]:
@@ -822,6 +836,13 @@ def _evaluate_op_method(self, other, op, arrow_funcs) -> Self:
             result = pc_func(self._pa_array, other)
         except pa.ArrowNotImplementedError as err:
             raise TypeError(self._op_method_error_message(other_original, op)) from err
+        # NOTE(review): ``self`` exposes ``_pa_array``, so ``isinstance(self, ...)``
+        # looks always False here; the branch also returns a raw pyarrow array.
+        if (op.__name__ in ARROW_LOGICAL_FUNCS
+            and (isinstance(self, pa.lib.BooleanArray) !=
+            isinstance(other, pa.lib.BooleanArray))
+            ):
+            return pc.cast(result, pa.bool_())
         return type(self)(result)
 
     def _logical_method(self, other, op) -> Self:

diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py
@@ -435,6 +435,13 @@ def fill_bool(x, left=None):
     rvalues = right
 
     if should_extension_dispatch(lvalues, rvalues):
+        # Must cast if logical op between a boolean array and numpy-backed string array
+        rdtype = getattr(rvalues, "dtype", None)  # rvalues may be a scalar - confirm
+        if (lvalues.dtype == np.bool_ and rdtype == "string[python]") or (
+            lvalues.dtype == "string[python]" and rdtype == np.bool_
+        ):
+            lvalues = lvalues.astype(bool)
+            rvalues = rvalues.astype(bool)
         # Call the method on lvalues
         res_values = op(lvalues, rvalues)
 

diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py
@@ -740,3 +740,54 @@ def test_tolist(dtype):
     result = arr.tolist()
     expected = vals
     tm.assert_equal(result, expected)
+
+@pytest.mark.parametrize("dtype", ["string[pyarrow]"])
+def test_or_pyarrow_string(dtype):
+    # GH#60234: or-ing with a bool Series treats "" as False and "b" as True
+    with pd.option_context("future.infer_string", True):
+        bools = pd.Series([False, False])
+        strings = pd.Series(["", "b"], dtype=dtype)
+        expected = pd.Series([False, True], dtype=bool)
+        tm.assert_series_equal(bools | strings, expected)
+
+@pytest.mark.parametrize("dtype", ["string[pyarrow]"])
+def test_and_pyarrow_string(dtype):
+    # GH#60234  NOTE(review): an all-False left operand cannot expose a bad cast
+    with pd.option_context("future.infer_string", True):
+        bools = pd.Series([False, False])
+        strings = pd.Series(["", "b"], dtype=dtype)
+        expected = pd.Series([False, False], dtype=bool)
+        tm.assert_series_equal(bools & strings, expected)
+
+@pytest.mark.parametrize("dtype", ["string[pyarrow]"])
+def test_xor_pyarrow_string(dtype):
+    # GH#60234: xor with an all-False Series yields each string's truthiness
+    with pd.option_context("future.infer_string", True):
+        bools = pd.Series([False, False])
+        strings = pd.Series(["", "b"], dtype=dtype)
+        expected = pd.Series([False, True], dtype=bool)
+        tm.assert_series_equal(bools ^ strings, expected)
+
+@pytest.mark.parametrize("dtype", ["string[python]"])
+def test_or_numpy_string(dtype):
+    # GH#60234: or-ing with a bool Series treats "" as False and "b" as True
+    bools = pd.Series([False, False])
+    strings = pd.Series(["", "b"], dtype=dtype)
+    expected = pd.Series([False, True], dtype=bool)
+    tm.assert_series_equal(bools | strings, expected)
+
+@pytest.mark.parametrize("dtype", ["string[python]"])
+def test_and_numpy_string(dtype):
+    # GH#60234  NOTE(review): an all-False left operand cannot expose a bad cast
+    bools = pd.Series([False, False])
+    strings = pd.Series(["", "b"], dtype=dtype)
+    expected = pd.Series([False, False], dtype=bool)
+    tm.assert_series_equal(bools & strings, expected)
+
+@pytest.mark.parametrize("dtype", ["string[python]"])
+def test_xor_numpy_string(dtype):
+    # GH#60234: xor with an all-False Series yields each string's truthiness
+    bools = pd.Series([False, False])
+    strings = pd.Series(["", "b"], dtype=dtype)
+    expected = pd.Series([False, True], dtype=bool)
+    tm.assert_series_equal(bools ^ strings, expected)