Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove pyarrow as a direct dependency #2228

Merged
merged 4 commits into from
Jul 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/pythonbuild.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ jobs:
run: |
pip install uv
make setup-global-uv
uv pip uninstall --system pandas
uv pip uninstall --system pandas pyarrow
uv pip freeze
- name: Test with coverage
run: |
Expand Down Expand Up @@ -98,7 +98,7 @@ jobs:
run: |
pip install uv
make setup-global-uv
uv pip uninstall --system pandas
uv pip uninstall --system pandas pyarrow
uv pip freeze
- name: Run extras unit tests with coverage
# Skip this step if running on python 3.12 due to https://github.com/tensorflow/tensorflow/issues/62003
Expand Down
1 change: 1 addition & 0 deletions dev-requirements.in
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ autoflake
pillow
numpy
pandas
pyarrow
scikit-learn
types-requests
prometheus-client
Expand Down
1 change: 0 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ dependencies = [
"marshmallow-jsonschema>=0.12.0",
"mashumaro>=3.11",
"protobuf!=4.25.0",
"pyarrow",
"pygments",
"python-json-logger>=2.0.0",
"pytimeparse>=1.1.8",
Expand Down
3 changes: 2 additions & 1 deletion tests/flytekit/unit/core/test_type_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
from typing import List, Optional, Type

import mock
import pyarrow as pa
import pytest
import typing_extensions
from dataclasses_json import DataClassJsonMixin, dataclass_json
Expand Down Expand Up @@ -1408,9 +1407,11 @@ class UnsupportedEnumValues(Enum):
BLUE = 3


@pytest.mark.skipif("pyarrow" not in sys.modules, reason="pyarrow is not installed.")
@pytest.mark.skipif("pandas" not in sys.modules, reason="Pandas is not installed.")
def test_structured_dataset_type():
import pandas as pd
import pyarrow as pa
from pandas._testing import assert_frame_equal

name = "Name"
Expand Down
3 changes: 2 additions & 1 deletion tests/flytekit/unit/deck/test_renderer.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import sys

import pyarrow as pa
import pytest

from flytekit.deck.renderer import DEFAULT_MAX_COLS, DEFAULT_MAX_ROWS, ArrowRenderer, TopFrameRenderer


@pytest.mark.skipif("pyarrow" not in sys.modules, reason="Pyarrow is not installed.")
@pytest.mark.skipif("pandas" not in sys.modules, reason="Pandas is not installed.")
@pytest.mark.parametrize(
"rows, cols, max_rows, expected_max_rows, max_cols, expected_max_cols",
Expand All @@ -23,6 +23,7 @@
)
def test_renderer(rows, cols, max_rows, expected_max_rows, max_cols, expected_max_cols):
import pandas as pd
import pyarrow as pa

df = pd.DataFrame({f"abc-{k}": list(range(rows)) for k in range(cols)})
pa_df = pa.Table.from_pandas(df)
Expand Down
4 changes: 2 additions & 2 deletions tests/flytekit/unit/lazy_module/test_lazy_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@


def test_lazy_module():
mod = lazy_module("pyarrow")
assert mod.__name__ == "pyarrow"
mod = lazy_module("click")
assert mod.__name__ == "click"
mod = lazy_module("fake_module")
assert isinstance(mod, LazyModule)
with pytest.raises(ImportError, match="Module fake_module is not yet installed."):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,16 +1,17 @@
import sys
import typing

import pyarrow as pa
import pytest
from typing_extensions import Annotated

from flytekit import kwtypes, task


@pytest.mark.skipif("pyarrow" not in sys.modules, reason="Pyarrow is not installed.")
@pytest.mark.skipif("pandas" not in sys.modules, reason="Pandas is not installed.")
def test_structured_dataset_wf():
import pandas as pd
import pyarrow as pa

cols = kwtypes(Name=str, Age=int)
subset_cols = kwtypes(Name=str)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from collections import OrderedDict

import google.cloud.bigquery
import pyarrow as pa
import pytest
from fsspec.utils import get_protocol
from typing_extensions import Annotated
Expand Down Expand Up @@ -34,6 +33,7 @@
)

pd = pytest.importorskip("pandas")
pa = pytest.importorskip("pyarrow")

my_cols = kwtypes(w=typing.Dict[str, typing.Dict[str, int]], x=typing.List[typing.List[int]], y=int, z=str)

Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import typing

import mock
import pyarrow as pa
import pytest

from flytekit.core import context_manager
Expand All @@ -17,6 +16,7 @@
)

pd = pytest.importorskip("pandas")
pa = pytest.importorskip("pyarrow")
my_cols = kwtypes(w=typing.Dict[str, typing.Dict[str, int]], x=typing.List[typing.List[int]], y=int, z=str)
fields = [("some_int", pa.int32()), ("some_string", pa.string())]
arrow_schema = pa.schema(fields)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
from dataclasses import dataclass

import numpy as np
import pyarrow as pa
import pyarrow.parquet as pq
import pytest
from typing_extensions import Annotated

Expand All @@ -24,6 +22,8 @@
)

pd = pytest.importorskip("pandas")
pa = pytest.importorskip("pyarrow")
pq = pytest.importorskip("pyarrow.parquet")

PANDAS_PATH = FlyteContextManager.current_context().file_access.get_random_local_directory()
NUMPY_PATH = FlyteContextManager.current_context().file_access.get_random_local_directory()
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
from dataclasses import dataclass

import pyarrow as pa
import pytest
from typing_extensions import Annotated

from flytekit import FlyteContextManager, StructuredDataset, kwtypes, task, workflow

pd = pytest.importorskip("pandas")
pa = pytest.importorskip("pyarrow")

PANDAS_PATH = FlyteContextManager.current_context().file_access.get_random_local_directory()
NUMPY_PATH = FlyteContextManager.current_context().file_access.get_random_local_directory()
Expand Down
Loading