Skip to content

Commit

Permalink
chore(python): Fix type stubs for updated module structure (#606)
Browse files Browse the repository at this point in the history
This PR updates the type stubs for the Cython modules to reflect the
updated module structure. I don't think this mechanism for generating stubs is
the best long-term strategy (it requires remembering to regenerate them!). We could
also omit the generated stubs from the source tree and
generate them only before packaging; however, that would make them
unavailable for local development (which is one of the primary short-term
benefits of including them).
  • Loading branch information
paleolimbot authored Sep 19, 2024
1 parent 3488ff1 commit 499d865
Show file tree
Hide file tree
Showing 11 changed files with 1,067 additions and 22 deletions.
15 changes: 6 additions & 9 deletions .github/workflows/python.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -64,15 +64,12 @@ jobs:
- name: Check type stubs
if: success() && matrix.python-version == '3.12'
run: |
pip install mypy "black==22.3.0"
python/generate_type_stubs.sh
if git diff --name-only | grep -e "\\.pxi$"; then
echo "Type stubs were changed. Update them with python/generate_type_stubs.sh."
fi
stubtest nanoarrow._lib
stubtest nanoarrow._ipc_lib
pip install mypy
cd src/nanoarrow
# Run stubtest against every Cython module found here so that new .pyx files
# are checked without editing this workflow. The sed expressions are anchored
# and escaped so that only the leading "./" and the trailing ".pyx" are
# stripped from each path.
for mod in $(find . -name "*.pyx" | sed -e 's|^\./||' -e 's|\.pyx$||'); do
  # Announce the module being checked; the bare "$mod" path is not a file, so
  # it cannot be printed with cat.
  echo "Checking type stubs for nanoarrow.$mod"
  stubtest "nanoarrow.$mod"
done
- name: Run doctests
if: success() && matrix.python-version == '3.12'
Expand Down
26 changes: 13 additions & 13 deletions python/generate_type_stubs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,7 @@ SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

pushd "${SOURCE_DIR}"

# Generate stubs using mypy
stubgen --module nanoarrow._lib --include-docstrings -o build/tmp
stubgen --module nanoarrow._ipc_lib --include-docstrings -o build/tmp

# Add license to the start of the files
# We'll add the license to the start of the files
LICENSE='
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
Expand All @@ -47,13 +43,17 @@ LICENSE='
# under the License.
'

echo "$LICENSE" > src/nanoarrow/_lib.pyi
cat build/tmp/nanoarrow/_lib.pyi >> src/nanoarrow/_lib.pyi

echo "$LICENSE" > src/nanoarrow/_ipc_lib.pyi
cat build/tmp/nanoarrow/_ipc_lib.pyi >> src/nanoarrow/_ipc_lib.pyi

# Reformat stubs
black src/nanoarrow/*.pyi
# Remove old stubs
find src/nanoarrow -name "*.pyi" -delete

# Generate new ones: one stub per Cython module. Each stub is written next to
# its .pyx file, prefixed with the license header, and formatted with black.
# The sed expressions are anchored and escaped so that only the leading "./"
# and the trailing ".pyx" are stripped from each path.
pushd src/nanoarrow
for mod in $(find . -name "*.pyx" | sed -e 's|^\./||' -e 's|\.pyx$||'); do
  stubgen --module "nanoarrow.${mod}" --include-docstrings -o ../../build/tmp
  echo "$LICENSE" > "${mod}.pyi"
  cat "../../build/tmp/nanoarrow/${mod}.pyi" >> "${mod}.pyi"
  black "${mod}.pyi"
done
popd

popd
187 changes: 187 additions & 0 deletions python/src/nanoarrow/_array.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

import _cython_3_0_11
import nanoarrow._device
from _typeshed import Incomplete
from nanoarrow._device import DeviceType as DeviceType
from typing import ClassVar

DEVICE_CPU: nanoarrow._device.Device
__reduce_cython__: _cython_3_0_11.cython_function_or_method
__setstate_cython__: _cython_3_0_11.cython_function_or_method
__test__: dict

class CArray:
    # Type stub for the compiled CArray class. Attributes typed ``Incomplete``
    # and ``*args, **kwargs`` signatures are placeholders that carry no type
    # information yet.
    # NOTE(review): ``PyCapsule`` is not imported in this stub -- confirm how
    # type checkers are expected to resolve it.
    __pyx_vtable__: ClassVar[PyCapsule] = ...
    buffers: Incomplete
    children: Incomplete
    device_id: Incomplete
    device_type: Incomplete
    device_type_id: Incomplete
    dictionary: Incomplete
    length: Incomplete
    n_buffers: Incomplete
    n_children: Incomplete
    null_count: Incomplete
    offset: Incomplete
    schema: Incomplete
    @classmethod
    def __init__(cls, *args, **kwargs) -> None:
        """Create and return a new object. See help(type) for accurate signature."""
    @staticmethod
    def allocate(*args, **kwargs):
        """Allocate a released ArrowArray"""
    def child(self, *args, **kwargs): ...
    def is_valid(self, *args, **kwargs):
        """Check for a non-null and non-released underlying ArrowArray"""
    def view(self, *args, **kwargs):
        """Allocate a :class:`CArrayView` to access the buffers of this array"""
    def __arrow_c_array__(self, *args, **kwargs):
        """
        Get a pair of PyCapsules containing a C ArrowArray representation of the object.

        Parameters
        ----------
        requested_schema : PyCapsule | None
            A PyCapsule containing a C ArrowSchema representation of a requested
            schema. Not supported.

        Returns
        -------
        Tuple[PyCapsule, PyCapsule]
            A pair of PyCapsules containing a C ArrowSchema and ArrowArray,
            respectively.
        """
    def __getitem__(self, index):
        """Return self[key]."""
    def __len__(self) -> int:
        """Return len(self)."""
    def __reduce__(self): ...

class CArrayBuilder:
    # Type stub for the compiled CArrayBuilder class. The ``*args, **kwargs``
    # signatures are placeholders that carry no type information yet.
    @classmethod
    def __init__(cls, *args, **kwargs) -> None:
        """Create and return a new object. See help(type) for accurate signature."""
    @staticmethod
    def allocate(*args, **kwargs):
        """Create a CArrayBuilder

        Allocates memory for an ArrowArray and populates it with nanoarrow's
        ArrowArray private_data/release callback implementation. This should
        usually be followed by :meth:`init_from_type` or :meth:`init_from_schema`.
        """
    def append_bytes(self, *args, **kwargs): ...
    def append_strings(self, *args, **kwargs): ...
    def finish(self, *args, **kwargs):
        """Finish building this array

        Performs any steps required to return a valid ArrowArray and optionally
        validates the output to ensure that the result is valid (given the information
        the array has available to it).

        Parameters
        ----------
        validation_level : None, "full", "default", "minimal", or "none", optional
            Explicitly define a validation level or use None to perform default
            validation if possible. Validation may not be possible if children
            were set that were not created by nanoarrow.
        """
    def finish_device(self, *args, **kwargs):
        """Finish building this array and export to an ArrowDeviceArray

        Calls :meth:`finish`, propagating device information into an ArrowDeviceArray.
        """
    def init_from_schema(self, *args, **kwargs): ...
    def init_from_type(self, *args, **kwargs): ...
    def is_empty(self, *args, **kwargs):
        """Check if any items have been appended to this builder"""
    def resolve_null_count(self, *args, **kwargs):
        """Ensure the output null count is synchronized with existing buffers

        Note that this will not attempt to access non-CPU buffers such that
        :attr:`null_count` might still be -1 after calling this method.
        """
    def set_buffer(self, *args, **kwargs):
        """Set an ArrowArray buffer

        Sets a buffer of this ArrowArray such that the pointer at array->buffers[i] is
        equal to buffer->data and such that the buffer's lifecycle is managed by
        the array. If move is True, the input Python object that previously wrapped
        the ArrowBuffer will be invalidated, which is usually the desired behaviour
        if you built or imported a buffer specifically to build this array. If move
        is False (the default), this function will make a shallow copy via another
        layer of Python object wrapping.
        """
    def set_child(self, *args, **kwargs):
        """Set an ArrowArray child

        Set a child of this array by performing a shallow copy or optionally
        transferring ownership to this object. The initialized child array
        must have been initialized before this call by initializing this
        builder with a schema containing the correct number of children.
        """
    def set_length(self, *args, **kwargs): ...
    def set_null_count(self, *args, **kwargs): ...
    def set_offset(self, *args, **kwargs): ...
    def start_appending(self, *args, **kwargs):
        """Use append mode for building this ArrowArray

        Calling this method is required to produce a valid array prior to calling
        :meth:`append_strings` or :meth:`append_bytes`.
        """
    def __reduce__(self): ...

class CArrayView:
    # Type stub for the compiled CArrayView class. Attributes typed
    # ``Incomplete`` and ``*args, **kwargs`` signatures are placeholders that
    # carry no type information yet.
    buffers: Incomplete
    children: Incomplete
    dictionary: Incomplete
    layout: Incomplete
    length: Incomplete
    n_buffers: Incomplete
    n_children: Incomplete
    null_count: Incomplete
    offset: Incomplete
    storage_type: Incomplete
    storage_type_id: Incomplete
    @classmethod
    def __init__(cls, *args, **kwargs) -> None:
        """Create and return a new object. See help(type) for accurate signature."""
    def buffer(self, *args, **kwargs): ...
    def buffer_type(self, *args, **kwargs): ...
    def child(self, *args, **kwargs): ...
    @staticmethod
    def from_array(*args, **kwargs): ...
    @staticmethod
    def from_schema(*args, **kwargs): ...
    def __len__(self) -> int:
        """Return len(self)."""
    def __reduce__(self): ...

class CDeviceArray:
    # Type stub for the compiled CDeviceArray class. Attributes typed
    # ``Incomplete`` and ``*args, **kwargs`` signatures are placeholders that
    # carry no type information yet.
    array: Incomplete
    device_id: Incomplete
    device_type: Incomplete
    device_type_id: Incomplete
    schema: Incomplete
    @classmethod
    def __init__(cls, *args, **kwargs) -> None:
        """Create and return a new object. See help(type) for accurate signature."""
    def view(self, *args, **kwargs): ...
    def __arrow_c_array__(self, *args, **kwargs): ...
    def __arrow_c_device_array__(self, *args, **kwargs): ...
    def __reduce__(self): ...
128 changes: 128 additions & 0 deletions python/src/nanoarrow/_array_stream.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

import _cython_3_0_11
import types
from _typeshed import Incomplete
from typing import ClassVar

__reduce_cython__: _cython_3_0_11.cython_function_or_method
__setstate_cython__: _cython_3_0_11.cython_function_or_method
__test__: dict

class CArrayStream:
    # Type stub for the compiled CArrayStream class. The ``*args, **kwargs``
    # signatures are placeholders that carry no type information yet.
    @classmethod
    def __init__(cls, *args, **kwargs) -> None:
        """Create and return a new object. See help(type) for accurate signature."""
    @staticmethod
    def allocate(*args, **kwargs):
        """Allocate a released ArrowArrayStream"""
    @staticmethod
    def from_c_arrays(*args, **kwargs):
        """Create an ArrowArrayStream from an existing set of arrays

        Given a previously resolved list of arrays, create an ArrowArrayStream
        representation of the sequence of chunks.

        Parameters
        ----------
        arrays : List[CArray]
            A list of arrays to use as batches.
        schema : CSchema
            The schema that will be returned. Must be type equal with the schema
            of each array (this is checked if validate is ``True``)
        move : bool, optional
            If True, transfer ownership from each array instead of creating a
            shallow copy. This is only safe if the caller knows the origin of the
            arrays and knows that they will not be accessed after this stream has been
            created.
        validate : bool, optional
            If True, enforce type equality between the provided schema and the schema
            of each array.
        """
    def get_next(self, *args, **kwargs):
        """Get the next Array from this stream

        Raises StopIteration when there are no more arrays in this stream.
        """
    def get_schema(self, *args, **kwargs):
        """Get the schema associated with this stream

        Calling this method will always issue a call to the underlying stream's
        get_schema callback.
        """
    def is_valid(self, *args, **kwargs):
        """Check for a non-null and non-released underlying ArrowArrayStream"""
    def release(self, *args, **kwargs):
        """Explicitly call the release callback of this stream"""
    def __arrow_c_stream__(self, *args, **kwargs):
        """
        Export the stream as an Arrow C stream PyCapsule.

        Parameters
        ----------
        requested_schema : PyCapsule | None
            A PyCapsule containing a C ArrowSchema representation of a requested
            schema. Not supported.

        Returns
        -------
        PyCapsule
        """
    def __enter__(self): ...
    def __exit__(
        self,
        type: type[BaseException] | None,
        value: BaseException | None,
        traceback: types.TracebackType | None,
    ): ...
    def __iter__(self):
        """Implement iter(self)."""
    def __next__(self): ...
    def __reduce__(self): ...

class CMaterializedArrayStream:
    # Type stub for the compiled CMaterializedArrayStream class. Attributes
    # typed ``Incomplete`` and ``*args, **kwargs`` signatures are placeholders
    # that carry no type information yet.
    # NOTE(review): ``PyCapsule`` is not imported in this stub -- confirm how
    # type checkers are expected to resolve it.
    __pyx_vtable__: ClassVar[PyCapsule] = ...
    arrays: Incomplete
    n_arrays: Incomplete
    schema: Incomplete
    @classmethod
    def __init__(cls, *args, **kwargs) -> None:
        """Create and return a new object. See help(type) for accurate signature."""
    def array(self, *args, **kwargs): ...
    def child(self, *args, **kwargs): ...
    @staticmethod
    def from_c_array(*args, **kwargs):
        """Create a materialized array stream from a single array"""
    @staticmethod
    def from_c_array_stream(*args, **kwargs):
        """Create a materialized array stream from an unmaterialized ArrowArrayStream"""
    @staticmethod
    def from_c_arrays(*args, **kwargs):
        """Create a materialized array stream from an existing iterable of arrays

        This is slightly more efficient than creating a stream and then consuming it
        because the implementation can avoid a shallow copy of each array.
        """
    def __arrow_c_stream__(self, *args, **kwargs): ...
    def __getitem__(self, index):
        """Return self[key]."""
    def __iter__(self):
        """Implement iter(self)."""
    def __len__(self) -> int:
        """Return len(self)."""
    def __reduce__(self): ...
1 change: 1 addition & 0 deletions python/src/nanoarrow/_array_stream.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,7 @@ cdef class CArrayStream:
return array_stream_capsule

def _addr(self) -> int:
    """Return the address held in ``self._ptr`` as an integer."""
    return <uintptr_t>self._ptr

def is_valid(self) -> bool:
Expand Down
Loading

0 comments on commit 499d865

Please sign in to comment.