Skip to content

Commit

Permalink
fix: python schema repr does not truncate output (#628)
Browse files Browse the repository at this point in the history
Fixes #466.

```
>>> import nanoarrow as na
>>> url = "https://github.com/apache/arrow-experiments/raw/main/data/arrow-commits/arrow-commits.arrows"
>>> schema = na.ArrayStream.from_url(url).schema
>>> schema
<Schema> non-nullable struct<commit: string, time: timestamp('us', 'UTC'), files: int32, merge: bool, message: string>
```
  • Loading branch information
danepitkin authored Sep 20, 2024
1 parent 08d14b3 commit 503548b
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 9 deletions.
13 changes: 8 additions & 5 deletions python/src/nanoarrow/_repr_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,15 @@ def make_class_label(obj, module=None):


def c_schema_to_string(obj, max_char_width=80):
    """Return the recursive string form of a C schema, optionally truncated.

    Parameters
    ----------
    obj
        Object exposing ``_to_string(recursive=..., max_chars=...)``.
    max_char_width : int, default 80
        Maximum length of the returned string. ``0`` is forwarded unchanged
        as ``max_chars=0`` and the result is returned without any truncation
        on the Python side; this is what lets the schema ``repr`` print the
        whole schema. Any other value is clamped to at least 10.

    Returns
    -------
    str
        The schema string. When a non-zero width is exceeded, the string is
        cut to ``max_char_width - 3`` characters and ``"..."`` is appended.

    Notes
    -----
    Before this change the width was always clamped to at least 10 and the
    result was always subject to truncation; there was no way to request the
    full string.
    """
    if max_char_width == 0:
        return obj._to_string(recursive=True, max_chars=max_char_width)

    max_char_width = max(max_char_width, 10)
    # Request one character more than the limit so that an over-long schema
    # can be told apart from one that fits exactly.
    c_schema_string = obj._to_string(recursive=True, max_chars=max_char_width + 1)
    if len(c_schema_string) > max_char_width:
        return c_schema_string[: max_char_width - 3] + "..."
    return c_schema_string


def metadata_repr(obj, indent=0, max_char_width=80):
Expand Down
8 changes: 4 additions & 4 deletions python/src/nanoarrow/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -498,7 +498,8 @@ def serialize(self, dst=None) -> Union[bytes, None]:
writer.write_stream(empty)

def __repr__(self) -> str:
    """Return the schema representation without a width limit."""
    # max_char_width=0 (not the default of 80) asks _schema_repr for the
    # entire schema string instead of a truncated one.
    return _schema_repr(self, max_char_width=0)

def __arrow_c_schema__(self):
    """Delegate to the wrapped ``_c_schema`` object's ``__arrow_c_schema__()``."""
    return self._c_schema.__arrow_c_schema__()
Expand Down Expand Up @@ -1302,10 +1303,9 @@ def _schema_repr(obj, max_char_width=80, prefix="<Schema> ", include_metadata=Tr

modifiers_str = " ".join(modifiers)
first_line_prefix = f"{prefix}{modifiers_str}"
max_char_width = max(max_char_width - len(first_line_prefix), 0)

schema_str = _repr_utils.c_schema_to_string(
obj._c_schema, max_char_width - len(first_line_prefix)
)
schema_str = _repr_utils.c_schema_to_string(obj._c_schema, max_char_width)
lines.append(f"{first_line_prefix}{schema_str}")

if include_metadata:
Expand Down
18 changes: 18 additions & 0 deletions python/tests/test_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,3 +259,21 @@ def test_schema_serialize():
schema.serialize(out)
schema_roundtrip = na.ArrayStream.from_readable(out.getvalue()).schema
assert repr(schema_roundtrip) == repr(schema)


def test_schema_repr():
    """repr() of a schema longer than 80 characters is shown in full."""
    fields = {
        "col1": na.int32(),
        "col2": na.int16(),
        "col3": na.string(),
        "col4": na.timestamp(unit=na.TimeUnit.SECOND),
    }
    schema = na.struct(fields, nullable=False)

    # The expected text is 94 characters, so any 80-character truncation
    # would make this comparison fail.
    expected = (
        "<Schema> non-nullable struct"
        "<col1: int32, col2: int16, col3: string, "
        "col4: timestamp('s', '')>"
    )
    assert repr(schema) == expected

0 comments on commit 503548b

Please sign in to comment.