-
Notifications
You must be signed in to change notification settings - Fork 912
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
11 changed files
with
244 additions
and
72 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,6 +12,7 @@ strings | |
find_multiple | ||
findall | ||
padding | ||
json | ||
regex_flags | ||
regex_program | ||
repeat | ||
|
6 changes: 6 additions & 0 deletions
6
docs/cudf/source/user_guide/api_docs/pylibcudf/strings/json.rst
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
==== | ||
json | ||
==== | ||
|
||
.. automodule:: pylibcudf.strings.json | ||
:members: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,84 +1,26 @@ | ||
# Copyright (c) 2021-2024, NVIDIA CORPORATION. | ||
|
||
from libcpp.memory cimport unique_ptr | ||
from libcpp.utility cimport move | ||
import pylibcudf as plc | ||
from pylibcudf.strings.json cimport GetJsonObjectOptions | ||
|
||
from cudf.core.buffer import acquire_spill_lock | ||
|
||
from pylibcudf.libcudf.column.column cimport column | ||
from pylibcudf.libcudf.column.column_view cimport column_view | ||
from pylibcudf.libcudf.scalar.scalar cimport string_scalar | ||
from pylibcudf.libcudf.strings.json cimport ( | ||
get_json_object as cpp_get_json_object, | ||
get_json_object_options, | ||
) | ||
|
||
from cudf._lib.column cimport Column | ||
from cudf._lib.scalar cimport DeviceScalar | ||
|
||
|
||
@acquire_spill_lock() | ||
def get_json_object( | ||
Column col, object py_json_path, GetJsonObjectOptions options): | ||
Column col, | ||
object py_json_path, | ||
GetJsonObjectOptions options | ||
): | ||
""" | ||
Apply a JSONPath string to all rows in an input column | ||
of json strings. | ||
""" | ||
cdef unique_ptr[column] c_result | ||
|
||
cdef column_view col_view = col.view() | ||
cdef DeviceScalar json_path = py_json_path.device_value | ||
|
||
cdef const string_scalar* scalar_json_path = <const string_scalar*>( | ||
json_path.get_raw_ptr() | ||
plc_column = plc.strings.json.get_json_object( | ||
col.to_pylibcudf(mode="read"), | ||
py_json_path.device_value.c_value, | ||
options | ||
) | ||
|
||
with nogil: | ||
c_result = move(cpp_get_json_object( | ||
col_view, | ||
scalar_json_path[0], | ||
options.options, | ||
)) | ||
|
||
return Column.from_unique_ptr(move(c_result)) | ||
|
||
|
||
cdef class GetJsonObjectOptions: | ||
cdef get_json_object_options options | ||
|
||
def __init__( | ||
self, | ||
*, | ||
allow_single_quotes=False, | ||
strip_quotes_from_single_strings=True, | ||
missing_fields_as_nulls=False | ||
): | ||
self.options.set_allow_single_quotes(allow_single_quotes) | ||
self.options.set_strip_quotes_from_single_strings( | ||
strip_quotes_from_single_strings | ||
) | ||
self.options.set_missing_fields_as_nulls(missing_fields_as_nulls) | ||
|
||
@property | ||
def allow_single_quotes(self): | ||
return self.options.get_allow_single_quotes() | ||
|
||
@property | ||
def strip_quotes_from_single_strings(self): | ||
return self.options.get_strip_quotes_from_single_strings() | ||
|
||
@property | ||
def missing_fields_as_nulls(self): | ||
return self.options.get_missing_fields_as_nulls() | ||
|
||
@allow_single_quotes.setter | ||
def allow_single_quotes(self, val): | ||
self.options.set_allow_single_quotes(val) | ||
|
||
@strip_quotes_from_single_strings.setter | ||
def strip_quotes_from_single_strings(self, val): | ||
self.options.set_strip_quotes_from_single_strings(val) | ||
|
||
@missing_fields_as_nulls.setter | ||
def missing_fields_as_nulls(self, val): | ||
self.options.set_missing_fields_as_nulls(val) | ||
return Column.from_pylibcudf(plc_column) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
# Copyright (c) 2024, NVIDIA CORPORATION. | ||
|
||
from pylibcudf.column cimport Column | ||
from pylibcudf.libcudf.strings.json cimport get_json_object_options | ||
from pylibcudf.scalar cimport Scalar | ||
|
||
|
||
cdef class GetJsonObjectOptions: | ||
cdef get_json_object_options options | ||
|
||
|
||
cpdef Column get_json_object( | ||
Column col, | ||
Scalar json_path, | ||
GetJsonObjectOptions options=* | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,154 @@ | ||
# Copyright (c) 2024, NVIDIA CORPORATION. | ||
|
||
from cython.operator cimport dereference | ||
from libcpp cimport bool | ||
from libcpp.memory cimport unique_ptr | ||
from libcpp.utility cimport move | ||
from pylibcudf.column cimport Column | ||
from pylibcudf.libcudf.column.column cimport column | ||
from pylibcudf.libcudf.scalar.scalar cimport string_scalar | ||
from pylibcudf.libcudf.strings cimport json as cpp_json | ||
from pylibcudf.scalar cimport Scalar | ||
|
||
|
||
cdef class GetJsonObjectOptions: | ||
"""Settings for ``get_json_object()``""" | ||
def __init__( | ||
self, | ||
*, | ||
allow_single_quotes=False, | ||
strip_quotes_from_single_strings=True, | ||
missing_fields_as_nulls=False | ||
): | ||
self.set_allow_single_quotes(allow_single_quotes) | ||
self.set_strip_quotes_from_single_strings( | ||
strip_quotes_from_single_strings | ||
) | ||
self.set_missing_fields_as_nulls(missing_fields_as_nulls) | ||
|
||
def get_allow_single_quotes(self): | ||
""" | ||
Returns true/false depending on whether single-quotes for representing strings | ||
are allowed. | ||
Returns | ||
------- | ||
bool | ||
true if single-quotes are allowed, false otherwise. | ||
""" | ||
return self.options.get_allow_single_quotes() | ||
|
||
def get_strip_quotes_from_single_strings(self): | ||
""" | ||
Returns true/false depending on whether individually returned string values have | ||
their quotes stripped. | ||
Returns | ||
------- | ||
bool | ||
true if individually returned string values have their quotes stripped. | ||
""" | ||
return self.options.get_strip_quotes_from_single_strings() | ||
|
||
def get_missing_fields_as_nulls(self): | ||
""" | ||
Whether a field not contained by an object is to be interpreted as null. | ||
Returns | ||
------- | ||
bool | ||
true if missing fields are interpreted as null. | ||
""" | ||
return self.options.get_missing_fields_as_nulls() | ||
|
||
def set_allow_single_quotes(self, bool val): | ||
""" | ||
Set whether single-quotes for strings are allowed. | ||
Parameters | ||
---------- | ||
val : bool | ||
Whether to allow single quotes | ||
Returns | ||
------- | ||
None | ||
""" | ||
self.options.set_allow_single_quotes(val) | ||
|
||
def set_strip_quotes_from_single_strings(self, bool val): | ||
""" | ||
Set whether individually returned string values have their quotes stripped. | ||
Parameters | ||
---------- | ||
val : bool | ||
Whether to strip quotes from single strings. | ||
Returns | ||
------- | ||
None | ||
""" | ||
self.options.set_strip_quotes_from_single_strings(val) | ||
|
||
def set_missing_fields_as_nulls(self, bool val): | ||
""" | ||
Set whether missing fields are interpreted as null. | ||
Parameters | ||
---------- | ||
val : bool | ||
Whether to treat missing fields as nulls. | ||
Returns | ||
------- | ||
None | ||
""" | ||
self.options.set_missing_fields_as_nulls(val) | ||
|
||
|
||
cpdef Column get_json_object( | ||
Column col, | ||
Scalar json_path, | ||
GetJsonObjectOptions options=None | ||
): | ||
""" | ||
Apply a JSONPath string to all rows in an input strings column. | ||
For details, see :cpp:func:`cudf::strings::get_json_object` | ||
Parameters | ||
---------- | ||
col : Column | ||
The input strings column. Each row must contain a valid json string. | ||
json_path : Scalar | ||
The JSONPath string to be applied to each row. | ||
options : GetJsonObjectOptions, optional | ||
Options for controlling the behavior of the function. If None, a default-constructed ``GetJsonObjectOptions()`` is used. | ||
Returns | ||
------- | ||
Column | ||
New strings column containing the retrieved json object strings. | ||
""" | ||
cdef unique_ptr[column] c_result | ||
cdef string_scalar* c_json_path = <string_scalar*>( | ||
json_path.c_obj.get() | ||
) | ||
if options is None: | ||
options = GetJsonObjectOptions() | ||
|
||
cdef cpp_json.get_json_object_options c_options = options.options | ||
|
||
with nogil: | ||
c_result = move( | ||
cpp_json.get_json_object( | ||
col.view(), | ||
dereference(c_json_path), | ||
c_options | ||
) | ||
) | ||
|
||
return Column.from_libcudf(move(c_result)) |
Oops, something went wrong.