-
Notifications
You must be signed in to change notification settings - Fork 912
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Eagerly populate the class dict for cudf.pandas proxy types #14534
Changes from all commits
dba3586
bd69575
16b9340
8953944
3a27c54
19e3038
cef9adc
73e714a
483872a
1066c2b
68df373
cdef427
8c14c0c
fef2768
b810679
d0f094b
7ef0757
5d53462
de5cb7a
4d928eb
a8f3222
8e9eaad
180228d
fbefc7f
eba403c
16a0f21
764bd72
a29df18
3290cc9
fd6adce
2f4fdb6
964d95f
1923bda
ef5784c
f6b6d4a
f56deb9
917a0b5
3d3ff0a
c95289a
16a647d
9891545
e3dc345
c2ca0c4
a078724
fa542bb
9526807
e5bdfd2
624c71e
442593a
bee2c92
e3f7393
2e38b7d
30e9b59
616b206
169148b
840c34e
979926b
e79568e
106ea90
761ab6d
9d7bea2
6b70dd3
c32c437
e4dd410
64905ad
01e5efa
a8969de
1fe2627
c9c5e65
a8220b9
f31f9bd
0e7c843
b37dd00
040fdb6
fa4367c
13f9e48
9495db3
de4d5ec
5cbfcfa
c9126ed
aa03e09
2399eb2
9e580e7
0b5ef86
9ac1363
fe7cb14
1b91665
61034bd
db4d356
f6a7042
9c9dc95
8488a02
129cd81
c6914fd
925374e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -107,14 +107,16 @@ class _AccessorAttr: | |
""" | ||
|
||
def __init__(self, typ): | ||
self.__typ = typ | ||
self._typ = typ | ||
|
||
def __set_name__(self, owner, name): | ||
self._name = name | ||
|
||
def __get__(self, obj, cls=None): | ||
if obj is None: | ||
return self.__typ | ||
return self._typ | ||
else: | ||
# allow __getattr__ to handle this | ||
raise AttributeError() | ||
return _FastSlowAttribute(self._name).__get__(obj, type(obj)) | ||
|
||
|
||
def Timestamp_Timedelta__new__(cls, *args, **kwargs): | ||
|
@@ -214,6 +216,7 @@ def _DataFrame__dir__(self): | |
"__dir__": _DataFrame__dir__, | ||
"_constructor": _FastSlowAttribute("_constructor"), | ||
"_constructor_sliced": _FastSlowAttribute("_constructor_sliced"), | ||
"_accessors": set(), | ||
}, | ||
) | ||
|
||
|
@@ -236,6 +239,7 @@ def _DataFrame__dir__(self): | |
"cat": _AccessorAttr(_CategoricalAccessor), | ||
"_constructor": _FastSlowAttribute("_constructor"), | ||
"_constructor_expanddim": _FastSlowAttribute("_constructor_expanddim"), | ||
"_accessors": set(), | ||
}, | ||
) | ||
|
||
|
@@ -273,6 +277,9 @@ def Index__new__(cls, *args, **kwargs): | |
"__new__": Index__new__, | ||
"_constructor": _FastSlowAttribute("_constructor"), | ||
"__array_ufunc__": _FastSlowAttribute("__array_ufunc__"), | ||
"_accessors": set(), | ||
"_data": _FastSlowAttribute("_data", private=True), | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. So these are attributes that we want the fast path to have a chance to evaluate, and not automatically be […]? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ah, so in this case since it's […]. This part of the eager population I'm not too thrilled about. These private variables are not guaranteed to be stable. It would be nice if every slow attribute access attempted to return a proxy object, so we didn't have to specify attributes like this, but I suppose that can be looked at in a follow-up. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yeah, there are still such cases we can address in follow-ups. At this point the scope of this PR has kept on expanding, which is why I haven't addressed additional issues in this PR. |
||
"_mask": _FastSlowAttribute("_mask", private=True), | ||
}, | ||
) | ||
|
||
|
@@ -337,7 +344,11 @@ def Index__new__(cls, *args, **kwargs): | |
fast_to_slow=lambda fast: fast.to_pandas(), | ||
slow_to_fast=cudf.from_pandas, | ||
bases=(Index,), | ||
additional_attributes={"__init__": _DELETE}, | ||
additional_attributes={ | ||
"__init__": _DELETE, | ||
"_data": _FastSlowAttribute("_data", private=True), | ||
"_mask": _FastSlowAttribute("_mask", private=True), | ||
}, | ||
) | ||
|
||
DatetimeArray = make_final_proxy_type( | ||
|
@@ -346,6 +357,10 @@ def Index__new__(cls, *args, **kwargs): | |
pd.arrays.DatetimeArray, | ||
fast_to_slow=_Unusable(), | ||
slow_to_fast=_Unusable(), | ||
additional_attributes={ | ||
"_data": _FastSlowAttribute("_data", private=True), | ||
"_mask": _FastSlowAttribute("_mask", private=True), | ||
}, | ||
) | ||
|
||
DatetimeTZDtype = make_final_proxy_type( | ||
|
@@ -364,7 +379,11 @@ def Index__new__(cls, *args, **kwargs): | |
fast_to_slow=lambda fast: fast.to_pandas(), | ||
slow_to_fast=cudf.from_pandas, | ||
bases=(Index,), | ||
additional_attributes={"__init__": _DELETE}, | ||
additional_attributes={ | ||
"__init__": _DELETE, | ||
"_data": _FastSlowAttribute("_data", private=True), | ||
"_mask": _FastSlowAttribute("_mask", private=True), | ||
}, | ||
) | ||
|
||
NumpyExtensionArray = make_final_proxy_type( | ||
|
@@ -385,6 +404,10 @@ def Index__new__(cls, *args, **kwargs): | |
pd.arrays.TimedeltaArray, | ||
fast_to_slow=_Unusable(), | ||
slow_to_fast=_Unusable(), | ||
additional_attributes={ | ||
"_data": _FastSlowAttribute("_data", private=True), | ||
"_mask": _FastSlowAttribute("_mask", private=True), | ||
}, | ||
) | ||
|
||
PeriodIndex = make_final_proxy_type( | ||
|
@@ -394,7 +417,11 @@ def Index__new__(cls, *args, **kwargs): | |
fast_to_slow=_Unusable(), | ||
slow_to_fast=_Unusable(), | ||
bases=(Index,), | ||
additional_attributes={"__init__": _DELETE}, | ||
additional_attributes={ | ||
"__init__": _DELETE, | ||
"_data": _FastSlowAttribute("_data", private=True), | ||
"_mask": _FastSlowAttribute("_mask", private=True), | ||
}, | ||
) | ||
|
||
PeriodArray = make_final_proxy_type( | ||
|
@@ -403,6 +430,11 @@ def Index__new__(cls, *args, **kwargs): | |
pd.arrays.PeriodArray, | ||
fast_to_slow=_Unusable(), | ||
slow_to_fast=_Unusable(), | ||
additional_attributes={ | ||
"_data": _FastSlowAttribute("_data", private=True), | ||
"_mask": _FastSlowAttribute("_mask", private=True), | ||
"__array_ufunc__": _FastSlowAttribute("__array_ufunc__"), | ||
}, | ||
) | ||
|
||
PeriodDtype = make_final_proxy_type( | ||
|
@@ -464,6 +496,10 @@ def Index__new__(cls, *args, **kwargs): | |
pd.arrays.StringArray, | ||
fast_to_slow=_Unusable(), | ||
slow_to_fast=_Unusable(), | ||
additional_attributes={ | ||
"_data": _FastSlowAttribute("_data", private=True), | ||
"_mask": _FastSlowAttribute("_mask", private=True), | ||
}, | ||
) | ||
|
||
StringDtype = make_final_proxy_type( | ||
|
@@ -472,7 +508,10 @@ def Index__new__(cls, *args, **kwargs): | |
pd.StringDtype, | ||
fast_to_slow=_Unusable(), | ||
slow_to_fast=_Unusable(), | ||
additional_attributes={"__hash__": _FastSlowAttribute("__hash__")}, | ||
additional_attributes={ | ||
"__hash__": _FastSlowAttribute("__hash__"), | ||
"storage": _FastSlowAttribute("storage"), | ||
}, | ||
) | ||
|
||
BooleanArray = make_final_proxy_type( | ||
|
@@ -482,7 +521,9 @@ def Index__new__(cls, *args, **kwargs): | |
fast_to_slow=_Unusable(), | ||
slow_to_fast=_Unusable(), | ||
additional_attributes={ | ||
"__array_ufunc__": _FastSlowAttribute("__array_ufunc__") | ||
"_data": _FastSlowAttribute("_data", private=True), | ||
"_mask": _FastSlowAttribute("_mask", private=True), | ||
"__array_ufunc__": _FastSlowAttribute("__array_ufunc__"), | ||
}, | ||
) | ||
|
||
|
@@ -502,7 +543,9 @@ def Index__new__(cls, *args, **kwargs): | |
fast_to_slow=_Unusable(), | ||
slow_to_fast=_Unusable(), | ||
additional_attributes={ | ||
"__array_ufunc__": _FastSlowAttribute("__array_ufunc__") | ||
"__array_ufunc__": _FastSlowAttribute("__array_ufunc__"), | ||
"_data": _FastSlowAttribute("_data", private=True), | ||
"_mask": _FastSlowAttribute("_mask", private=True), | ||
}, | ||
) | ||
|
||
|
@@ -586,7 +629,11 @@ def Index__new__(cls, *args, **kwargs): | |
fast_to_slow=lambda fast: fast.to_pandas(), | ||
slow_to_fast=cudf.from_pandas, | ||
bases=(Index,), | ||
additional_attributes={"__init__": _DELETE}, | ||
additional_attributes={ | ||
"__init__": _DELETE, | ||
"_data": _FastSlowAttribute("_data", private=True), | ||
"_mask": _FastSlowAttribute("_mask", private=True), | ||
}, | ||
) | ||
|
||
IntervalArray = make_final_proxy_type( | ||
|
@@ -595,6 +642,10 @@ def Index__new__(cls, *args, **kwargs): | |
pd.arrays.IntervalArray, | ||
fast_to_slow=_Unusable(), | ||
slow_to_fast=_Unusable(), | ||
additional_attributes={ | ||
"_data": _FastSlowAttribute("_data", private=True), | ||
"_mask": _FastSlowAttribute("_mask", private=True), | ||
}, | ||
) | ||
|
||
IntervalDtype = make_final_proxy_type( | ||
|
@@ -622,7 +673,9 @@ def Index__new__(cls, *args, **kwargs): | |
fast_to_slow=_Unusable(), | ||
slow_to_fast=_Unusable(), | ||
additional_attributes={ | ||
"__array_ufunc__": _FastSlowAttribute("__array_ufunc__") | ||
"__array_ufunc__": _FastSlowAttribute("__array_ufunc__"), | ||
"_data": _FastSlowAttribute("_data", private=True), | ||
"_mask": _FastSlowAttribute("_mask", private=True), | ||
}, | ||
) | ||
|
||
|
@@ -798,6 +851,14 @@ def Index__new__(cls, *args, **kwargs): | |
pd_Styler, | ||
fast_to_slow=_Unusable(), | ||
slow_to_fast=_Unusable(), | ||
additional_attributes={ | |
"css": _FastSlowAttribute("css"), | |
"ctx": _FastSlowAttribute("ctx"), | |
"index": _FastSlowAttribute("index"), | |
"data": _FastSlowAttribute("data"), | ||
"_display_funcs": _FastSlowAttribute("_display_funcs"), | ||
"table_styles": _FastSlowAttribute("table_styles"), | ||
}, | ||
) | ||
except ImportError: | ||
# Styler requires Jinja to be installed | ||
|
@@ -813,7 +874,7 @@ def _get_eval_locals_and_globals(level, local_dict=None, global_dict=None): | |
return local_dict, global_dict | ||
|
||
|
||
@register_proxy_func(pd.eval) | ||
@register_proxy_func(pd.core.computation.eval.eval) | ||
mroeschke marked this conversation as resolved.
Show resolved
Hide resolved
|
||
@nvtx.annotate( | ||
"CUDF_PANDAS_EVAL", | ||
color=_CUDF_PANDAS_NVTX_COLORS["EXECUTE_SLOW"], | ||
|
@@ -843,6 +904,24 @@ def _eval( | |
) | ||
|
||
|
||
_orig_df_eval_method = DataFrame.eval | ||
|
||
|
||
@register_proxy_func(pd.core.accessor.register_dataframe_accessor) | ||
def _register_dataframe_accessor(name): | ||
return pd.core.accessor._register_accessor(name, DataFrame) | ||
|
||
|
||
@register_proxy_func(pd.core.accessor.register_series_accessor) | ||
def _register_series_accessor(name): | ||
return pd.core.accessor._register_accessor(name, Series) | ||
|
||
|
||
@register_proxy_func(pd.core.accessor.register_index_accessor) | ||
def _register_index_accessor(name): | ||
return pd.core.accessor._register_accessor(name, Index) | ||
|
||
|
||
@nvtx.annotate( | ||
"CUDF_PANDAS_DATAFRAME_EVAL", | ||
color=_CUDF_PANDAS_NVTX_COLORS["EXECUTE_SLOW"], | ||
|
@@ -853,11 +932,14 @@ def _df_eval_method(self, *args, local_dict=None, global_dict=None, **kwargs): | |
local_dict, global_dict = _get_eval_locals_and_globals( | ||
level, local_dict, global_dict | ||
) | ||
return super(type(self), self).__getattr__("eval")( | ||
*args, local_dict=local_dict, global_dict=global_dict, **kwargs | ||
return _orig_df_eval_method( | ||
self, *args, local_dict=local_dict, global_dict=global_dict, **kwargs | ||
) | ||
|
||
|
||
_orig_query_eval_method = DataFrame.query | ||
|
||
|
||
@nvtx.annotate( | ||
"CUDF_PANDAS_DATAFRAME_QUERY", | ||
color=_CUDF_PANDAS_NVTX_COLORS["EXECUTE_SLOW"], | ||
|
@@ -870,8 +952,8 @@ def _df_query_method(self, *args, local_dict=None, global_dict=None, **kwargs): | |
local_dict, global_dict = _get_eval_locals_and_globals( | ||
level, local_dict, global_dict | ||
) | ||
return super(type(self), self).__getattr__("query")( | ||
*args, local_dict=local_dict, global_dict=global_dict, **kwargs | ||
return _orig_query_eval_method( | ||
self, *args, local_dict=local_dict, global_dict=global_dict, **kwargs | ||
) | ||
|
||
|
||
|
@@ -1277,6 +1359,7 @@ def holiday_calendar_factory_wrapper(*args, **kwargs): | |
additional_attributes={"__hash__": _FastSlowAttribute("__hash__")}, | ||
) | ||
|
||
|
||
MonthBegin = make_final_proxy_type( | ||
"MonthBegin", | ||
_Unusable, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
TIL a new part of the descriptors object model!
https://docs.python.org/3/howto/descriptor.html#closing-thoughts