Skip to content

Commit

Permalink
Merge pull request #8 from landerlini/test_col_transf
Browse files Browse the repository at this point in the history
Fixed some corner cases with ColumnTransformer ordering
  • Loading branch information
landerlini authored Jul 7, 2022
2 parents 41747fb + 5abb399 commit e8473b2
Show file tree
Hide file tree
Showing 4 changed files with 78 additions and 25 deletions.
20 changes: 13 additions & 7 deletions scikinC/ColumnTransformerConverter.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ def convert(self, model, name=None):

mapping = {k: c for k,_,c in transformers}

print (model, file=sys.stderr)
print (index_mapping, file=sys.stderr)
nFeatures = 1+max(index_mapping)

lines.append("""
Expand All @@ -70,15 +72,18 @@ def convert(self, model, name=None):
lines.append("""
ret [%(output)d] = input[%(column)d];
"""%dict(output=index_mapping.index(column), column=column))
print (f"F ORIG[{column}] -> TRANSF[{index_mapping.index(column)}]", file=sys.stderr)
else:
for iCol, column in enumerate(columns):
lines.append(""" bufin [%(iCol)d] = input[%(column)d];"""%
dict(iCol=iCol, column=column))
lines.append (""" %(name)s (bufout, bufin);"""
print(f"F ORIG[{column}] -> BUFFER[{iCol}]", file=sys.stderr)
lines.append (""" %(name)s (bufout, bufin);"""
% dict(name=key))
for iCol, column in enumerate(columns):
lines.append(""" ret[%(index_out)d] = bufout[%(iCol)d];"""%
dict(index_out=index_mapping.index(column), iCol=iCol))
print(f"F BUFFER[{iCol}] -> OUTPUT[{index_mapping.index(column)}]", file=sys.stderr)

lines.append ("""
return ret;
Expand All @@ -98,8 +103,6 @@ def convert(self, model, name=None):
if not all([t == 'passthrough' or hasattr(t, 'inverse_transform')] for _,t,_ in transformers):
return "\n".join(lines)

index_mapping = [index_mapping.index(c) for c in range(len(index_mapping))]

lines.append("""
extern "C"
FLOAT_T* %(name)s_inverse (FLOAT_T* ret, const FLOAT_T *input)
Expand All @@ -119,16 +122,19 @@ def convert(self, model, name=None):
for column in columns:
lines.append("""
ret [%(output)d] = input[%(column)d];
"""%dict(output=index_mapping.index(column), column=column))
else:
"""%dict(output=column, column=index_mapping.index(column)))
print (f"B OUTPUT[{index_mapping.index(column)}] -> INV_TRANSF[{column}]", file=sys.stderr)
else:
for iCol, column in enumerate(columns):
lines.append(""" bufin [%(iCol)d] = input[%(column)d];"""%
dict(iCol=iCol, column=column))
dict(iCol=iCol, column=index_mapping.index(column)))
print(f"B OUTPUT[{index_mapping.index(column)}] -> BUFFER[{iCol}]", file=sys.stderr)
lines.append (""" %(name)s_inverse (bufout, bufin);"""%
dict(name=key))
for iCol, column in enumerate(columns):
lines.append(""" ret[%(index_out)d] = bufout[%(iCol)d]; """ %
dict(index_out=index_mapping.index(column), iCol=iCol))
dict(index_out=column, iCol=iCol))
print(f"B BUFFER[{iCol}] -> INV_TRANSF[{column}]", file=sys.stderr)

lines.append ("""
return ret;
Expand Down
8 changes: 4 additions & 4 deletions scikinC/GBDTTraversalConverter.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,12 @@ class GBDTTraversalConverter (BaseConverter):
"""
@ staticmethod
def _get_limits(bdt):
mins=[None] * bdt.n_features_
maxs=[None] * bdt.n_features_
mins=[None] * bdt.n_features_in_
maxs=[None] * bdt.n_features_in_

for treeset in bdt.estimators_:
for tree in treeset:
for feature in range(bdt.n_features_):
for feature in range(bdt.n_features_in_):
features=tree.tree_.feature
if feature not in features: continue
min_=np.min(tree.tree_.threshold[features == feature])
Expand All @@ -41,7 +41,7 @@ def convert(self, bdt, name=None):

min_, max_=self._get_limits(bdt)

nX = bdt.n_features_
nX = bdt.n_features_in_

retvar="FLOAT_T ret[%d]" % n_classes
invar="FLOAT_T inp[%d]" % nX
Expand Down
8 changes: 4 additions & 4 deletions scikinC/GBDTUnrollingConverter.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,12 @@ def _singletree(self, tree, node):

@ staticmethod
def _get_limits(bdt):
mins=[None] * bdt.n_features_
maxs=[None] * bdt.n_features_
mins=[None] * bdt.n_features_in_
maxs=[None] * bdt.n_features_in_

for treeset in bdt.estimators_:
for tree in treeset:
for feature in range(bdt.n_features_):
for feature in range(bdt.n_features_in_):
features=tree.tree_.feature
if feature not in features: continue
min_=np.min(tree.tree_.threshold[features == feature])
Expand All @@ -57,7 +57,7 @@ def convert(self, bdt, name=None):

min_, max_=self._get_limits(bdt)

nX = bdt.n_features_
nX = bdt.n_features_in_

retvar="FLOAT_T ret[%d]" % n_classes
invar="FLOAT_T inp[%d]" % nX
Expand Down
67 changes: 57 additions & 10 deletions test/test_ColumnTransformerConverter.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import numpy as np
from sklearn.preprocessing import FunctionTransformer, QuantileTransformer
from sklearn.preprocessing import FunctionTransformer, QuantileTransformer, StandardScaler
from sklearn.compose import ColumnTransformer

# PyTest testing infrastructure
Expand All @@ -21,17 +21,50 @@ def passthrough_transformer():
@pytest.fixture
def double_passthrough_transformer():
transformer_ = ColumnTransformer([
('keep1', 'passthrough', [0,1]),
('keep2', 'passthrough', [3,4]),
('keep1', 'passthrough', [0,2]),
('keep2', 'passthrough', [3,5]),
])
X = np.random.uniform (20,30,(1000, 10))
transformer_.fit (X)
return transformer_


@pytest.fixture
def ss_and_passthrough_transformer():
    """Return a fitted ColumnTransformer: StandardScaler on columns 1, 2, 3.

    Every other column is forwarded unchanged (``remainder='passthrough'``).
    The transformer is fitted on a 1000 x 10 sample drawn uniformly from
    the interval [20, 30).
    """
    steps = [('ss', StandardScaler(), [1, 2, 3])]
    column_transformer = ColumnTransformer(steps, remainder='passthrough')

    sample = np.random.uniform(20, 30, (1000, 10))
    column_transformer.fit(sample)
    return column_transformer


@pytest.fixture
def qt_and_passthrough_transformer():
    """Return a fitted ColumnTransformer: QuantileTransformer on columns 0, 2.

    The quantile transformer maps to a normal output distribution; every
    other column is forwarded unchanged (``remainder='passthrough'``).
    The transformer is fitted on a 1000 x 10 sample drawn uniformly from
    the interval [20, 30).
    """
    quantile_step = (
        'qt',
        QuantileTransformer(output_distribution='normal'),
        [0, 2],
    )
    column_transformer = ColumnTransformer(
        [quantile_step],
        remainder='passthrough',
    )

    sample = np.random.uniform(20, 30, (1000, 10))
    column_transformer.fit(sample)
    return column_transformer


@pytest.fixture
def double_qt_and_passthrough_transformer():
    """Return a fitted ColumnTransformer with two QuantileTransformers.

    ``qt1`` (100 quantiles) handles columns 3, 4 and ``qt2`` (500 quantiles)
    handles columns 1, 2, so the transformers are declared in an order that
    differs from the column order.  Remaining columns are forwarded
    unchanged (``remainder='passthrough'``).  The transformer is fitted on a
    1000 x 10 sample drawn uniformly from the interval [20, 30).
    """
    first_qt = QuantileTransformer(n_quantiles=100, output_distribution='normal')
    second_qt = QuantileTransformer(n_quantiles=500, output_distribution='normal')
    column_transformer = ColumnTransformer(
        [
            ('qt1', first_qt, [3, 4]),
            ('qt2', second_qt, [1, 2]),
        ],
        remainder='passthrough',
    )

    sample = np.random.uniform(20, 30, (1000, 10))
    column_transformer.fit(sample)
    return column_transformer


@pytest.fixture
def qt_and_ss_and_passthrough_transformer():
transformer_ = ColumnTransformer([
('qt', QuantileTransformer(output_distribution='normal'), [0,1]),
('ss', StandardScaler(), [3,4]),
], remainder='passthrough')
X = np.random.uniform (20,30,(1000, 10))
transformer_.fit (X)
Expand All @@ -49,30 +82,48 @@ def qt_and_ft_transformer_only():
return transformer_


@pytest.fixture
def double_qt_transformer_only():
    """Return a fitted ColumnTransformer made of two QuantileTransformers.

    ``qt1`` (100 quantiles) handles columns 5-9 and ``qt2`` (500 quantiles)
    handles columns 0-4, so together they cover all ten input columns while
    being declared in reverse column order.  No ``remainder`` argument is
    given.  The transformer is fitted on a 1000 x 10 sample drawn uniformly
    from the interval [20, 30).
    """
    upper_columns = [5, 6, 7, 8, 9]
    lower_columns = [0, 1, 2, 3, 4]
    first_qt = QuantileTransformer(n_quantiles=100, output_distribution='normal')
    second_qt = QuantileTransformer(n_quantiles=500, output_distribution='normal')
    column_transformer = ColumnTransformer([
        ('qt1', first_qt, upper_columns),
        ('qt2', second_qt, lower_columns),
    ])

    sample = np.random.uniform(20, 30, (1000, 10))
    column_transformer.fit(sample)
    return column_transformer


@pytest.fixture
def qt_and_ft_transformer_dropping():
transformer_ = ColumnTransformer([
('qt', QuantileTransformer(output_distribution='normal'), [0,1]),
('ft', FunctionTransformer(), [3,4]),
('qt', QuantileTransformer(output_distribution='normal'), [0,2]),
('ft', FunctionTransformer(), [3,5]),
], remainder='drop')
X = np.random.uniform (20,30,(1000, 10))
transformer_.fit (X)
return transformer_



# Names of every ColumnTransformer fixture defined in this module, listed
# as strings so they can be looked up by name.
transformers = [
    # passthrough-only configurations
    "passthrough_transformer",
    "double_passthrough_transformer",
    # one or more real transformers plus passthrough of the remainder
    "ss_and_passthrough_transformer",
    "qt_and_passthrough_transformer",
    "double_qt_and_passthrough_transformer",
    "qt_and_ss_and_passthrough_transformer",
    # configurations without a passthrough remainder
    "qt_and_ft_transformer_only",
    "double_qt_transformer_only",
    "qt_and_ft_transformer_dropping",
]

# Names of the fixtures selected for the inverse-transform checks.
invertible_transformers = [
    "passthrough_transformer",
    "ss_and_passthrough_transformer",
    "qt_and_passthrough_transformer",
    "double_qt_and_passthrough_transformer",
    "qt_and_ss_and_passthrough_transformer",
    "qt_and_ft_transformer_only",
    "double_qt_transformer_only",
]


Expand Down Expand Up @@ -110,7 +161,3 @@ def test_inverse (scaler, request):
print (np.c_ [xtest, c, py])
assert np.abs(py-c).max() < 1e-4





0 comments on commit e8473b2

Please sign in to comment.