Skip to content

Commit

Permalink
Merge pull request #4 from landerlini/column_transformer
Browse files Browse the repository at this point in the history
Implementing ColumnTransformer
  • Loading branch information
landerlini authored Dec 3, 2021
2 parents 385c9ab + 1d7d8b1 commit c408e2b
Show file tree
Hide file tree
Showing 9 changed files with 468 additions and 31 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,8 @@ A few notes:
| `MinMaxScaler` | Available | Available | |
| `StandardScaler` | Available | Available | |
| `QuantileTransformer` | Available | Available | |
| `FunctionTransformer` | Available | Available | Only functions in math.h |
| `ColumnTransformer` | Available | Available | Only integer column indices |
| `Pipeline` | Available | Partial | Pipelines of pipelines break |

#### Scikit-Learn models
Expand Down
139 changes: 139 additions & 0 deletions scikinC/ColumnTransformerConverter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
import numpy as np

from sklearn.preprocessing import FunctionTransformer

import scikinC
from scikinC import BaseConverter
from ._tools import array2c

import sys


class ColumnTransformerConverter (BaseConverter):
    """Translate a fitted scikit-learn ``ColumnTransformer`` into C source.

    The generated code contains the C translation of every nested
    transformer (delegated to ``scikinC.convert``), a function ``<name>``
    that dispatches the input columns to those transformers, and, when the
    transformation can be inverted, a function ``<name>_inverse``.

    Only integer, non-negative column indices are supported, and every
    nested transformer is assumed to return as many columns as it receives.
    """

    def convert(self, model, name=None):
        """Return the C source implementing ``model``.

        Parameters
        ----------
        model:
            Fitted ``ColumnTransformer``; its ``transformers_`` attribute is
            read.  Nested ``FunctionTransformer`` instances get their
            ``n_features_in_`` attribute set as a side effect.
        name:
            Name of the generated C function.

        Returns
        -------
        str
            The generated C code.

        Raises
        ------
        NotImplementedError
            If some column is not addressed by a non-negative integer.
        """
        lines = self.header()

        # Each step is (C function name, transformer, input columns,
        # positions of the corresponding columns in the output array).
        steps = []
        index_mapping = []
        keys = []
        for key, transformer, columns in model.transformers_:
            if self._is_keyword(transformer, 'drop') or len(columns) == 0:
                continue

            if not all(self._is_integer_index(c) for c in columns):
                raise NotImplementedError(
                    "Columns can only be indexed with integers, got",
                    [type(c) for c in columns])

            columns = [int(c) for c in columns]
            if any(c < 0 for c in columns):
                # A negative index would be emitted verbatim and read out of
                # bounds in C, where it does not count from the end.
                raise NotImplementedError(
                    "Negative column indices are not supported, got", columns)

            if key is None:
                key = "Preprocessor"
            if key in keys:
                # Keep the generated C symbols unique.
                key = key + str(1 + len(keys))
            keys.append(key)

            if isinstance(transformer, (FunctionTransformer,)):
                if transformer.func is None and transformer.inverse_func is None:
                    transformer = 'passthrough'
                else:
                    transformer.n_features_in_ = len(columns)

            # The outputs of the transformers are stacked one after the
            # other, in the order in which the transformers are listed.
            first_output = len(index_mapping)
            outputs = list(range(first_output, first_output + len(columns)))
            index_mapping += columns

            steps.append(
                ('colcnv_%s_%s' % (name, key), transformer, columns, outputs))

        nested = {
            k: t for k, t, _, _ in steps
            if not self._is_keyword(t, 'passthrough')
        }
        if nested:
            lines.append(scikinC.convert(nested))

        nFeatures = 1 + max(index_mapping, default=0)
        # The scratch buffers must also hold the widest group of columns,
        # which exceeds nFeatures if a transformer lists a column twice.
        buffer_size = max([nFeatures] + [len(cols) for _, _, cols, _ in steps])

        lines += self._emit_function(name, '', steps, buffer_size, False)

        if self._is_invertible(model, steps, index_mapping):
            lines += self._emit_function(
                name, '_inverse', steps, buffer_size, True)

        return "\n".join(lines)

    @staticmethod
    def _is_keyword(transformer, keyword):
        """Tell whether ``transformer`` is the special string ``keyword``."""
        return isinstance(transformer, str) and transformer == keyword

    @staticmethod
    def _is_integer_index(column):
        """Tell whether ``column`` is an integer-valued, non-boolean index."""
        if isinstance(column, (bool, np.bool_)):
            # Boolean masks would silently be read as the indices 0 and 1.
            return False
        if isinstance(column, int):
            return True
        try:
            return bool(int(column) == column)
        except (TypeError, ValueError):
            return False

    @classmethod
    def _is_invertible(cls, model, steps, index_mapping):
        """Tell whether an inverse C function can be generated."""
        # Dropped columns cannot be recovered.
        if any(cls._is_keyword(t, 'drop') for _, t, _ in model.transformers_):
            return False

        # A column used twice has no unique pre-image.
        if len(set(index_mapping)) != len(index_mapping):
            return False

        # Every nested transformer must implement the inverse transform.
        return all(
            cls._is_keyword(t, 'passthrough') or hasattr(t, 'inverse_transform')
            for _, t, _, _ in steps
        )

    @classmethod
    def _emit_function(cls, name, suffix, steps, buffer_size, inverse):
        """Return the chunks of C code of the direct or inverse function.

        In the direct function each transformer reads the input at its
        ``columns`` and writes the result at its output positions; in the
        inverse function the two roles are swapped.
        """
        chunks = ["""
extern "C"
FLOAT_T* %(name)s%(suffix)s (FLOAT_T* ret, const FLOAT_T *input)
{
int c;
FLOAT_T bufin[%(nFeatures)d], bufout[%(nFeatures)s];
""" % dict(
            name=name,
            suffix=suffix,
            nFeatures=buffer_size,
        )]

        for key, transformer, columns, outputs in steps:
            sources, targets = (outputs, columns) if inverse else (columns, outputs)

            chunks.append("// Transforming %s columns" % key)
            if cls._is_keyword(transformer, 'passthrough'):
                for source, target in zip(sources, targets):
                    chunks.append("""
ret [%(output)d] = input[%(column)d];
""" % dict(output=target, column=source))
            else:
                for iCol, source in enumerate(sources):
                    chunks.append(""" bufin [%(iCol)d] = input[%(column)d];""" %
                                  dict(iCol=iCol, column=source))
                chunks.append(""" %(name)s%(suffix)s (bufout, bufin);"""
                              % dict(name=key, suffix=suffix))
                for iCol, target in enumerate(targets):
                    chunks.append(""" ret[%(index_out)d] = bufout[%(iCol)d];""" %
                                  dict(index_out=target, iCol=iCol))

        chunks.append("""
return ret;
}
""")
        return chunks

97 changes: 97 additions & 0 deletions scikinC/FunctionTransformerConverter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
import numpy as np

from scikinC import BaseConverter
from ._tools import array2c


class FunctionTransformerConverter (BaseConverter):
    """Translate a scikit-learn ``FunctionTransformer`` into C source.

    The transformer functions are applied element-wise.  A function is
    translated either through the ``func_inC`` / ``inverse_func_inC``
    attributes of the model (a C expression where ``{x}`` stands for the
    input value) or, for a set of known numpy functions, through the
    corresponding ``math.h`` function.
    """

    # Tokens refused in user-provided C expressions, since they would allow
    # terminating the statement or commenting out the generated code.
    _BANNED_TOKENS = (';', '//', '/*', '*/')

    def convert(self, model, name=None):
        """Return the C source of ``<name>`` and ``<name>_inverse``.

        Parameters
        ----------
        model:
            The ``FunctionTransformer``; ``n_features_in_`` must be set.
        name:
            Name of the generated C function.

        Raises
        ------
        NotImplementedError
            If ``n_features_in_`` is missing or a function has no known
            translation to C.
        ValueError
            If a C expression is not a string or contains a banned token.
        """
        lines = self.header()

        if not hasattr(model, 'n_features_in_'):
            raise NotImplementedError(
                "Conversion requires its n_features_in_ attribute to be set")

        nFeatures = model.n_features_in_

        func_dict = {
            None: '{x}',
            # log1p/expm1 exist in math.h and keep the accuracy for small x
            # that log(1+x) and exp(x)-1 would lose.
            np.log1p: 'log1p({x})',
            np.expm1: 'expm1({x})',
            np.arcsin: 'asin({x})',
            np.arccos: 'acos({x})',
            np.arctan: 'atan({x})',
            np.abs: 'fabs({x})',
        }
        c_funcs = ('sin', 'cos', 'tan', 'sinh', 'cosh', 'tanh', 'exp', 'log',
                   'log10', 'sqrt', 'ceil', 'floor')
        func_dict.update({getattr(np, f): "%s({x})" % f for f in c_funcs})

        # Custom C expressions may call math.h functions as well.
        needs_math = (
            model.func is not None
            or model.inverse_func is not None
            or hasattr(model, 'func_inC')
            or hasattr(model, 'inverse_func_inC')
        )
        if needs_math:
            lines.append("#include <math.h>")

        fwd = self._c_expression(model, 'func', func_dict)
        bwd = self._c_expression(model, 'inverse_func', func_dict)

        ## Input sanitization
        for expression in (fwd, bwd):
            if not isinstance(expression, str):
                raise ValueError("Invalid implementation: %s" % str(expression))
            if any(banned in expression for banned in self._BANNED_TOKENS):
                raise ValueError("Invalid implementation: %s" % expression)

        lines.append("""
extern "C"
FLOAT_T* %(name)s (FLOAT_T* ret, const FLOAT_T *input)
{
int c;
for (int c = 0; c < %(nFeatures)d; ++c)
ret [c] = %(func)s;
return ret;
}
""" % dict(
            name=name,
            nFeatures=nFeatures,
            func=fwd.format(x='input[c]'),
        ))

        lines.append("""
extern "C"
FLOAT_T * %(name)s_inverse(FLOAT_T * ret, const FLOAT_T * input)
{
int c;
for (int c=0; c < %(nFeatures)d; ++c)
ret [c]= %(func)s;
return ret;
}
""" % dict(
            name=name,
            nFeatures=nFeatures,
            func=bwd.format(x='input[c]'),
        ))

        return "\n".join(lines)

    @staticmethod
    def _c_expression(model, attribute, func_dict):
        """Return the C expression implementing ``model.<attribute>``.

        The ``<attribute>_inC`` attribute of the model, when defined, takes
        precedence over the table of known functions.
        """
        custom = attribute + '_inC'
        if hasattr(model, custom):
            return getattr(model, custom)

        function = getattr(model, attribute)
        try:
            return func_dict[function]
        except (KeyError, TypeError):
            # TypeError: unhashable callables cannot be looked up at all.
            raise NotImplementedError(
                "Translation of function %s not implemented nor defined as %s argument"
                % (str(function), custom)) from None
35 changes: 5 additions & 30 deletions scikinC/QuantileTransformerConverter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,44 +2,19 @@
import sys
from scikinC import BaseConverter
from scipy import stats
from ._tools import array2c
from ._tools import array2c, get_interpolation_function

class QuantileTransformerConverter (BaseConverter):
def convert (self, model, name = None):
def convert (self, model, name=None):
lines = self.header()

distr = model.output_distribution
if distr not in ['normal', 'uniform']:
raise NotImplementedError ("Unexpected distribution %s" % distr)

lines . append ( """
extern "C"
FLOAT_T qtc_interpolate_for_%(name)s ( FLOAT_T x, FLOAT_T *xs, FLOAT_T *ys, int N )
{
int min = 0;
int max = N;
int n;
if (N<=1) return ys[0];
if (x <= xs[0]) return ys[0];
if (x >= xs[N-1]) return ys[N-1];
for (;;)
{
n = (min + max)/2;
if ( x < xs[n] )
max = n;
else if ( x >= xs[n+1] )
min = n;
else
break;
}
return (x - xs[n])/(xs[n+1]-xs[n])*(ys[n+1]-ys[n]) + ys[n];
}
""" % dict(name = name));
lines.append (
get_interpolation_function('qtc_interpolate_for_%s'%(name))
)

q = model.quantiles_
nQuantiles = model.quantiles_.shape[0]
Expand Down
2 changes: 2 additions & 0 deletions scikinC/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
'DecorrTransformer': 'DecorrTransformerConverter',
'Pipeline': 'PipelineConverter',
'FastQuantileLayer': 'FastQuantileLayerConverter',
'FunctionTransformer': 'FunctionTransformerConverter',
'ColumnTransformer': 'ColumnTransformerConverter',

## Keras
'Sequential': 'KerasSequentialConverter',
Expand Down
31 changes: 31 additions & 0 deletions scikinC/_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,34 @@ def retrieve_prior (bdt):
)


################################################################################
def get_interpolation_function (func_name):
    """Return the C source of a linear-interpolation function.

    The generated function, named ``func_name``, takes a value ``x``, the
    arrays ``xs`` and ``ys`` and their length ``N``.  It returns ``ys[0]``
    or ``ys[N-1]`` when ``x`` is outside the range spanned by ``xs``;
    otherwise it bisects ``xs`` to find the interval containing ``x`` and
    interpolates linearly between the two neighbouring ``ys``.
    """
    template = """
extern "C"
FLOAT_T %(func_name)s ( FLOAT_T x, FLOAT_T *xs, FLOAT_T *ys, int N )
{
int min = 0;
int max = N;
int n;
if (N<=1) return ys[0];
if (x <= xs[0]) return ys[0];
if (x >= xs[N-1]) return ys[N-1];
for (;;)
{
n = (min + max)/2;
if ( x < xs[n] )
max = n;
else if ( x >= xs[n+1] )
min = n;
else
break;
}
return (x - xs[n])/(xs[n+1]-xs[n])*(ys[n+1]-ys[n]) + ys[n];
}
"""
    substitutions = {"func_name": func_name}
    return template % substitutions

2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

setup(
name='scikinC', # Required
version='0.1.0', # Required
version='0.2.0', # Required
description='A converter for scikit learn and keras to hardcoded C function',
long_description=long_description,
long_description_content_type='text/markdown', # Optional (see note above)
Expand Down
Loading

0 comments on commit c408e2b

Please sign in to comment.