forked from arrayfire/af-sklearn-monkeypatch
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request arrayfire#1 from syurkevi/feature/add-patcher
Patcher & MLP Examples
- Loading branch information
Showing
22 changed files
with
2,461 additions
and
31 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# Python cache | ||
__pycache__/ | ||
|
||
# Virtual env | ||
venv |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,15 @@ | ||
from .monkeypatcher import patch_sklearn, unpatch_sklearn, get_patch_names | ||
from pathlib import Path | ||
from typing import Any | ||
|
||
# Package metadata.
__version__ = '0.1.0'
__author__ = 'ArrayFire'
# ``__all__`` must contain *names* (strings). Listing the function objects
# themselves makes ``from <package> import *`` raise TypeError.
__all__ = [
    'patch_sklearn',
    'unpatch_sklearn',
    'get_patch_names',
    'preprocessing',
]
import yaml | ||
|
||
# Directory that contains this module; used as the default lookup directory
# by ``load_yaml_file``.
app_dir = Path(__file__).resolve().parent


def load_yaml_file(name: str, directory: Path = app_dir) -> Any:
    """Parse a YAML file and return the resulting Python object.

    Parameters
    ----------
    name : str
        File name (or relative path) of the YAML document.
    directory : Path, default=app_dir
        Directory that ``name`` is resolved against.

    Returns
    -------
    Any
        Whatever ``yaml.safe_load`` produces for the document.

    Raises
    ------
    OSError
        If the file cannot be opened (e.g. it does not exist).
    """
    path = directory / name
    # Decode explicitly as UTF-8 instead of relying on the platform's
    # default text encoding, which differs between systems.
    with path.open(encoding="utf-8") as f:
        return yaml.safe_load(f)


# Parsed once, at import time, from the file next to this module.
patches_info = load_yaml_file("patched_modules.yml")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,154 @@ | ||
import arrayfire as af | ||
|
||
def _weighted_sum(sample_score, sample_weight, normalize=False):
    """Reduce per-sample scores to a single value.

    Parameters
    ----------
    sample_score : array-like
        One score per sample. Must provide ``.sum()`` when neither
        ``normalize`` nor ``sample_weight`` is given.
    sample_weight : array-like or None
        Optional per-sample weights.
    normalize : bool, default=False
        If True, return the (weighted) average of the scores; otherwise
        return the (weighted) sum.

    Returns
    -------
    scalar
        ``np.average(sample_score, weights=sample_weight)`` when
        ``normalize`` is true, else ``np.dot(sample_score, sample_weight)``
        when weights are given, else ``sample_score.sum()``.
    """
    # NumPy is not imported at the top of this module, so bring it into
    # scope here; without this every call raised NameError.
    import numpy as np

    if normalize:
        return np.average(sample_score, weights=sample_weight)
    if sample_weight is not None:
        return np.dot(sample_score, sample_weight)
    return sample_score.sum()
|
||
def _check_targets(y_true, y_pred):
    """Check that y_true and y_pred belong to the same classification task.

    This converts multiclass or binary types to a common shape, and raises a
    ValueError for a mix of multilabel and multiclass targets, a mix of
    multilabel formats, for the presence of continuous-valued or multioutput
    targets, or for targets of different lengths.

    Column vectors are squeezed to 1d, while multilabel formats are returned
    as CSR sparse label indicators.

    Parameters
    ----------
    y_true : array-like
    y_pred : array-like

    Returns
    -------
    type_true : one of {'multilabel-indicator', 'multiclass', 'binary'}
        The type of the true target data, as output by
        ``utils.multiclass.type_of_target``.
    y_true : array or indicator matrix
    y_pred : array or indicator matrix

    Raises
    ------
    ValueError
        If the two target types cannot be reconciled, or the common type is
        not one of the three supported ones.
    """
    # None of these helpers are imported at the top of this module, so they
    # are brought into scope here; without this every call raised NameError.
    import numpy as np
    from scipy.sparse import csr_matrix
    from sklearn.utils.multiclass import type_of_target
    from sklearn.utils.validation import check_consistent_length, column_or_1d

    check_consistent_length(y_true, y_pred)
    type_true = type_of_target(y_true)
    type_pred = type_of_target(y_pred)

    y_type = {type_true, type_pred}
    # A binary/multiclass pair is treated as a multiclass problem.
    if y_type == {"binary", "multiclass"}:
        y_type = {"multiclass"}

    if len(y_type) > 1:
        raise ValueError("Classification metrics can't handle a mix of {0} "
                         "and {1} targets".format(type_true, type_pred))

    # We can't have more than one value on y_type => The set is no more needed
    y_type = y_type.pop()

    # No metrics support "multiclass-multioutput" format
    if y_type not in ["binary", "multiclass", "multilabel-indicator"]:
        raise ValueError("{0} is not supported".format(y_type))

    if y_type in ["binary", "multiclass"]:
        y_true = column_or_1d(y_true)
        y_pred = column_or_1d(y_pred)
        if y_type == "binary":
            # Each side may look binary on its own while together they use
            # more than two distinct labels.
            unique_values = np.union1d(y_true, y_pred)
            if len(unique_values) > 2:
                y_type = "multiclass"

    if y_type.startswith('multilabel'):
        y_true = csr_matrix(y_true)
        y_pred = csr_matrix(y_pred)
        y_type = 'multilabel-indicator'

    return y_type, y_true, y_pred
|
||
|
||
|
||
|
||
def accuracy_score(y_true, y_pred, *, normalize=True, sample_weight=None):
    """Accuracy classification score.

    In multilabel classification, this function computes subset accuracy:
    the set of labels predicted for a sample must *exactly* match the
    corresponding set of labels in y_true.

    Read more in the :ref:`User Guide <accuracy_score>`.

    Parameters
    ----------
    y_true : 1d array-like, or label indicator array / sparse matrix
        Ground truth (correct) labels.
    y_pred : 1d array-like, or label indicator array / sparse matrix
        Predicted labels, as returned by a classifier.
    normalize : bool, optional (default=True)
        If ``False``, return the number of correctly classified samples.
        Otherwise, return the fraction of correctly classified samples.
    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    Returns
    -------
    score : float
        If ``normalize == True``, return the fraction of correctly
        classified samples (float), else returns the number of correctly
        classified samples (int).
        The best performance is 1 with ``normalize == True`` and the number
        of samples with ``normalize == False``.

    See also
    --------
    jaccard_score, hamming_loss, zero_one_loss

    Notes
    -----
    In binary and multiclass classification, this function is equal
    to the ``jaccard_score`` function.

    Examples
    --------
    >>> from sklearn.metrics import accuracy_score
    >>> y_pred = [0, 2, 1, 3]
    >>> y_true = [0, 1, 2, 3]
    >>> accuracy_score(y_true, y_pred)
    0.5
    >>> accuracy_score(y_true, y_pred, normalize=False)
    2

    In the multilabel case with binary label indicators:

    >>> import numpy as np
    >>> accuracy_score(np.array([[0, 1], [1, 1]]), np.ones((2, 2)))
    0.5
    """
    # Neither helper is imported at the top of this module, so they are
    # brought into scope here; without this the calls raised NameError.
    from sklearn.utils.sparsefuncs import count_nonzero
    from sklearn.utils.validation import check_consistent_length

    # Compute accuracy for each possible representation
    y_type, y_true, y_pred = _check_targets(y_true, y_pred)
    check_consistent_length(y_true, y_pred, sample_weight)
    if y_type.startswith('multilabel'):
        # A sample is correct only if no label differs in its row.
        differing_labels = count_nonzero(y_true - y_pred, axis=1)
        score = differing_labels == 0
    else:
        score = y_true == y_pred

    return _weighted_sum(score, sample_weight, normalize)
|
||
class afClassifierMixin:
    """ArrayFire enabled Mixin class for all classifiers in scikit-learn."""

    # Marks estimators that inherit from this mixin as classifiers.
    _estimator_type = "classifier"

    def score(self, X, y, sample_weight=None):
        """Placeholder for the mean-accuracy score; currently returns ``None``.

        The intended behaviour, kept below as a TODO, is to return the mean
        accuracy of ``self.predict(X)`` with respect to ``y``. In
        multi-label classification that is the subset accuracy, a harsh
        metric since each sample's whole label set must be predicted
        correctly.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Test samples. Unused while the method is stubbed out.
        y : array-like of shape (n_samples,) or (n_samples, n_outputs)
            True labels for X. Unused while the method is stubbed out.
        sample_weight : array-like of shape (n_samples,), default=None
            Sample weights. Unused while the method is stubbed out.

        Returns
        -------
        None
            The accuracy computation is temporarily disabled.
        """
        # TODO: restore the real implementation:
        #   return accuracy_score(y, self.predict(X), sample_weight=sample_weight)
        return None  # TMP

    def _more_tags(self):
        """Return the extra estimator tags: ``y`` is required."""
        return dict(requires_y=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
import arrayfire as af | ||
import numpy as np | ||
import numpy | ||
import scipy.sparse as sparse | ||
#import scipy.sparse as sp | ||
import warnings | ||
import numbers | ||
from collections.abc import Sequence | ||
from scipy.sparse.base import spmatrix | ||
from itertools import chain | ||
|
||
from sklearn.utils.validation import _deprecate_positional_args | ||
|
||
|
||
@_deprecate_positional_args
def safe_sparse_dot(a, b, *, dense_output=False):
    """Dot product that handles the sparse matrix case correctly.

    Dense operands are multiplied on ArrayFire; if either operand is a SciPy
    sparse matrix the product is computed on the host with SciPy/NumPy.

    Parameters
    ----------
    a : array or sparse matrix
    b : array or sparse matrix
    dense_output : boolean, (default=False)
        When False, ``a`` and ``b`` both being sparse will yield sparse output.
        When True, output will always be a dense array.

    Returns
    -------
    dot_product : array or sparse matrix
        sparse if ``a`` and ``b`` are sparse and ``dense_output=False``.
        For dense operands with at most two dimensions this is the
        ``af.blas.matmul`` result of the operands cast to ``f32``; for dense
        operands with more dimensions it is the ``np.dot`` result.
    """
    # Sparsity has to be tested *before* any ArrayFire conversion: once an
    # operand has been converted, ``sparse.issparse`` can no longer be true
    # and the sparse branches would be unreachable.
    a_is_sparse = sparse.issparse(a)
    b_is_sparse = sparse.issparse(b)

    if a_is_sparse or b_is_sparse:
        # Host path. An ArrayFire operand is copied back to NumPy so the
        # ``@``/reshape logic below can be used unchanged.
        if isinstance(a, af.Array):
            a = a.to_ndarray()
        if isinstance(b, af.Array):
            b = b.to_ndarray()

        if a.ndim > 2 or b.ndim > 2:
            if a_is_sparse:
                # sparse is always 2D. Implies b is 3D+
                # [i, j] @ [k, ..., l, m, n] -> [i, k, ..., l, n]
                b_ = np.rollaxis(b, -2)
                b_2d = b_.reshape((b.shape[-2], -1))
                ret = a @ b_2d
                ret = ret.reshape(a.shape[0], *b_.shape[1:])
            else:
                # sparse is always 2D. Implies a is 3D+
                # [k, ..., l, m] @ [i, j] -> [k, ..., l, j]
                a_2d = a.reshape(-1, a.shape[-1])
                ret = a_2d @ b
                ret = ret.reshape(*a.shape[:-1], b.shape[1])
        else:
            ret = a @ b

        if (a_is_sparse and b_is_sparse
                and dense_output and hasattr(ret, "toarray")):
            return ret.toarray()
        return ret

    # Dense path: BOTH operands must be ArrayFire arrays, because
    # ``numdims()`` and ``as_type()`` are called on each of them. Previously
    # only ``a`` was converted, so a NumPy ``b`` failed on ``b.numdims()``.
    if not isinstance(a, af.Array):
        a = af.interop.to_array(a)
    if not isinstance(b, af.Array):
        b = af.interop.to_array(b)

    if a.numdims() > 2 or b.numdims() > 2:
        # N-d products keep NumPy's ``dot`` semantics, evaluated on host
        # copies of the operands.
        return np.dot(a.to_ndarray(), b.to_ndarray())

    # Operands are cast to single precision before the multiplication.
    return af.blas.matmul(a.as_type(af.Dtype.f32), b.as_type(af.Dtype.f32))
Oops, something went wrong.