Merge pull request arrayfire#1 from syurkevi/feature/add-patcher

Patcher & MLP Examples
syurkevi · Mar 29, 2021 · 15cc94d · 15cc94d
2 parents 5ce44e7 + 8d8d2ef
commit 15cc94d
Show file tree

Hide file tree

Showing 22 changed files with 2,461 additions and 31 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,5 @@
+# Python cache
+__pycache__/
+
+# Virtual env
+venv
diff --git a/afsklearn/__init__.py b/afsklearn/__init__.py
@@ -1,7 +1,15 @@
-from .monkeypatcher import patch_sklearn, unpatch_sklearn, get_patch_names
+from pathlib import Path
+from typing import Any
 
-__version__ = '0.1.0'
-__author__ = 'ArrayFire'
-__all__ = [patch_sklearn, unpatch_sklearn, get_patch_names, 'preprocessing']
+import yaml
 
+# Absolute, symlink-resolved path of the directory that contains this module.
+app_dir = Path(__file__).resolve().parent
 
+
+def load_yaml_file(name: str, directory: Path = app_dir) -> Any:
+    """Parse a YAML document and return its contents.
+
+    Parameters
+    ----------
+    name : str
+        File name (or relative path) of the YAML document.
+    directory : Path, default=app_dir
+        Directory that ``name`` is resolved against.
+
+    Returns
+    -------
+    Any
+        Whatever ``yaml.safe_load`` produces for the document.
+
+    Raises
+    ------
+    OSError
+        If the file cannot be opened.
+    """
+    # Open in binary mode so that PyYAML performs its own encoding detection
+    # instead of the text being decoded with the platform's locale encoding.
+    with (directory / name).open("rb") as stream:
+        return yaml.safe_load(stream)
+
+
+# Parsed contents of ``patched_modules.yml`` from the package directory,
+# loaded once when this module is imported.
+patches_info = load_yaml_file("patched_modules.yml")
diff --git a/afsklearn/_classifier_mixin.py b/afsklearn/_classifier_mixin.py
@@ -0,0 +1,154 @@
+import arrayfire as af
+import numpy as np
+from scipy.sparse import csr_matrix
+from sklearn.utils.multiclass import type_of_target
+from sklearn.utils.sparsefuncs import count_nonzero
+from sklearn.utils.validation import check_consistent_length, column_or_1d
+
+def _weighted_sum(sample_score, sample_weight, normalize=False):
+    """Reduce per-sample scores to a single (optionally weighted) value.
+
+    Parameters
+    ----------
+    sample_score : array
+        Per-sample scores.
+    sample_weight : array-like or None
+        Per-sample weights; ``None`` means unweighted.
+    normalize : bool, default=False
+        If true, return the (weighted) average instead of the sum.
+
+    Returns
+    -------
+    The average of ``sample_score`` when ``normalize`` is true, otherwise
+    its dot product with ``sample_weight``, or its plain sum when no
+    weights are given.
+    """
+    if normalize:
+        return np.average(sample_score, weights=sample_weight)
+    if sample_weight is None:
+        return sample_score.sum()
+    return np.dot(sample_score, sample_weight)
+
+def _check_targets(y_true, y_pred):
+    """Validate that both targets describe the same classification task.
+
+    Binary and multiclass targets are brought to a common 1d shape via
+    ``column_or_1d``; multilabel targets are wrapped with ``csr_matrix``.
+
+    Parameters
+    ----------
+    y_true : array-like
+    y_pred : array-like
+
+    Returns
+    -------
+    type_true : one of {'multilabel-indicator', 'multiclass', 'binary'}
+        The common target type, derived from ``type_of_target``.
+    y_true : array or indicator matrix
+    y_pred : array or indicator matrix
+
+    Raises
+    ------
+    ValueError
+        If the two targets have different (incompatible) types, or if the
+        common type is not binary, multiclass or multilabel-indicator.
+    """
+    check_consistent_length(y_true, y_pred)
+    type_true = type_of_target(y_true)
+    type_pred = type_of_target(y_pred)
+
+    # A binary target paired with a multiclass one is treated as multiclass.
+    kinds = {type_true, type_pred}
+    if kinds == {"binary", "multiclass"}:
+        kinds = {"multiclass"}
+
+    if len(kinds) > 1:
+        raise ValueError("Classification metrics can't handle a mix of {0} "
+                         "and {1} targets".format(type_true, type_pred))
+
+    # Exactly one type is left at this point.
+    (y_type,) = kinds
+
+    # No metrics support "multiclass-multioutput" format
+    if y_type not in ("binary", "multiclass", "multilabel-indicator"):
+        raise ValueError("{0} is not supported".format(y_type))
+
+    if y_type == "multilabel-indicator":
+        return 'multilabel-indicator', csr_matrix(y_true), csr_matrix(y_pred)
+
+    y_true = column_or_1d(y_true)
+    y_pred = column_or_1d(y_pred)
+    # Two individually binary targets may use more than two labels together.
+    if y_type == "binary" and len(np.union1d(y_true, y_pred)) > 2:
+        y_type = "multiclass"
+
+    return y_type, y_true, y_pred
+
+
+
+
+def accuracy_score(y_true, y_pred, *, normalize=True, sample_weight=None):
+    """Accuracy classification score.
+
+    For multilabel input this is the subset accuracy: a sample only counts
+    as correct when its predicted label set matches ``y_true`` exactly.
+
+    Parameters
+    ----------
+    y_true : 1d array-like, or label indicator array / sparse matrix
+        Ground truth (correct) labels.
+    y_pred : 1d array-like, or label indicator array / sparse matrix
+        Predicted labels, as returned by a classifier.
+    normalize : bool, optional (default=True)
+        If ``False``, return the number of correctly classified samples.
+        Otherwise, return the fraction of correctly classified samples.
+    sample_weight : array-like of shape (n_samples,), default=None
+        Sample weights.
+
+    Returns
+    -------
+    score : float
+        Fraction of correctly classified samples when ``normalize`` is
+        true, otherwise their (weighted) count.
+
+    Examples
+    --------
+    >>> y_pred = [0, 2, 1, 3]
+    >>> y_true = [0, 1, 2, 3]
+    >>> accuracy_score(y_true, y_pred)
+    0.5
+    >>> accuracy_score(y_true, y_pred, normalize=False)
+    2
+    """
+    y_type, y_true, y_pred = _check_targets(y_true, y_pred)
+    check_consistent_length(y_true, y_pred, sample_weight)
+
+    if not y_type.startswith('multilabel'):
+        per_sample_hit = y_true == y_pred
+    else:
+        # A row is correct only if no label differs between truth and guess.
+        per_sample_hit = count_nonzero(y_true - y_pred, axis=1) == 0
+
+    return _weighted_sum(per_sample_hit, sample_weight, normalize)
+
+class afClassifierMixin:
+    """ArrayFire enabled Mixin class for all classifiers in scikit-learn."""
+
+    _estimator_type = "classifier"
+
+    def _more_tags(self):
+        """Return the extra estimator tags contributed by this mixin."""
+        return dict(requires_y=True)
+
+    def score(self, X, y, sample_weight=None):
+        """
+        Placeholder for mean-accuracy scoring; currently returns ``None``.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            Test samples (currently unused).
+        y : array-like of shape (n_samples,) or (n_samples, n_outputs)
+            True labels for X (currently unused).
+        sample_weight : array-like of shape (n_samples,), default=None
+            Sample weights (currently unused).
+
+        Returns
+        -------
+        None
+            The accuracy computation is temporarily disabled.
+        """
+        # TODO: restore the real implementation once it is usable:
+        #   accuracy_score(y, self.predict(X), sample_weight=sample_weight)
+        return None
diff --git a/afsklearn/_extmath.py b/afsklearn/_extmath.py
@@ -0,0 +1,55 @@
+import arrayfire as af
+import numpy as np
+import numpy
+import scipy.sparse as sparse
+#import scipy.sparse as sp
+import warnings
+import numbers
+from collections.abc import Sequence
+from scipy.sparse.base import spmatrix
+from itertools import chain
+
+from sklearn.utils.validation import _deprecate_positional_args
+
+
+@_deprecate_positional_args
+def safe_sparse_dot(a, b, *, dense_output=False):
+    """Dot product of ``a`` and ``b`` computed through ArrayFire.
+
+    Both operands are converted with ``af.interop.to_array``. When neither
+    has more than two dimensions, they are cast to ``f32`` and multiplied
+    with ``af.blas.matmul``.
+
+    Parameters
+    ----------
+    a : array-like accepted by ``af.interop.to_array``
+    b : array-like accepted by ``af.interop.to_array``
+    dense_output : boolean, (default=False)
+        Only consulted when both operands test as SciPy sparse and the
+        result exposes ``toarray``.
+
+    Returns
+    -------
+    dot_product : array
+        For operands with at most two dimensions, the ``af.blas.matmul``
+        result of the ``f32`` casts of ``a`` and ``b``.
+
+    Notes
+    -----
+    NOTE(review): once both operands are ArrayFire arrays the
+    ``sparse.issparse`` checks presumably never match, so the sparse and
+    N-d branches below look like leftovers from the NumPy implementation --
+    confirm before relying on them.
+    """
+    # Convert *both* operands: ``numdims()`` and ``as_type()`` are called on
+    # ``b`` as well, so leaving it as e.g. a NumPy array fails below.
+    a = af.interop.to_array(a)
+    b = af.interop.to_array(b)
+    if a.numdims() > 2 or b.numdims() > 2:
+        if sparse.issparse(a):
+            # sparse is always 2D. Implies b is 3D+
+            # [i, j] @ [k, ..., l, m, n] -> [i, k, ..., l, n]
+            b_ = np.rollaxis(b, -2)
+            b_2d = b_.reshape((b.shape[-2], -1))
+            ret = a @ b_2d
+            ret = ret.reshape(a.shape[0], *b_.shape[1:])
+        elif sparse.issparse(b):
+            # sparse is always 2D. Implies a is 3D+
+            # [k, ..., l, m] @ [i, j] -> [k, ..., l, j]
+            a_2d = a.reshape(-1, a.shape[-1])
+            ret = a_2d @ b
+            ret = ret.reshape(*a.shape[:-1], b.shape[1])
+        else:
+            ret = np.dot(a, b)
+    else:
+        # Operands are cast to single precision before the multiply.
+        ret = af.blas.matmul(a.as_type(af.Dtype.f32), b.as_type(af.Dtype.f32))
+
+    if (sparse.issparse(a) and sparse.issparse(b)
+            and dense_output and hasattr(ret, "toarray")):
+        return ret.toarray()
+    return ret