Skip to content

Commit

Permalink
Add SGD and OHE
Browse files Browse the repository at this point in the history
  • Loading branch information
roaffix committed Jul 28, 2021
1 parent 68fca33 commit 96317a8
Show file tree
Hide file tree
Showing 20 changed files with 2,369 additions and 68 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,8 @@ build
dist
*.egg-info

# Static typing
.mypy_cache

# Virtual env
venv
62 changes: 62 additions & 0 deletions afsklearn/_class_weight.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import numpy as np # FIXME
from sklearn.utils import _deprecate_positional_args


@_deprecate_positional_args
def compute_class_weight(class_weight, *, classes, y):
    """Compute one weight per class, e.g. to compensate for class imbalance.

    Parameters
    ----------
    class_weight : dict, 'balanced' or None
        Weighting strategy.

        - ``None`` (or an empty container): every class gets weight 1.
        - ``'balanced'``: weights are
          ``n_samples / (n_classes * np.bincount(y))``.
        - dict: maps class label -> weight; classes that are not listed
          keep weight 1.
    classes : ndarray
        Array of the classes occurring in the data, as given by
        ``np.unique(y_org)`` with ``y_org`` the original class labels.
    y : array-like of shape (n_samples,)
        Original class label of every sample.

    Returns
    -------
    class_weight_vect : ndarray of shape (n_classes,)
        ``class_weight_vect[i]`` is the weight of the i-th entry of
        ``classes``.

    Raises
    ------
    ValueError
        If ``y`` contains a label missing from ``classes``; if, in
        'balanced' mode, ``classes`` contains a label missing from ``y``;
        if ``class_weight`` is neither None, 'balanced' nor a dict; or if a
        dict key is not found in ``classes``.

    References
    ----------
    The "balanced" heuristic is inspired by
    Logistic Regression in Rare Events Data, King, Zeng, 2001.
    """
    # Imported at call time: a module-level import would be circular.
    from .preprocessing._label import afLabelEncoder

    labels_missing_from_classes = set(y) - set(classes)
    if labels_missing_from_classes:
        raise ValueError("classes should include all valid labels that can "
                         "be in y")

    # No weighting requested: uniform weights.
    if class_weight is None or len(class_weight) == 0:
        return np.ones(classes.shape[0], dtype=np.float64, order='C')

    if class_weight == 'balanced':
        # Weight every class by the inverse of its frequency in y.
        encoder = afLabelEncoder()
        encoded_y = encoder.fit_transform(y)
        class_is_in_y = np.in1d(classes, encoder.classes_)
        if not all(class_is_in_y):
            raise ValueError("classes should have valid labels that are in y")

        per_class_count = np.bincount(encoded_y).astype(np.float64)
        inverse_frequency = len(y) / (len(encoder.classes_) * per_class_count)
        return inverse_frequency[encoder.transform(classes)]

    # Remaining case: a user-supplied {label: weight} mapping.
    weights = np.ones(classes.shape[0], dtype=np.float64, order='C')
    if not isinstance(class_weight, dict):
        raise ValueError("class_weight must be dict, 'balanced', or None,"
                         " got: %r" % class_weight)
    for label in class_weight:
        # searchsorted only yields an insertion point, so a real match has
        # to be confirmed by comparing the element found there.
        position = np.searchsorted(classes, label)
        if position >= len(classes) or classes[position] != label:
            raise ValueError("Class label {} not present.".format(label))
        weights[position] = class_weight[label]

    return weights
78 changes: 74 additions & 4 deletions afsklearn/_classifier_mixin.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import arrayfire as af


def _weighted_sum(sample_score, sample_weight, normalize=False):
if normalize:
return np.average(sample_score, weights=sample_weight)
Expand All @@ -8,6 +9,7 @@ def _weighted_sum(sample_score, sample_weight, normalize=False):
else:
return sample_score.sum()


def _check_targets(y_true, y_pred):
"""Check that y_true and y_pred belong to the same classification task
This converts multiclass or binary types to a common shape, and raises a
Expand Down Expand Up @@ -63,8 +65,6 @@ def _check_targets(y_true, y_pred):
return y_type, y_true, y_pred




def accuracy_score(y_true, y_pred, *, normalize=True, sample_weight=None):
"""Accuracy classification score.
In multilabel classification, this function computes subset accuracy:
Expand Down Expand Up @@ -123,6 +123,7 @@ def accuracy_score(y_true, y_pred, *, normalize=True, sample_weight=None):

return _weighted_sum(score, sample_weight, normalize)


class afClassifierMixin:
"""ArrayFire enabled Mixin class for all classifiers in scikit-learn."""

Expand All @@ -147,8 +148,77 @@ def score(self, X, y, sample_weight=None):
score : float
Mean accuracy of self.predict(X) wrt. y.
"""
#return accuracy_score(y, self.predict(X), sample_weight=sample_weight)
return #TMP
# return accuracy_score(y, self.predict(X), sample_weight=sample_weight)
return # TMP

def _more_tags(self):
return {'requires_y': True}


class afLinearClassifierMixin(afClassifierMixin):
    """Mixin for linear classifiers.

    Handles prediction for sparse and dense X. The methods read the fitted
    attributes ``coef_``, ``intercept_`` and ``classes_`` from ``self``.
    """

    def decision_function(self, X):
        """Predict confidence scores for samples.

        The confidence score for a sample is proportional to the signed
        distance of that sample to the hyperplane.

        Parameters
        ----------
        X : array-like or sparse matrix, shape (n_samples, n_features)
            Samples.

        Returns
        -------
        array, shape=(n_samples,) if n_classes == 2 else (n_samples, n_classes)
            Confidence scores per (sample, class) combination. In the binary
            case, confidence score for self.classes_[1] where >0 means this
            class would be predicted.

        Raises
        ------
        ValueError
            If the number of columns of ``X`` differs from
            ``self.coef_.shape[1]``.
        """
        # Imported here so the helpers are always in scope; the module's
        # visible import block does not provide them.
        from sklearn.utils.extmath import safe_sparse_dot
        from sklearn.utils.validation import check_array, check_is_fitted

        check_is_fitted(self)

        X = check_array(X, accept_sparse='csr')

        n_features = self.coef_.shape[1]
        if X.shape[1] != n_features:
            raise ValueError("X has %d features per sample; expecting %d"
                             % (X.shape[1], n_features))

        scores = safe_sparse_dot(X, self.coef_.T,
                                 dense_output=True) + self.intercept_
        # A single score column is flattened to a 1-D vector.
        return scores.ravel() if scores.shape[1] == 1 else scores

    def predict(self, X):
        """Predict class labels for samples in X.

        Parameters
        ----------
        X : array-like or sparse matrix, shape (n_samples, n_features)
            Samples.

        Returns
        -------
        C : array, shape [n_samples]
            Predicted class label per sample.
        """
        scores = self.decision_function(X)
        if len(scores.shape) == 1:
            # 1-D scores: a positive score selects index 1, otherwise 0.
            indices = (scores > 0).astype(int)
        else:
            # One column per class: pick the highest-scoring column.
            indices = scores.argmax(axis=1)
        return self.classes_[indices]

    def _predict_proba_lr(self, X):
        """Probability estimation for OvR logistic regression.

        Positive class probabilities are computed as
        1. / (1. + np.exp(-self.decision_function(X)));
        multiclass is handled by normalizing that over all classes.
        """
        # Imported here so the names are always in scope; the module's
        # visible import block does not provide them.
        import numpy as np
        from scipy.special import expit

        prob = self.decision_function(X)
        # Apply the logistic function in place to avoid a second array.
        expit(prob, out=prob)
        if prob.ndim == 1:
            # 1-D case: columns are [1 - p, p].
            return np.vstack([1 - prob, prob]).T
        else:
            # OvR normalization, like LibLinear's predict_probability
            prob /= prob.sum(axis=1).reshape((prob.shape[0], -1))
            return prob
Loading

0 comments on commit 96317a8

Please sign in to comment.