Skip to content

Commit

Permalink
Update.
Browse files Browse the repository at this point in the history
  • Loading branch information
Krsto Proroković committed Feb 20, 2024
1 parent 6cc899a commit b295916
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 9 deletions.
27 changes: 20 additions & 7 deletions bias_scan/clustering/_bahc.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@

class BiasAwareHierarchicalClustering(ABC, BaseEstimator, ClusterMixin):
"""
Base class for Bias Aware Hierarchical Clustering.
Base class for Bias-Aware Hierarchical Clustering.
This abstract class specifies an interface for all bias aware hierarchical
This abstract class specifies an interface for all bias-aware hierarchical
clustering classes.
Parameters
Expand All @@ -17,14 +17,23 @@ class BiasAwareHierarchicalClustering(ABC, BaseEstimator, ClusterMixin):
Maximum number of iterations.
min_cluster_size : int
Minimum size of a cluster.
Attributes
----------
n_clusters_ : int
Number of clusters.
labels_ : ndarray of shape (n_samples,)
Labels for each point.
biases_ : ndarray of shape (n_clusters,)
Biases for each cluster.
"""

def __init__(self, max_iter, min_cluster_size):
self.max_iter = max_iter
self.min_cluster_size = min_cluster_size

def fit(self, X, y):
"""What the function does
"""Compute bias-aware hierarchical clustering.
Parameters
----------
Expand All @@ -37,7 +46,7 @@ def fit(self, X, y):
Returns
-------
self : object
Description here
Fitted estimator.
"""
n_samples, _ = X.shape
self.n_clusters_ = 1
Expand Down Expand Up @@ -81,12 +90,16 @@ def fit(self, X, y):
biases.append(bias)
labels = np.array(labels + [label for _, label, _ in heap])
biases = np.array(biases + [bias for _, _, bias in heap])
self.biases_ = biases[np.argsort(labels)]
sorted_indices = np.argsort(-biases)
labels = labels[sorted_indices]
self.biases_ = biases[sorted_indices]
d = { label: index for label, index in zip(labels, range(n_samples))}
self.labels_ = np.array(d[label] for label in self.labels_)
return self

@abstractmethod
def split(self, X):
"""What the function does
"""Splits the data into two clusters.
Parameters
----------
Expand All @@ -95,6 +108,6 @@ def split(self, X):
Returns
-------
labels : (n_samples)
Description goes here.
ndarray of shape (n_samples,)
"""
pass
2 changes: 1 addition & 1 deletion bias_scan/clustering/_kmeans.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@


class BiasAwareHierarchicalKMeans(BiasAwareHierarchicalClustering):
"""Bias Aware Hierarchical k-Means Clustering.
"""Bias-Aware Hierarchical k-Means Clustering.
Parameters
----------
Expand Down
2 changes: 1 addition & 1 deletion bias_scan/clustering/_kmodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@


class BiasAwareHierarchicalKModes(BiasAwareHierarchicalClustering):
"""Bias Aware Hierarchical k-Means Clustering.
"""Bias-Aware Hierarchical k-Modes Clustering.
Parameters
----------
Expand Down

0 comments on commit b295916

Please sign in to comment.