diff --git a/setup.py b/setup.py index 03b75fae8..9d1fea2f4 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,7 @@ # Setup configuration setuptools.setup( name="simba-uw-tf-dev", - version="2.4.6", + version="2.4.8", author="Simon Nilsson, Jia Jie Choong, Sophia Hwang", author_email="sronilsson@gmail.com", description="Toolkit for computer classification and analysis of behaviors in experimental animals", diff --git a/simba/SimBA.py b/simba/SimBA.py index d992929a4..c2704d670 100644 --- a/simba/SimBA.py +++ b/simba/SimBA.py @@ -356,7 +356,7 @@ def activate(box, *args): label_behavior_frm = CreateLabelFrameWithIcon(parent=tab7, header="LABEL BEHAVIOR", icon_name=Keys.DOCUMENTATION.value, icon_link=Links.LABEL_BEHAVIOR.value) select_video_btn_new = SimbaButton(parent=label_behavior_frm, txt="Select video (create new video annotation)", img='label_blue', txt_clr='navy', cmd=select_labelling_video, cmd_kwargs={'config_path': lambda :self.config_path, 'threshold_dict': lambda: None, 'setting': lambda: "from_scratch", 'continuing': lambda: False}, thread=False) - select_video_btn_continue = SimbaButton(parent=label_behavior_frm, txt="Select video (continue existing video annotation)", img='label_yellow', txt_clr='darkgoldenrod', cmd=select_labelling_video, cmd_kwargs={'config_path': lambda: self.config_path, 'threshold_dict': lambda:None, 'setting': lambda:None, 'continuing': lambda:True}, thread=False) + select_video_btn_continue = SimbaButton(parent=label_behavior_frm, txt="Select video (continue existing video annotation)", img='label_yellow', txt_clr='darkgoldenrod', cmd=select_labelling_video, cmd_kwargs={'config_path': lambda: self.config_path, 'threshold_dict': lambda:None, 'setting': lambda: None, 'continuing': lambda:True}, thread=False) label_thirdpartyann = CreateLabelFrameWithIcon(parent=tab7, header="IMPORT THIRD-PARTY BEHAVIOR ANNOTATIONS", icon_name=Keys.DOCUMENTATION.value, icon_link=Links.THIRD_PARTY_ANNOTATION.value) button_importmars = 
def silhouette_score(self, x: np.ndarray, y: np.ndarray) -> float:
    """
    Compute the mean silhouette coefficient for a dataset and its cluster labels.

    For every sample ``i`` the coefficient is ``(b_i - a_i) / max(a_i, b_i)`` where ``a_i`` is the mean
    Euclidean distance from ``i`` to the other members of its own cluster and ``b_i`` is the smallest mean
    distance from ``i`` to the members of any other cluster.

    .. note::
       * Samples that are the sole member of their cluster are assigned a coefficient of ``0``.
       * Samples for which ``max(a_i, b_i) == 0`` (e.g., coincident points in different clusters) are
         assigned a coefficient of ``0`` rather than producing ``nan``.
       * A full ``(n_samples, n_samples)`` distance matrix is held in memory.
       * ``self`` is not used by the computation.

    :param np.ndarray x: The dataset as a 2D NumPy array of shape (n_samples, n_features).
    :param np.ndarray y: Cluster labels for each data point as a 1D NumPy array of shape (n_samples,).
    :returns: The average silhouette score for the dataset.
    :rtype: float
    :raises ValueError: If ``x`` is not 2D, if ``y`` is not 1D with one label per row of ``x``, or if ``y`` holds fewer than two distinct labels (the score is then undefined).

    :example:
    >>> x, y = make_blobs(n_samples=10000, n_features=400, centers=5, cluster_std=10, center_box=(-1, 1))
    >>> score = stats.silhouette_score(x=x, y=y)  # ``stats``: an instance of the enclosing class

    >>> from sklearn.metrics import silhouette_score as sklearn_silhouette  # SKLEARN ALTERNATIVE
    >>> score_sklearn = sklearn_silhouette(x, y)
    """
    x = np.asarray(x, dtype=np.float64)
    y = np.asarray(y)
    if x.ndim != 2:
        raise ValueError(f"x must be a 2D array of shape (n_samples, n_features), got {x.ndim} dimension(s).")
    if y.ndim != 1 or y.shape[0] != x.shape[0]:
        raise ValueError(f"y must be a 1D array with one label per row of x ({x.shape[0]}), got shape {y.shape}.")

    # cluster_codes maps every sample to the position of its label in cluster_ids.
    cluster_ids, cluster_codes, cluster_sizes = np.unique(y, return_inverse=True, return_counts=True)
    n_clusters = cluster_ids.shape[0]
    if n_clusters < 2:
        raise ValueError(f"The silhouette score requires at least 2 distinct cluster labels, got {n_clusters}.")

    dists = cdist(x, x)
    # Summed distance from every sample to the members of each cluster: shape (n_samples, n_clusters).
    dist_sums = np.stack([dists[:, cluster_codes == code].sum(axis=1) for code in range(n_clusters)], axis=1)

    sample_idx = np.arange(x.shape[0])
    own_sizes = cluster_sizes[cluster_codes]
    # The self-distance is 0, so the own-cluster sum already excludes the sample itself; only the
    # divisor has to drop by one. The divisor is clamped to 1 for singletons, which are zeroed below.
    a = dist_sums[sample_idx, cluster_codes] / np.maximum(own_sizes - 1, 1)

    mean_dists = dist_sums / cluster_sizes
    # Mask out the own cluster so the row-wise minimum only considers the other clusters.
    mean_dists[sample_idx, cluster_codes] = np.inf
    b = mean_dists.min(axis=1)

    denominator = np.maximum(a, b)
    scores = np.zeros(x.shape[0], dtype=np.float64)
    defined = (own_sizes > 1) & (denominator > 0)
    scores[defined] = (b[defined] - a[defined]) / denominator[defined]

    return float(np.mean(scores))