From c5b3b1ac9a5c2063b836d5eea1035d26ad8117aa Mon Sep 17 00:00:00 2001 From: Sean Perry Date: Thu, 11 Jul 2024 11:39:49 -0700 Subject: [PATCH] feat: adds feature to save samples actually used by dataframes; might be useful when trying to link results back to original metadata --- pyha_analyzer/dataset.py | 2 ++ pyha_analyzer/default_config.yml | 1 + 2 files changed, 3 insertions(+) diff --git a/pyha_analyzer/dataset.py b/pyha_analyzer/dataset.py index c4d3521..4b4d810 100644 --- a/pyha_analyzer/dataset.py +++ b/pyha_analyzer/dataset.py @@ -106,6 +106,8 @@ def __init__(self, RandomApply([audtr.FrequencyMasking(cfg.freq_mask_param)], p=cfg.freq_mask_p), RandomApply([audtr.TimeMasking(cfg.time_mask_param)], p=cfg.time_mask_p)) + samples.to_csv(f"train-{train}_df-{self.samples.shape}.csv") + def calc_class_distribution(self) -> torch.Tensor: """ Returns class distribution (number of samples per class) """ class_dist = [] diff --git a/pyha_analyzer/default_config.yml b/pyha_analyzer/default_config.yml index 8a0f1f7..356a8aa 100644 --- a/pyha_analyzer/default_config.yml +++ b/pyha_analyzer/default_config.yml @@ -43,6 +43,7 @@ wandb_entity: "acoustic-species-identification" wandb_project: "acoustic-species-reu2023" wandb_run_name: "auto" debug: false +save_samples: true # Functional settings seed: 0