feat: save the samples actually used by each dataframe to CSV

This might be useful when trying to link results back to the original metadata.
UCSD-E4E · Jul 11, 2024 · c5b3b1a · c5b3b1a
1 parent c9a4606
commit c5b3b1a
Show file tree

Hide file tree

Showing 2 changed files with 3 additions and 0 deletions.
diff --git a/pyha_analyzer/dataset.py b/pyha_analyzer/dataset.py
@@ -106,6 +106,8 @@ def __init__(self,
                 RandomApply([audtr.FrequencyMasking(cfg.freq_mask_param)], p=cfg.freq_mask_p),
                 RandomApply([audtr.TimeMasking(cfg.time_mask_param)],      p=cfg.time_mask_p))
 
+        samples.to_csv(f"train-{train}_df-{self.samples.shape}.csv")
+
     def calc_class_distribution(self) -> torch.Tensor:
         """ Returns class distribution (number of samples per class) """
         class_dist = []

diff --git a/pyha_analyzer/default_config.yml b/pyha_analyzer/default_config.yml
@@ -43,6 +43,7 @@ wandb_entity: "acoustic-species-identification"
 wandb_project: "acoustic-species-reu2023"
 wandb_run_name: "auto"
 debug: false
+save_samples: true
 
 # Functional settings
 seed: 0