Skip to content

Commit

Permalink
feat: extra error handling for bad files
Browse files Browse the repository at this point in the history
  • Loading branch information
Sean1572 committed Jul 11, 2024
1 parent c9a4606 commit 6f0edfd
Showing 1 changed file with 18 additions and 0 deletions.
18 changes: 18 additions & 0 deletions pyha_analyzer/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,20 @@ def verify_audio(self) -> None:
"""
Checks to make sure files exist that are referenced in input df
"""
# check the first file in df
# assume that if the first file is missing, all files are missing
test_file_name = self.samples[self.cfg.file_name_col].iloc[0]
test_file_loc = os.path.join(self.cfg.data_path, test_file_name)

if not os.path.exists(test_file_loc):
    # Fail fast: if the very first referenced file is absent, the data path
    # or CSV is almost certainly misconfigured, so checking the rest is moot.
    logger.error("""
ERROR: Missing the frist file from the frist row of samples!!!
file in question: %s
check manually CSV and datapath to fix this error
""", test_file_loc)
    # Exit with a non-zero status so shells/schedulers see the failure.
    # The bare `exit()` helper comes from `site` (not guaranteed to exist)
    # and, called without arguments, reports success (status 0).
    raise SystemExit(1)

# check rest of the data
missing_files = pd.Series(self.samples[self.cfg.file_name_col].unique()) \
.progress_apply(
lambda file: "good" if os.path.join(
Expand Down Expand Up @@ -211,6 +225,10 @@ def serialize_data(self) -> None:
raise FileNotFoundError("There were no valid filepaths found, check csv")

files = files[files["files"] != "bad"]

# `files.shape` is a (rows, cols) tuple, so comparing it to 0 is always
# False; `.empty` is the correct "no rows left after filtering" check.
if files.empty:
    raise FileNotFoundError("Filepaths were found, but all files could not be read, check for corrupted files")

self.samples = self.samples.merge(files, how="left",
left_on=self.cfg.file_name_col,
right_on="FILE NAME").dropna()
Expand Down

0 comments on commit 6f0edfd

Please sign in to comment.