From 234f2d2457559b36b393fa109a54787ae889581c Mon Sep 17 00:00:00 2001
From: suke <32221726+wangsrGit119@users.noreply.github.com>
Date: Mon, 9 Dec 2024 17:37:27 +0800
Subject: [PATCH] Add audio file detection by magic numbers

Add audio file detection by magic numbers
# Add audio file detection by magic numbers

## Changes
- Add `is_audio_file()` function to detect audio files by checking file headers
- Support WAV, FLAC, MP3, M4A/AAC formats
- Replace extension-based detection with content-based detection in `read_and_config_file()`

## Why
More reliable audio file detection without relying on file extensions

## Test
- Tested with various audio formats (WAV, MP3)
---
 clearvoice/dataloader/misc.py | 63 +++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 61 insertions(+), 2 deletions(-)

diff --git a/clearvoice/dataloader/misc.py b/clearvoice/dataloader/misc.py
index 4315161..3ed6d80 100644
--- a/clearvoice/dataloader/misc.py
+++ b/clearvoice/dataloader/misc.py
@@ -11,6 +11,65 @@ import os
 import sys
 import librosa
 
+
+# Second byte of the two-byte frame sync for each stream type accepted by
+# is_audio_file(). 0xFE is deliberately absent: FF FE is the UTF-16 LE byte
+# order mark, so a UTF-16 text file list must not be taken for audio.
+_FRAME_SYNC_SECOND_BYTES = frozenset((
+    0xFB, 0xFA,  # MPEG-1 Layer III, without / with CRC
+    0xF3, 0xF2,  # MPEG-2 Layer III
+    0xE3, 0xE2,  # MPEG-2.5 Layer III
+    0xF1, 0xF0,  # ADTS AAC, MPEG-4
+    0xF9, 0xF8,  # ADTS AAC, MPEG-2
+))
+
+
+def is_audio_file(file_path):
+    """
+    Return True if the file's leading bytes match a known audio signature.
+
+    Only the first 12 bytes are read; the file extension is ignored.
+
+    Recognised signatures:
+    - WAV: b"RIFF" at offset 0 and b"WAVE" at offset 8
+    - FLAC: b"fLaC" at offset 0
+    - MP3: b"ID3" tag at offset 0, or an MPEG Layer III frame sync
+    - AAC: ADTS frame sync
+    - M4A: ISO base media b"ftyp" box type at offset 4; MP4/MOV video
+      files start with the same box and are matched as well
+
+    Args:
+        file_path: Path of the file to inspect.
+
+    Returns:
+        bool: True on a signature match; False otherwise, including when
+        the file cannot be opened or read.
+    """
+    try:
+        with open(file_path, 'rb') as f:
+            header = f.read(12)
+    except OSError:
+        # Missing, unreadable, or a directory: report "not audio".
+        return False
+
+    # WAV: other RIFF containers (AVI, WebP, ...) use a different form type.
+    if header[:4] == b'RIFF' and header[8:12] == b'WAVE':
+        return True
+
+    if header[:4] == b'fLaC':
+        return True
+
+    if header[:3] == b'ID3':
+        return True
+
+    # Bare MPEG Layer III or ADTS AAC stream with no ID3 tag in front.
+    if (len(header) >= 2 and header[0] == 0xFF
+            and header[1] in _FRAME_SYNC_SECOND_BYTES):
+        return True
+
+    # The box type follows the 4-byte box size, so it sits at bytes 4-7.
+    return header[4:8] == b'ftyp'
+
 
 def read_and_config_file(args, input_path, decode=0):
     """
@@ -60,7 +119,7 @@ def read_and_config_file(args, input_path, decode=0):
                 processed_list = librosa.util.find_files(input_path, ext="flac")
         else:
-            # If it's a single file and it's a .wav or .flac, add to processed list
-            if input_path.lower().endswith(".wav") or input_path.lower().endswith(".flac"):
+            # If it's a single file whose header marks it as audio, add to processed list
+            if is_audio_file(input_path):
                 processed_list.append(input_path)
             else:
                 # Read file paths from the input text file (one path per line)