From 234f2d2457559b36b393fa109a54787ae889581c Mon Sep 17 00:00:00 2001
From: suke <32221726+wangsrGit119@users.noreply.github.com>
Date: Mon, 9 Dec 2024 17:37:27 +0800
Subject: [PATCH] Add audio file detection by magic numbers

 Add audio file detection by magic numbers
# Add audio file detection by magic numbers

## Changes
- Add `is_audio_file()` function to detect audio files by checking file headers
- Support WAV, FLAC, MP3, M4A/AAC formats
- Replace extension-based detection with content-based detection in `read_and_config_file()`

## Why
More reliable audio file detection without relying on file extensions

## Test
- Tested with various audio formats (WAV, MP3)
---
 clearvoice/dataloader/misc.py | 37 ++++++++++++++++++++++++++++++++++-
 1 file changed, 36 insertions(+), 1 deletion(-)

diff --git a/clearvoice/dataloader/misc.py b/clearvoice/dataloader/misc.py
index 4315161..3ed6d80 100644
--- a/clearvoice/dataloader/misc.py
+++ b/clearvoice/dataloader/misc.py
@@ -11,6 +11,41 @@
 import os 
 import sys
 import librosa
+import struct
+
+def is_audio_file(file_path):
+    """
+    Return True if the file's leading bytes match a known audio signature.
+
+    Recognized signatures (first 12 bytes only):
+    - WAV: 'RIFF' at offset 0 and 'WAVE' at offset 8
+    - FLAC: 'fLaC' at offset 0
+    - MP3: 'ID3' tag, or an MPEG Layer III frame sync at offset 0
+    - M4A/AAC in an MP4 container: 'ftyp' box type at offset 4
+
+    Returns False for any other content and when the file cannot be read.
+    """
+    # Layer III sync words: MPEG-1 / MPEG-2 / MPEG-2.5, without and with CRC.
+    mp3_syncs = (b'\xFF\xFB', b'\xFF\xFA', b'\xFF\xF3', b'\xFF\xF2',
+                 b'\xFF\xE3', b'\xFF\xE2')
+    try:
+        with open(file_path, 'rb') as f:
+            header = f.read(12)  # every signature below ends by byte 12
+    except (IOError, OSError):
+        return False
+
+    # Compare 'WAVE' at its fixed offset so the RIFF size field cannot match.
+    if header.startswith(b'RIFF') and header[8:12] == b'WAVE':
+        return True
+
+    # FLAC stream marker, ID3v2 tag, or a bare MP3 frame header.
+    if header.startswith((b'fLaC', b'ID3') + mp3_syncs):
+        return True
+
+    # An MP4 file starts with a 4-byte box size followed by the box type, so
+    # 'ftyp' is only meaningful at offset 4 (a substring test would also
+    # accept e.g. a text list whose first path contains "ftyp").
+    return header[4:8] == b'ftyp'
 
 def read_and_config_file(args, input_path, decode=0):
     """
@@ -60,7 +95,7 @@ def read_and_config_file(args, input_path, decode=0):
                 processed_list = librosa.util.find_files(input_path, ext="flac")
         else:
             # If it's a single file and it's a .wav or .flac, add to processed list
-            if input_path.lower().endswith(".wav") or input_path.lower().endswith(".flac"):
+            if is_audio_file(input_path):
                 processed_list.append(input_path)
             else:
                 # Read file paths from the input text file (one path per line)