Memory usage improvements (#175)
* Memory usage improvements

* Update analyze.py

Added the config sample rate param.

---------

Co-authored-by: Josef Haupt <[email protected]>
tgruetzm and Josef-Haupt authored Nov 30, 2023
1 parent a5861fc commit f42e4bf
Showing 3 changed files with 51 additions and 44 deletions.
83 changes: 39 additions & 44 deletions analyze.py
@@ -199,7 +199,7 @@ def getSortedTimestamps(results: dict[str, list]):
     return sorted(results, key=lambda t: float(t.split("-", 1)[0]))
 
 
-def getRawAudioFromFile(fpath: str):
+def getRawAudioFromFile(fpath: str, offset, duration):
     """Reads an audio file.
 
     Reads the file and splits the signal into chunks.
@@ -211,7 +211,7 @@ def getRawAudioFromFile(fpath: str):
         The signal split into a list of chunks.
     """
     # Open file
-    sig, rate = audio.openAudioFile(fpath, cfg.SAMPLE_RATE)
+    sig, rate = audio.openAudioFile(fpath, cfg.SAMPLE_RATE, offset, duration)
 
     # Split into raw audio chunks
     chunks = audio.splitSignal(sig, rate, cfg.SIG_LENGTH, cfg.SIG_OVERLAP, cfg.SIG_MINLEN)
@@ -256,64 +256,59 @@ def analyzeFile(item):
 
     # Start time
     start_time = datetime.datetime.now()
+    offset = 0
+    duration = cfg.FILE_SPLITTING_DURATION
+    start, end = 0, cfg.SIG_LENGTH
+    fileLengthSeconds = audio.getAudioFileLength(fpath, cfg.SAMPLE_RATE)
+    results = {}
 
     # Status
     print(f"Analyzing {fpath}", flush=True)
 
-    try:
-        # Open audio file and split into 3-second chunks
-        chunks = getRawAudioFromFile(fpath)
-
-    # If no chunks, show error and skip
-    except Exception as ex:
-        print(f"Error: Cannot open audio file {fpath}", flush=True)
-        utils.writeErrorLog(ex)
-
-        return False
-
     # Process each chunk
     try:
-        start, end = 0, cfg.SIG_LENGTH
-        results = {}
-        samples = []
-        timestamps = []
+        while offset < fileLengthSeconds:
+            chunks = getRawAudioFromFile(fpath, offset, duration)
+            samples = []
+            timestamps = []
 
-        for chunk_index, chunk in enumerate(chunks):
-            # Add to batch
-            samples.append(chunk)
-            timestamps.append([start, end])
+            for chunk_index, chunk in enumerate(chunks):
+                # Add to batch
+                samples.append(chunk)
+                timestamps.append([start, end])
 
-            # Advance start and end
-            start += cfg.SIG_LENGTH - cfg.SIG_OVERLAP
-            end = start + cfg.SIG_LENGTH
+                # Advance start and end
+                start += cfg.SIG_LENGTH - cfg.SIG_OVERLAP
+                end = start + cfg.SIG_LENGTH
 
-            # Check if batch is full or last chunk
-            if len(samples) < cfg.BATCH_SIZE and chunk_index < len(chunks) - 1:
-                continue
+                # Check if batch is full or last chunk
+                if len(samples) < cfg.BATCH_SIZE and chunk_index < len(chunks) - 1:
+                    continue
 
-            # Predict
-            p = predict(samples)
+                # Predict
+                p = predict(samples)
 
-            # Add to results
-            for i in range(len(samples)):
-                # Get timestamp
-                s_start, s_end = timestamps[i]
+                # Add to results
+                for i in range(len(samples)):
+                    # Get timestamp
+                    s_start, s_end = timestamps[i]
 
-                # Get prediction
-                pred = p[i]
+                    # Get prediction
+                    pred = p[i]
 
-                # Assign scores to labels
-                p_labels = zip(cfg.LABELS, pred)
+                    # Assign scores to labels
+                    p_labels = zip(cfg.LABELS, pred)
 
-                # Sort by score
-                p_sorted = sorted(p_labels, key=operator.itemgetter(1), reverse=True)
+                    # Sort by score
+                    p_sorted = sorted(p_labels, key=operator.itemgetter(1), reverse=True)
 
-                # Store top 5 results and advance indices
-                results[str(s_start) + "-" + str(s_end)] = p_sorted
+                    # Store top 5 results and advance indices
+                    results[str(s_start) + "-" + str(s_end)] = p_sorted
 
-            # Clear batch
-            samples = []
-            timestamps = []
+                # Clear batch
+                samples = []
+                timestamps = []
+            offset = offset + duration
 
     except Exception as ex:
         # Write error log
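The crux of the change: analyzeFile no longer decodes the whole file up front. It asks for the total length once, then walks the file in cfg.FILE_SPLITTING_DURATION-second windows, keeping only one window of samples in memory at a time; start and end are initialized once before the while loop, so result timestamps stay relative to the whole file rather than the current segment. A minimal standalone sketch of that pattern, assuming only librosa (the names analyze_in_segments and process are illustrative, not part of the commit):

import librosa

SAMPLE_RATE = 48000    # mirrors cfg.SAMPLE_RATE
SEGMENT_SECONDS = 600  # mirrors cfg.FILE_SPLITTING_DURATION


def process(sig, rate):
    # Placeholder for the real work: split into 3 s chunks, batch, predict
    print(f"got {len(sig) / rate:.1f} s of audio")


def analyze_in_segments(path):
    # Total length in seconds, without loading the whole signal
    total = librosa.get_duration(filename=path, sr=SAMPLE_RATE)
    offset = 0.0
    while offset < total:
        # Decode one window; peak memory is bounded by SEGMENT_SECONDS
        sig, rate = librosa.load(path, sr=SAMPLE_RATE, offset=offset,
                                 duration=SEGMENT_SECONDS, mono=True)
        process(sig, rate)
        offset += SEGMENT_SECONDS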
6 changes: 6 additions & 0 deletions audio.py
@@ -28,6 +28,12 @@ def openAudioFile(path: str, sample_rate=48000, offset=0.0, duration=None):
 
     return sig, rate
 
+def getAudioFileLength(path, sample_rate=48000):
+
+    # Open file with librosa (uses ffmpeg or libav)
+    import librosa
+
+    return librosa.get_duration(filename=path, sr=sample_rate)
 
 def get_sample_rate(path: str):
     import librosa
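The new helper leans on librosa.get_duration, which can typically read a file's length from its header (via soundfile) and only falls back to decoding the stream, so measuring a file stays cheap. A hedged usage sketch (the path is illustrative):

import audio

length_s = audio.getAudioFileLength("recording.wav", 48000)  # seconds, as a float
print(f"{length_s:.1f} s")

One caveat worth noting: librosa 0.10 deprecates get_duration's filename= keyword in favor of path=, so this call may warn or break depending on the installed librosa version.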
6 changes: 6 additions & 0 deletions config.py
@@ -90,6 +90,12 @@
 # Might only be useful for GPU inference.
 BATCH_SIZE: int = 1
 
+
+# Number of seconds to load from a file at a time
+# Files will be loaded into memory in segments that are only as long as this value
+# Lowering this value results in lower memory usage
+FILE_SPLITTING_DURATION: int = 600
+
 # Specifies the output format. 'table' denotes a Raven selection table,
 # 'audacity' denotes a TXT file with the same format as Audacity timeline labels
 # 'csv' denotes a generic CSV file with start, end, species and confidence.
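For a rough sense of what this buys, a back-of-the-envelope under the assumption of mono float32 samples (librosa.load's default output):

# Approximate audio buffer held per segment at the defaults above
seconds = 600          # FILE_SPLITTING_DURATION
sample_rate = 48000    # SAMPLE_RATE
bytes_per_sample = 4   # float32

print(f"{seconds * sample_rate * bytes_per_sample / 1024**2:.0f} MiB")  # ~110 MiB

So each 600 s segment costs on the order of 110 MiB instead of the full recording, and lowering FILE_SPLITTING_DURATION shrinks that buffer proportionally at the cost of a few more decode calls.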
