Memory usage improvements (#175)
* Memory usage improvements

* Update analyze.py

Added the config sample rate param.

---------

Co-authored-by: Josef Haupt <[email protected]>
tgruetzm and Josef-Haupt authored Nov 30, 2023
1 parent a5861fc commit f42e4bf
Showing 3 changed files with 51 additions and 44 deletions.
83 changes: 39 additions & 44 deletions analyze.py
@@ -199,7 +199,7 @@ def getSortedTimestamps(results: dict[str, list]):
     return sorted(results, key=lambda t: float(t.split("-", 1)[0]))
 
 
-def getRawAudioFromFile(fpath: str):
+def getRawAudioFromFile(fpath: str, offset, duration):
     """Reads an audio file.
 
     Reads the file and splits the signal into chunks.
@@ -211,7 +211,7 @@ def getRawAudioFromFile(fpath: str):
         The signal split into a list of chunks.
     """
     # Open file
-    sig, rate = audio.openAudioFile(fpath, cfg.SAMPLE_RATE)
+    sig, rate = audio.openAudioFile(fpath, cfg.SAMPLE_RATE, offset, duration)
 
     # Split into raw audio chunks
     chunks = audio.splitSignal(sig, rate, cfg.SIG_LENGTH, cfg.SIG_OVERLAP, cfg.SIG_MINLEN)
@@ -256,64 +256,59 @@ def analyzeFile(item):
 
     # Start time
     start_time = datetime.datetime.now()
+    offset = 0
+    duration = cfg.FILE_SPLITTING_DURATION
+    start, end = 0, cfg.SIG_LENGTH
+    fileLengthSeconds = audio.getAudioFileLength(fpath, cfg.SAMPLE_RATE)
+    results = {}
 
     # Status
     print(f"Analyzing {fpath}", flush=True)
 
-    try:
-        # Open audio file and split into 3-second chunks
-        chunks = getRawAudioFromFile(fpath)
-
-    # If no chunks, show error and skip
-    except Exception as ex:
-        print(f"Error: Cannot open audio file {fpath}", flush=True)
-        utils.writeErrorLog(ex)
-
-        return False
-
     # Process each chunk
     try:
-        start, end = 0, cfg.SIG_LENGTH
-        results = {}
-        samples = []
-        timestamps = []
+        while offset < fileLengthSeconds:
+            chunks = getRawAudioFromFile(fpath, offset, duration)
+            samples = []
+            timestamps = []
 
-        for chunk_index, chunk in enumerate(chunks):
-            # Add to batch
-            samples.append(chunk)
-            timestamps.append([start, end])
+            for chunk_index, chunk in enumerate(chunks):
+                # Add to batch
+                samples.append(chunk)
+                timestamps.append([start, end])
 
-            # Advance start and end
-            start += cfg.SIG_LENGTH - cfg.SIG_OVERLAP
-            end = start + cfg.SIG_LENGTH
+                # Advance start and end
+                start += cfg.SIG_LENGTH - cfg.SIG_OVERLAP
+                end = start + cfg.SIG_LENGTH
 
-            # Check if batch is full or last chunk
-            if len(samples) < cfg.BATCH_SIZE and chunk_index < len(chunks) - 1:
-                continue
+                # Check if batch is full or last chunk
+                if len(samples) < cfg.BATCH_SIZE and chunk_index < len(chunks) - 1:
+                    continue
 
-            # Predict
-            p = predict(samples)
+                # Predict
+                p = predict(samples)
 
-            # Add to results
-            for i in range(len(samples)):
-                # Get timestamp
-                s_start, s_end = timestamps[i]
+                # Add to results
+                for i in range(len(samples)):
+                    # Get timestamp
+                    s_start, s_end = timestamps[i]
 
-                # Get prediction
-                pred = p[i]
+                    # Get prediction
+                    pred = p[i]
 
-                # Assign scores to labels
-                p_labels = zip(cfg.LABELS, pred)
+                    # Assign scores to labels
+                    p_labels = zip(cfg.LABELS, pred)
 
-                # Sort by score
-                p_sorted = sorted(p_labels, key=operator.itemgetter(1), reverse=True)
+                    # Sort by score
+                    p_sorted = sorted(p_labels, key=operator.itemgetter(1), reverse=True)
 
-                # Store top 5 results and advance indices
-                results[str(s_start) + "-" + str(s_end)] = p_sorted
+                    # Store top 5 results and advance indices
+                    results[str(s_start) + "-" + str(s_end)] = p_sorted
 
-            # Clear batch
-            samples = []
-            timestamps = []
+                # Clear batch
+                samples = []
+                timestamps = []
+            offset = offset + duration
 
     except Exception as ex:
         # Write error log
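The crux of the change: analyzeFile no longer decodes the whole file up front. It asks for the total length once, then walks the file in cfg.FILE_SPLITTING_DURATION-second windows, keeping only one window of samples in memory at a time; start and end are initialized once before the while loop, so result timestamps stay relative to the whole file rather than the current segment. A minimal standalone sketch of that pattern, assuming only librosa (the names analyze_in_segments and process are illustrative, not part of the commit):

import librosa

SAMPLE_RATE = 48000    # mirrors cfg.SAMPLE_RATE
SEGMENT_SECONDS = 600  # mirrors cfg.FILE_SPLITTING_DURATION


def process(sig, rate):
    # Placeholder for the real work: split into 3 s chunks, batch, predict
    print(f"got {len(sig) / rate:.1f} s of audio")


def analyze_in_segments(path):
    # Total length in seconds, without loading the whole signal
    total = librosa.get_duration(filename=path, sr=SAMPLE_RATE)
    offset = 0.0
    while offset < total:
        # Decode one window; peak memory is bounded by SEGMENT_SECONDS
        sig, rate = librosa.load(path, sr=SAMPLE_RATE, offset=offset,
                                 duration=SEGMENT_SECONDS, mono=True)
        process(sig, rate)
        offset += SEGMENT_SECONDS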
6 changes: 6 additions & 0 deletions audio.py
@@ -28,6 +28,12 @@ def openAudioFile(path: str, sample_rate=48000, offset=0.0, duration=None):
 
     return sig, rate
 
+def getAudioFileLength(path, sample_rate=48000):
+
+    # Open file with librosa (uses ffmpeg or libav)
+    import librosa
+
+    return librosa.get_duration(filename=path, sr=sample_rate)
 
 def get_sample_rate(path: str):
     import librosa
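The new helper leans on librosa.get_duration, which can typically read a file's length from its header (via soundfile) and only falls back to decoding the stream, so measuring a file stays cheap. A hedged usage sketch (the path is illustrative):

import audio

length_s = audio.getAudioFileLength("recording.wav", 48000)  # seconds, as a float
print(f"{length_s:.1f} s")

One caveat worth noting: librosa 0.10 deprecates get_duration's filename= keyword in favor of path=, so this call may warn or break depending on the installed librosa version.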
6 changes: 6 additions & 0 deletions config.py
@@ -90,6 +90,12 @@
 # Might only be useful for GPU inference.
 BATCH_SIZE: int = 1
 
+
+# Number of seconds to load from a file at a time
+# Files will be loaded into memory in segments that are only as long as this value
+# Lowering this value results in lower memory usage
+FILE_SPLITTING_DURATION: int = 600
+
 # Specifies the output format. 'table' denotes a Raven selection table,
 # 'audacity' denotes a TXT file with the same format as Audacity timeline labels
 # 'csv' denotes a generic CSV file with start, end, species and confidence.
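For a rough sense of what this buys, a back-of-the-envelope under the assumption of mono float32 samples (librosa.load's default output):

# Approximate audio buffer held per segment at the defaults above
seconds = 600          # FILE_SPLITTING_DURATION
sample_rate = 48000    # SAMPLE_RATE
bytes_per_sample = 4   # float32

print(f"{seconds * sample_rate * bytes_per_sample / 1024**2:.0f} MiB")  # ~110 MiB

So each 600 s segment costs on the order of 110 MiB instead of the full recording, and lowering FILE_SPLITTING_DURATION shrinks that buffer proportionally at the cost of a few more decode calls.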
