Skip to content

Commit

Permalink
294 add zeros instead of noise to short samples (#313)
Browse files Browse the repository at this point in the history
* Added option to turn off noise while padding signal

* refactor

---------

Co-authored-by: Josef Haupt <[email protected]>
  • Loading branch information
Josef-Haupt and Josef Haupt authored Apr 17, 2024
1 parent 762ad72 commit 196f5ca
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 15 deletions.
39 changes: 24 additions & 15 deletions audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def saveSignal(sig, fname: str):
sf.write(fname, sig, 48000, "PCM_16")


def noise(sig, shape, amount=None):
def pad(sig, seconds, srate, amount=None):
"""Creates noise.
Creates a noise vector with the given shape.
Expand All @@ -70,17 +70,28 @@ def noise(sig, shape, amount=None):
Returns:
A numpy array of noise with the given shape.
"""
# Random noise intensity
if amount == None:
amount = RANDOM.uniform(0.1, 0.5)

# Create Gaussian noise
try:
noise = RANDOM.normal(min(sig) * amount, max(sig) * amount, shape)
except:
noise = np.zeros(shape)
target_len = int(srate * seconds)

return noise.astype("float32")
if len(sig) < target_len:
noise_shape = target_len - len(sig)

if not cfg.USE_NOISE:
noise = np.zeros(noise_shape)
else:
# Random noise intensity
if amount == None:
amount = RANDOM.uniform(0.1, 0.5)

# Create Gaussian noise
try:
noise = RANDOM.normal(min(sig) * amount, max(sig) * amount, noise_shape)
except:
noise = np.zeros(noise_shape)

return np.hstack((sig, noise.astype("float32")))

return sig


def splitSignal(sig, rate, seconds, overlap, minlen):
Expand All @@ -105,9 +116,7 @@ def splitSignal(sig, rate, seconds, overlap, minlen):
if len(split) < int(minlen * rate) and len(sig_splits) > 0:
break

# Signal chunk too short?
if len(split) < int(rate * seconds):
split = np.hstack((split, noise(split, (int(rate * seconds) - len(split)), 0.5)))
split = pad(split, seconds, rate, 0.5)

sig_splits.append(split)

Expand All @@ -128,8 +137,8 @@ def cropCenter(sig, rate, seconds):
sig = sig[start:end]

# Pad with noise
elif len(sig) < int(seconds * rate):
sig = np.hstack((sig, noise(sig, (int(seconds * rate) - len(sig)), 0.5)))
else:
sig = pad(sig, seconds, rate, 0.5)

return sig

Expand Down
4 changes: 4 additions & 0 deletions config.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,10 @@
# Lowering this value results in lower memory usage
FILE_SPLITTING_DURATION: int = 600

# Whether to use noise to pad the signal
# If set to False, the signal will be padded with zeros
USE_NOISE: bool = False

# Specifies the output format. 'table' denotes a Raven selection table,
# 'audacity' denotes a TXT file with the same format as Audacity timeline labels
# 'csv' denotes a generic CSV file with start, end, species and confidence.
Expand Down

0 comments on commit 196f5ca

Please sign in to comment.