From 157f129d7462c313087ecb028868a5ac361c9b14 Mon Sep 17 00:00:00 2001 From: Peter Sobot Date: Wed, 12 Apr 2017 11:29:33 -0400 Subject: [PATCH] Init public commit. --- .gitignore | 97 ++++++++++ LICENSE | 21 +++ README.md | 116 ++++++++++++ lib/__init__.py | 0 lib/click_detector.py | 391 +++++++++++++++++++++++++++++++++++++++ lib/compare.py | 198 ++++++++++++++++++++ lib/constants.py | 12 ++ lib/deflac.py | 93 ++++++++++ lib/flacize.py | 130 +++++++++++++ lib/group_velcurves.py | 44 +++++ lib/loop.py | 251 +++++++++++++++++++++++++ lib/map_xfvel.py | 11 ++ lib/midi_helpers.py | 29 +++ lib/numpy_helpers.py | 74 ++++++++ lib/quantize.py | 116 ++++++++++++ lib/record.py | 231 +++++++++++++++++++++++ lib/send_notes.py | 340 ++++++++++++++++++++++++++++++++++ lib/sfzparser.py | 127 +++++++++++++ lib/spectrogram.py | 99 ++++++++++ lib/starts_with_click.py | 17 ++ lib/truncate.py | 34 ++++ lib/utils.py | 96 ++++++++++ lib/volume_leveler.py | 114 ++++++++++++ lib/wavio.py | 42 +++++ record.py | 101 ++++++++++ requirements.txt | 6 + samplescanner | 1 + 27 files changed, 2791 insertions(+) create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 README.md create mode 100644 lib/__init__.py create mode 100644 lib/click_detector.py create mode 100644 lib/compare.py create mode 100644 lib/constants.py create mode 100644 lib/deflac.py create mode 100644 lib/flacize.py create mode 100644 lib/group_velcurves.py create mode 100644 lib/loop.py create mode 100644 lib/map_xfvel.py create mode 100644 lib/midi_helpers.py create mode 100644 lib/numpy_helpers.py create mode 100644 lib/quantize.py create mode 100644 lib/record.py create mode 100644 lib/send_notes.py create mode 100644 lib/sfzparser.py create mode 100644 lib/spectrogram.py create mode 100644 lib/starts_with_click.py create mode 100644 lib/truncate.py create mode 100644 lib/utils.py create mode 100644 lib/volume_leveler.py create mode 100644 lib/wavio.py create mode 100755 record.py create mode 100644 
requirements.txt create mode 120000 samplescanner diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..05798f0 --- /dev/null +++ b/.gitignore @@ -0,0 +1,97 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*,cover +.hypothesis/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# dotenv +.env + +# virtualenv +.venv +venv/ +ENV/ + +# Spyder project settings +.spyderproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..cd50560 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +The MIT License + +Copyright (c) 2015-2017 Peter Sobot https://petersobot.com github@petersobot.com + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit 
persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..1ad890f --- /dev/null +++ b/README.md @@ -0,0 +1,116 @@ +# SampleScanner + +![SampleScanner Logo](https://cloud.githubusercontent.com/assets/213293/24964018/1dcb4092-1f6e-11e7-8b3b-47704e6c8aeb.png) + + +SampleScanner is a command-line tool to turn MIDI instruments (usually hardware) into virtual (software) instruments automatically. It's similar to the now-discontinued [Redmatica's _AutoSampler_](http://www.soundonsound.com/reviews/redmatica-autosampler) software (now part of Apple's [MainStage](https://441k.com/sampling-synths-with-auto-sampler-in-mainstage-3-412deb8f900e) package), but open-source and cross-platform. 
+ +## Features + + - Uses native system integration (via `rtmidi` and `pyAudio`) for compatibility with all audio and MIDI devices + - Outputs to the open-source [sfz 2.0 sample format](http://ariaengine.com/overview/sfz-format/), playable by [Sforzando](https://www.plogue.com/products/sforzando/) (and others) + - Optional FLAC compression (on by default) to reduce sample set file size by up to 75% + - Flexible configuration options and extensive command-line interface + - Expermimental looping algorithm to extend perpetual samples + - Clipping detection at sample time + - 100% Python to enable cross-platform compatibility (only tested on macOS at the moment) + +## Installation + +Requires a working `python`, `pip`, and `ffmpeg` to be installed on the system. + +``` +git clone git@github.com:psobot/SampleScanner +cd SampleScanner +pip install -r requirements.txt +``` + +## How to run + +Run `./samplescanner -h` for a full argument listing: + +```contentsof +usage: samplescanner [-h] [--program-number PROGRAM_NUMBER] + [--low-key LOW_KEY] [--high-key HIGH_KEY] + [--velocity-levels VELOCITY_LEVELS [VELOCITY_LEVELS ...]] + [--key-skip KEY_RANGE] [--max-attempts MAX_ATTEMPTS] + [--limit LIMIT] [--has-portamento] [--sample-asc] + [--no-flac] [--no-delete] [--loop] [--ignore-clicks] + [--midi-port-name MIDI_PORT_NAME] + [--midi-channel MIDI_CHANNEL] + [--audio-interface-name AUDIO_INTERFACE_NAME] + [--sample-rate SAMPLE_RATE] [--print-progress] + output_folder + +create SFZ files from external audio devices + +optional arguments: + -h, --help show this help message and exit + +Sampling Options: + --program-number PROGRAM_NUMBER + switch to a program number before recording + --low-key LOW_KEY key to start sampling from (key name, octave number) + --high-key HIGH_KEY key to stop sampling at (key name, octave number) + --velocity-levels VELOCITY_LEVELS [VELOCITY_LEVELS ...] 
+ velocity levels (in [1, 127]) to sample + --key-skip KEY_RANGE number of keys covered by one sample + --max-attempts MAX_ATTEMPTS + maximum number of tries to resample a note + --limit LIMIT length in seconds of longest sample + --has-portamento play each note once before sampling to avoid + portamento sweeps between notes + --sample-asc sample notes from low to high (default false) + +Output Options: + output_folder name of output folder + --no-flac don't compress output to flac samples + --no-delete leave temporary .aif files in place after flac + compression + --loop attempt to loop sounds (should only be used with + sounds with infinite sustain) + --ignore-clicks turn off click/audio corruption checks (default false) + +MIDI/Audio IO Options: + --midi-port-name MIDI_PORT_NAME + name of MIDI device to use + --midi-channel MIDI_CHANNEL + MIDI channel to send messages on + --audio-interface-name AUDIO_INTERFACE_NAME + name of audio input device to use + --sample-rate SAMPLE_RATE + sample rate to use. audio interface must support this + rate. + +Misc Options: + --print-progress show text-based VU meters in terminal (default false, + can cause audio artifacts) +``` + +## Copyright and License + +tl;dr: SampleScanner is © 2015-2017 [Peter Sobot](https://petersobot.com), and released under the MIT License. 
+ +```contentsof +The MIT License + +Copyright (c) 2015-2017 Peter Sobot https://petersobot.com github@petersobot.com + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+``` diff --git a/lib/__init__.py b/lib/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/lib/click_detector.py b/lib/click_detector.py new file mode 100644 index 0000000..712059a --- /dev/null +++ b/lib/click_detector.py @@ -0,0 +1,391 @@ +import os +import pdb +import csv +import sys +import time +import numpy +import itertools +import traceback +from tqdm import tqdm +from collections import defaultdict +from tabulate import tabulate +import matplotlib.cm as cm +from spectrogram import plotstft, stft, logscale_spec +from itertools import islice + +from wavio import read_wave_file, read_flac_file +from numpy_helpers import sliding_window + +import matplotlib.pyplot as plt + +sampling_rate = 48000.0 +assume_stereo_frequency_match = True +CHUNK_SIZE = 2048 + +FFT_SIZE = 512 +SECOND_DERIVATIVE_THRESHOLD = -15000 +ARCTAN_STRETCH_X = 25 +PI_OVER_2 = 1.57079 + + +def timeit(method): + + def timed(*args, **kw): + ts = time.time() + result = method(*args, **kw) + te = time.time() + + print '%r (%r, %r) %2.2f sec' % \ + (method.__name__, args, kw, te - ts) + return result + + return timed + + +def chunks(l, n): + """Yield successive n-sized chunks from l.""" + for i in range(0, len(l), n): + yield l[i:i + n] + + +def find_clicks_using_second_derivatives(*channels): + # Not very reliable. + binsize = 2 ** 10 + samplerate = 48000 + click_bins = set() + for channel in channels: + s = stft(channel, binsize) + sshow, freq = logscale_spec(s, factor=1.0, sr=samplerate) + ims = 20. 
* numpy.log10(numpy.abs(sshow) / 10e-6) # amplitude to db + + second_derivatives = numpy.diff(numpy.sum(ims, axis=1), 2) + for bin_index, bin in enumerate( + second_derivatives < SECOND_DERIVATIVE_THRESHOLD + ): + if bin: + click_bins.add(( + ((bin_index) * binsize / 2), + ((bin_index + 1) * binsize / 2) + )) + return click_bins + + +def rolling_window_slow(vertical_similarity): + window_size = 10 + return [ + (numpy.arctan(float(i) / len(vertical_similarity) + * ARCTAN_STRETCH_X) / PI_OVER_2) + * numpy.abs( + numpy.sum(slice) + ) + for i, slice in enumerate( + list( + window(vertical_similarity, window_size) + )[:-window_size * 2] + ) + ] + + +def rolling_window(vertical_similarity): + window_size = 10 + + to_sum = numpy.array(list( + vertical_similarity[offset:-(window_size - offset)] + for offset in xrange(0, window_size) + )) + + summed = numpy.abs(numpy.sum(to_sum, axis=0))[:-window_size * 2] + linspace = numpy.linspace(0, ARCTAN_STRETCH_X, len(summed)) + coefficients = numpy.arctan(linspace) / PI_OVER_2 + + return summed * coefficients + + +def find_clicks_old_unreliable(*channels): + binsize = 2 ** 10 + samplerate = 48000 + click_bins = set() + for channel in channels: + s = stft(channel, binsize) + sshow, freq = logscale_spec(s, factor=1.0, sr=samplerate) + ims = 20. * numpy.log10(numpy.abs(sshow) / 10e-6) # amplitude to db + + vertical_similarity = numpy.diff([ + numpy.sqrt(numpy.average((column - numpy.median(column)) ** 2)) + for column in ims + ]) + + threshold = 5 + for bin_index, value in enumerate(rolling_window(vertical_similarity)): + if value >= threshold: + click_bins.add(( + ((bin_index) * binsize / 2), + ((bin_index + 1) * binsize / 2) + )) + return click_bins + + +def find_clicks_windowed(*channels): + binsize = 2 ** 10 + samplerate = 48000 + click_bins = set() + for channel in channels: + s = stft(channel, binsize) + sshow, freq = logscale_spec(s, factor=1.0, sr=samplerate) + ims = 20. 
* numpy.log10(numpy.abs(sshow) / 10e-6) # amplitude to db + + vertical_similarity = numpy.diff([ + numpy.sqrt(numpy.average((column - numpy.median(column)) ** 2)) + for column in ims + ]) + + threshold = 40 + for bin_index, win in enumerate( + sliding_window( + rolling_window(vertical_similarity), + 10, + copy=False, + ) + ): + high_point = numpy.max(win) + avg_baseline = numpy.median(win) + if (high_point / avg_baseline) > threshold: + click_bins.add(( + ((bin_index) * binsize / 2), + ((bin_index + 1) * binsize / 2) + )) + return click_bins + +CLICK_FREQUENCY_CHECKS = [6000, 10666, 13000, 15000, 18619] +CLICK_FREQUENCY_THRESHOLD = 170 + + +def find_clicks_points(*channels): + binsize = 2 ** 10 + samplerate = 48000 + + bins_per_check = defaultdict(set) + for channel in channels: + s = stft(channel, binsize) + sshow, freq = logscale_spec(s, factor=1.0, sr=samplerate) + ims = 20. * numpy.log10(numpy.abs(sshow) / 10e-6) # amplitude to db + + for check in CLICK_FREQUENCY_CHECKS: + check_freq = next(i for i in freq if i > check) + check_freq_index = freq.index(check_freq) + + check_freq_values = [column[check_freq_index] for column in ims] + # plt.plot(check_freq_values) + # plt.show() + + for bin_index, value in enumerate(check_freq_values): + if value > CLICK_FREQUENCY_THRESHOLD: + bins_per_check[check].add(( + ((bin_index) * binsize / 2), + ((bin_index + 1) * binsize / 2) + )) + + return set().intersection(*bins_per_check.values()) + + +# 2am: finds clicks by checking a 4-bin window and expects the +# middle 2 bins to be louder for most of their vertical content (?) +# I'm really tired but this seems to work on the test dataset +def find_clicks(*channels): + binsize = 2 ** 10 + samplerate = 48000 + + click_bins = set() + initial_offset = 2 + for channel in channels: + s = stft(channel, binsize) + sshow, freq = logscale_spec(s, factor=1.0, sr=samplerate) + ims = 20. 
* numpy.log10(numpy.abs(sshow) / 10e-6) # amplitude to db + + ims = ims[initial_offset:] + + threshold = 1.01 + freq_count_pct_threshold = 0.095 + + win_size = 4 + for bin_index, rows in enumerate(sliding_window(ims, win_size)): + count_of_matching_freqs = 0 + + # todo: make extensible to different window sizes + middle = (rows[:, 1] + rows[:, 2]) / 2 + + middle_over_start = numpy.clip( + middle / rows[:, 0], + 0, + threshold + ) + middle_over_end = numpy.clip( + middle / rows[:, win_size - 1], + 0, + threshold + ) + + subtracted = (middle_over_start + middle_over_end) - ( + 2 * threshold + ) + count_of_matching_freqs = len(numpy.where( + subtracted >= 0 + )[0]) + pct_matching = (count_of_matching_freqs / float(len(freq))) + + if pct_matching < freq_count_pct_threshold: + click_bins.add(( + ((bin_index) * binsize / 2), + ((bin_index + 1) * binsize / 2) + )) + return click_bins + + +def find_clicks_convolution(*channels): + binsize = 2 ** 10 + samplerate = 48000 + + click_bins = set() + for channel in channels: + samples = channel + s = stft(channel, binsize) + sshow, freq = logscale_spec(s, factor=1.0, sr=samplerate) + ims = 20. 
* numpy.log10(numpy.abs(sshow) / 10e-6) # amplitude to db + + threshold = 1.01 + freq_count_pct_threshold = 0.20 + + convolved = numpy.array( + [numpy.convolve(c, [0.5, 1, 0.5], 'valid') for c in ims] + ) + timebins, freqbins = numpy.shape(convolved) + colormap = "jet" + plt.figure(figsize=(15, 7.5)) + plt.imshow(numpy.transpose(convolved), origin="lower", aspect="auto", cmap=colormap, interpolation="none") + plt.colorbar() + + plt.xlabel("time (s)") + plt.ylabel("frequency (hz)") + plt.xlim([0, timebins-1]) + plt.ylim([0, freqbins]) + + xlocs = numpy.float32(numpy.linspace(0, timebins-1, 5)) + plt.xticks(xlocs, ["%.02f" % l for l in ((xlocs*len(samples)/timebins)+(0.5*binsize))/samplerate]) + ylocs = numpy.int16(numpy.round(numpy.linspace(0, freqbins-1, 10))) + plt.yticks(ylocs, ["%.02f" % freq[i] for i in ylocs]) + plt.show() + + # for bin_index, columns in enumerate(sliding_window(ims, 3)): + # count_of_matching_freqs = 0 + # for i in xrange(0, len(columns)): + # middle_over_start = columns[i][1] / columns[i][0] + # middle_over_end = columns[i][1] / columns[i][2] + # if middle_over_start > threshold and \ + # middle_over_end > threshold: + # count_of_matching_freqs += 1 + # if count_of_matching_freqs / float(len(freq)) \ + # > freq_count_pct_threshold: + # click_bins.add(( + # ((bin_index) * binsize / 2), + # ((bin_index + 1) * binsize / 2) + # )) + + return click_bins + + +def process_all(start, stop, *files): + start = int(start) + stop = int(stop) + total_results = defaultdict(set) + for file in tqdm(files, desc='Detecting clicks...'): + found_clicks = set() + if file.endswith('flac'): + stereo = read_flac_file(file) + else: + stereo = read_wave_file(file) + left = stereo[0] + right = stereo[1] + for bin_start, bin_end in find_clicks( + left[start:stop], + right[start:stop] + ): + found_clicks.add((bin_start, bin_end)) + if found_clicks: + total_results[file] = found_clicks + for file, clicks in total_results.iteritems(): + print file, 'has', len(clicks), 
'clicks at', clicks + if not total_results: + print "No clicks found in", \ + files[0], "or", len(files) - 1, 'other files.' + return total_results + # print "To delete, run: " + # for file in total_results.keys(): + # print "rm \"%s\"" % file + + +def show_all(save, start, stop, *files): + start = int(start) + stop = int(stop) + + binsize = 2 ** 10 + samplerate = 48000 + total_results = defaultdict(set) + for file in tqdm(files, desc='Detecting clicks...'): + found_clicks = set() + if file.endswith('flac'): + stereo = read_flac_file(file) + else: + stereo = read_wave_file(file) + left = stereo[0][start:stop] + right = stereo[1][start:stop] + + for channel in (right,): + s = stft(channel, binsize) + samples = channel + colormap = "jet" + sshow, freq = logscale_spec(s, factor=1.0, sr=samplerate) + ims = 20. * numpy.log10(numpy.abs(sshow) / 10e-6) # amplitude to db + + timebins, freqbins = numpy.shape(ims) + + plt.figure(figsize=(15, 7.5)) + plt.imshow(numpy.transpose(ims), origin="lower", aspect="auto", cmap=colormap, interpolation="none") + plt.colorbar() + + plt.xlabel("time (s)") + plt.ylabel("frequency (hz)") + plt.xlim([0, timebins-1]) + plt.ylim([0, freqbins]) + + xlocs = numpy.float32(numpy.linspace(0, timebins-1, 5)) + plt.xticks(xlocs, ["%.02f" % l for l in ((xlocs*len(samples)/timebins)+(0.5*binsize))/samplerate]) + ylocs = numpy.int16(numpy.round(numpy.linspace(0, freqbins-1, 10))) + plt.yticks(ylocs, ["%.02f" % freq[i] for i in ylocs]) + + for bin_start, bin_end in find_clicks(channel): + if not save: + plt.axvline(bin_start / (2 ** 10)) + found_clicks.add((bin_start, bin_end)) + + if found_clicks: + total_results[file] = found_clicks + if save: + plotstft(samplerate, left, plotpath=file + '.png') + plt.close('all') + else: + plt.show() + for file, clicks in total_results.iteritems(): + print file, 'has', len(clicks), 'clicks at', clicks + if not total_results: + print "No clicks found in", \ + files[0], "or", len(files) - 1, 'other files.' 
+ return total_results + + +if __name__ == "__main__": + if sys.argv[1] == 'show': + sys.exit(1 if show_all(False, *sys.argv[2:]) else 0) + if sys.argv[1] == 'save': + sys.exit(1 if show_all(True, *sys.argv[2:]) else 0) + else: + sys.exit(1 if process_all(*sys.argv[1:]) else 0) diff --git a/lib/compare.py b/lib/compare.py new file mode 100644 index 0000000..1a452cb --- /dev/null +++ b/lib/compare.py @@ -0,0 +1,198 @@ +import os +import csv +import sys +import numpy +import itertools +import traceback +from tqdm import tqdm +from tabulate import tabulate + +from utils import normalized, trim_mono_data +from wavio import read_wave_file + +import matplotlib.pyplot as plt + +sampling_rate = 44100.0 +assume_stereo_frequency_match = True + + +def aligned_sublists(*lists): + min_peak_index = min([numpy.argmax(list) for list in lists]) + return [list[(numpy.argmax(list) - min_peak_index):] for list in lists] + + +def peak_diff(lista, listb): + return float(numpy.amax(lista)) / float(numpy.amax(listb)) + + +def normalized_difference(lista, listb): + lista = trim_mono_data(normalized(lista)) + listb = trim_mono_data(normalized(listb)) + + compare = min(len(lista), len(listb)) + return numpy.sum( + numpy.absolute( + lista[:compare] - listb[:compare] + ) + ) / compare + + +def fundamental_frequency(list, sampling_rate=1): + w = numpy.fft.rfft(list) + freqs = numpy.fft.fftfreq(len(w)) + + # Find the peak in the coefficients + # idx = numpy.argmax(numpy.abs(w[:len(w) / 2])) + idx = numpy.argmax(numpy.abs(w)) + freq = freqs[idx] + return abs(freq * sampling_rate) + + +def freq_diff(lista, listb, only_compare_first=100000): + return fundamental_frequency(lista[:only_compare_first]) /\ + fundamental_frequency(listb[:only_compare_first]) + + +def shift_freq(list, factor): + num_output_points = int(float(len(list)) / factor) + output_x_points = numpy.linspace(0, len(list), num_output_points) + input_x_points = numpy.linspace(0, len(list), len(list)) + + return numpy.interp( + 
output_x_points, + input_x_points, + list, + ) + + +def generate_diffs(filea, fileb): + wavea = read_wave_file(filea, True) + waveb = read_wave_file(fileb, True) + + diffl = normalized_difference(*aligned_sublists(wavea[0], waveb[0])) + diffr = normalized_difference(*aligned_sublists(wavea[1], waveb[1])) + + peakl = peak_diff(wavea[0], waveb[0]) + peakr = peak_diff(wavea[1], waveb[1]) + + # for line in aligned_sublists(wavea[0], waveb[0]): + # plt.plot(normalized(line[:10000])) + # plt.show() + + # louder_a = wavea[0] if numpy.amax(wavea[0]) > numpy.amax(wavea[1]) else wavea[1] + # louder_b = waveb[0] if numpy.amax(waveb[0]) > numpy.amax(waveb[1]) else waveb[1] + + # freqd = freq_diff(normalized(louder_a), normalized(louder_b)) + + return ( + diffl, diffr, + peakl, peakr, + 0, # freqd, + os.path.split(filea)[-1], os.path.split(fileb)[-1] + ) + + +def generate_pairs(infiles): + for filea, fileb in tqdm(list(itertools.combinations(infiles, 2))): + yield generate_diffs(filea, fileb) + + +def process_all(aifs): + results = [] + try: + for result in generate_pairs(aifs): + results.append(result) + except KeyboardInterrupt as e: + traceback.print_exc(e) + pass + + headers = ( + '% diff L', '% diff R', + 'x peak L', 'x peak R', + 'x freq', + 'file a', 'file b' + ) + results = sorted( + results, + key=lambda (dl, dr, + pl, pr, + freqd, + fa, fb): dl + dr + abs(freqd - 1)) + with open('results.csv', 'wb') as f: + writer = csv.writer(f) + writer.writerows([headers]) + writer.writerows(results) + + print "%d results" % len(results) + print tabulate( + results, + headers=headers, + floatfmt='.4f' + ) + + +def graph_ffts(): + files = ['A1_v111_15.00s.aif', 'A2_v31_15.00s.aif'] + for file in files: + stereo = read_wave_file(os.path.join(root_dir, file)) + left = stereo[0] + right = stereo[1] + list = left[:100000] + + w = numpy.fft.rfft(list) + freqs = numpy.fft.fftfreq(len(w)) + + # Find the peak in the coefficients + # idx = numpy.argmax(numpy.abs(w[:len(w) / 2])) + idx = 
numpy.argmax(numpy.abs(w)) + freq = freqs[idx] + plt.plot(w) + print freq + print \ + fundamental_frequency(normalized(list)), \ + fundamental_frequency(normalized(left + right)) + # plt.show() + + +def freq_shift(): + files = ['A1_v111_15.00s.aif', 'A1_v95_15.00s.aif'] + wavea, waveb = [ + read_wave_file(os.path.join(root_dir, file)) for file in files + ] + + louder_a = wavea[0] if numpy.amax(wavea[0]) > numpy.amax(wavea[1]) else wavea[1] + louder_b = waveb[0] if numpy.amax(waveb[0]) > numpy.amax(waveb[1]) else waveb[1] + + freqd = freq_diff(normalized(louder_a), normalized(louder_b)) + + waveb_shifted = [shift_freq(channel, freqd) for channel in waveb] + louder_shifted_b = waveb_shifted[0] if numpy.amax(waveb_shifted[0]) > numpy.amax(waveb_shifted[1]) else waveb_shifted[1] + + shifted_freqd = freq_diff(normalized(louder_a), normalized(louder_shifted_b)) + + lefts_aligned = aligned_sublists(wavea[0], waveb[0]) + rights_aligned = aligned_sublists(wavea[1], waveb[1]) + shifted_lefts_aligned = aligned_sublists(wavea[0], waveb_shifted[0]) + + diffl = normalized_difference(*aligned_sublists(wavea[0], waveb[0])) + diffr = normalized_difference(*aligned_sublists(wavea[1], waveb[1])) + + plt.plot(normalized(rights_aligned[0][:10000])) + plt.plot(normalized(rights_aligned[1][:10000])) + plt.plot(numpy.absolute( + normalized(rights_aligned[0][:10000]) - normalized(rights_aligned[1][:10000]) + )) + plt.show() + + shifted_diffl = normalized_difference(*aligned_sublists(wavea[0], waveb_shifted[0])) + shifted_diffr = normalized_difference(*aligned_sublists(wavea[1], waveb_shifted[1])) + + print files + print 'diffs\t\t', diffl, diffr + print 'shifted diffs\t', shifted_diffl, shifted_diffr + print 'freqs', freqd + print 'shifted freqs', shifted_freqd + + +if __name__ == "__main__": + process_all(sys.argv[1:]) diff --git a/lib/constants.py b/lib/constants.py new file mode 100644 index 0000000..a4bf9a1 --- /dev/null +++ b/lib/constants.py @@ -0,0 +1,12 @@ +import numpy + 
+neg80point8db = 0.00009120108393559096 +bit_depth = 16 +default_silence_threshold = (neg80point8db * (2 ** (bit_depth - 1))) * 4 +NUMPY_DTYPE = numpy.int16 if bit_depth == 16 else numpy.int24 +SAMPLE_RATE = 48000 + +EXIT_ON_CLIPPING = True +EXIT_ON_BALANCE_BAD = False # Doesn't work yet +CLIPPING_CHECK_NOTE = 48 # C4 +CLIPPING_THRESHOLD = 0.85 diff --git a/lib/deflac.py b/lib/deflac.py new file mode 100644 index 0000000..3de6b4b --- /dev/null +++ b/lib/deflac.py @@ -0,0 +1,93 @@ +import os +import sys +import wave +import numpy +import argparse +import subprocess +from tqdm import tqdm +from sfzparser import SFZFile +from wavio import read_wave_file +from utils import normalized +from record import RATE, save_to_file +from constants import bit_depth + + +def full_path(sfzfile, filename): + if os.path.isdir(sfzfile): + return os.path.join(sfzfile, filename) + else: + return os.path.join(os.path.dirname(sfzfile), filename) + + +def length_of(filename): + return wave.open(filename).getnframes() + + +def split_flac(input_filename, start_time, end_time, output_filename): + commandline = [ + 'ffmpeg', + '-y', + '-i', + input_filename, + '-ss', + str(start_time), + '-to', + str(end_time), + output_filename + ] + # sys.stderr.write("Calling '%s'...\n" % ' '.join(commandline)) + subprocess.call( + commandline, + stdout=open('/dev/null', 'w'), + stderr=open('/dev/null', 'w') + ) + + +def normalize_file(filename): + data = read_wave_file(filename, True) + if len(data): + normalized_data = normalized(data) * (2 ** (bit_depth - 1) - 1) + else: + normalized_data = data + save_to_file(filename, 2, normalized_data) + +ANTI_CLICK_OFFSET = 3 + + +def split_sample(region, path): + new_file_name = "%s_%s_%s.wav" % ( + region.attributes['key'], + region.attributes['lovel'], + region.attributes['hivel'] + ) + output_file_path = full_path(path, new_file_name) + if not os.path.isfile(output_file_path): + split_flac( + full_path(path, region.attributes['sample']), + 
float(region.attributes['offset']) / float(RATE), + float(region.attributes['end']) / float(RATE), + output_file_path + ) + normalize_file(output_file_path) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description='split up flac-ized SFZ file into wavs' + ) + parser.add_argument( + 'files', + type=str, + help='sfz files to process', + nargs='+' + ) + args = parser.parse_args() + + all_regions = [ + regions + for filename in args.files + for group in SFZFile(open(filename).read()).groups + for regions in group.regions + ] + for regions in tqdm(all_regions, desc='De-flacing...'): + split_sample(regions, filename) diff --git a/lib/flacize.py b/lib/flacize.py new file mode 100644 index 0000000..469cefc --- /dev/null +++ b/lib/flacize.py @@ -0,0 +1,130 @@ +import os +import sys +import wave +import time +import argparse +import subprocess +from tqdm import tqdm +from sfzparser import SFZFile, Group +from wavio import read_wave_file +from utils import group_by_attr, note_name + + +def full_path(sfzfile, filename): + if os.path.isdir(sfzfile): + return os.path.join(sfzfile, filename) + else: + return os.path.join(os.path.dirname(sfzfile), filename) + + +def length_of(filename): + return wave.open(filename).getnframes() + + +def create_flac(concat_filename, output_filename): + commandline = [ + 'ffmpeg', + '-y', + '-f', + 'concat', + '-i', + concat_filename, + '-c:a', + 'flac', + '-compression_level', '12', + output_filename + ] + # sys.stderr.write("Calling '%s'...\n" % ' '.join(commandline)) + subprocess.call( + commandline, + stdout=open('/dev/null', 'w'), + stderr=open('/dev/null', 'w') + ) + + +def flacize_after_sampling( + output_folder, + groups, + sfzfile, + cleanup_aif_files=True +): + new_groups = [] + + old_paths_to_unlink = [ + full_path(output_folder, r.attributes['sample']) + for group in groups + for r in group.regions + ] + + for group in groups: + # Make one FLAC file per key, to get more compression. 
+ output = sum([list(concat_samples( + key_regions, output_folder, note_name(key) + )) + for key, key_regions in + group_by_attr(group.regions, [ + 'key', 'pitch_keycenter' + ]).iteritems()], []) + new_groups.append(Group(group.attributes, output)) + + with open(sfzfile + '.flac.sfz', 'w') as file: + file.write("\n".join([str(group) for group in new_groups])) + + if cleanup_aif_files: + for path in old_paths_to_unlink: + try: + os.unlink(path) + except OSError as e: + print "Could not unlink path: %s: %s" % (path, e) + + +ANTI_CLICK_OFFSET = 3 + + +def concat_samples(regions, path, name=None): + if name is None: + output_filename = 'all_samples_%f.flac' % time.time() + else: + output_filename = '%s.flac' % name + + concat_filename = 'concat.txt' + + with open(concat_filename, 'w') as outfile: + global_offset = 0 + for region in regions: + sample = region.attributes['sample'] + + sample_data = read_wave_file(full_path(path, sample)) + + sample_length = len(sample_data[0]) + region.attributes['offset'] = global_offset + region.attributes['end'] = ( + global_offset + sample_length - ANTI_CLICK_OFFSET + ) + # TODO: make sure endpoint is a zero crossing to prevent clicks + region.attributes['sample'] = output_filename + outfile.write("file '%s'\n" % full_path(path, sample)) + global_offset += sample_length + + create_flac(concat_filename, full_path(path, output_filename)) + os.unlink(concat_filename) + + return regions + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description='flac-ize SFZ files into one sprite sample' + ) + parser.add_argument('files', type=str, help='files to process', nargs='+') + args = parser.parse_args() + + for filename in args.files: + for group in SFZFile(open(filename).read()).groups: + # Make one FLAC file per key, to get more compression. 
+ output = sum([list(concat_samples(regions, filename, note_name(key))) + for key, regions in + tqdm(group_by_attr(group.regions, 'key').iteritems())], []) + print group.just_group() + for region in output: + print region diff --git a/lib/group_velcurves.py b/lib/group_velcurves.py new file mode 100644 index 0000000..cff6a95 --- /dev/null +++ b/lib/group_velcurves.py @@ -0,0 +1,44 @@ +import argparse +from sfzparser import parse, Group +from quantize import group_by_attr + +parser = argparse.ArgumentParser( + description='quantize and compress SFZ files' +) +parser.add_argument('files', type=str, help='files to process', nargs='+') +args = parser.parse_args() + + +def should_group_key(key): + return ( + key.startswith('amp_velcurve_') or + key == 'key' or + key == 'ampeg_release' + ) + + +def group_by_pitch(regions): + for key, regions in group_by_attr(regions, 'key').iteritems(): + # Group together all amp_velcurve_* and key params. + yield Group(dict([ + (key, value) + for region in regions + for key, value in region.attributes.iteritems() + if should_group_key(key) + ] + DEFAULT_ATTRIBUTES.items()), [ + region.without_attributes(should_group_key) for region in regions + ]) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description='flac-ize SFZ files into one sprite sample' + ) + parser.add_argument('files', type=str, help='files to process', nargs='+') + args = parser.parse_args() + + for filename in args.files: + groups = parse(open(filename).read()) + regions = sum([group.flattened_regions() for group in groups], []) + for group in group_by_pitch(regions): + print group diff --git a/lib/loop.py b/lib/loop.py new file mode 100644 index 0000000..fdd8604 --- /dev/null +++ b/lib/loop.py @@ -0,0 +1,251 @@ +import sys +import numpy +from tqdm import tqdm +from truncate import read_wave_file +from compare import fundamental_frequency + +import matplotlib.pyplot as plt +QUANTIZE_FACTOR = 8 + + +def compare_windows(window_a, window_b): + return 
numpy.sqrt(numpy.mean(numpy.power(window_a - window_b, 2))) + + +def slide_window(file, period, start_at=0, end_before=0): + for power in reversed(xrange(7, 10)): + multiple = 2 ** power + window_size = int(period * multiple) + # Uncomment this to search from the start_at value to the end_before + # rather than just through one window's length + # end_range = len(file) - (window_size * 2) - end_before + end_range = start_at + window_size + for i in xrange(start_at, end_range): + yield power, i, window_size + + +def window_match(file): + period = (1.0 / fundamental_frequency(file, 1)) * 2 + print period, 'period in samples' + + winner = None + + window_positions = list( + slide_window(file, period, len(file) / 2, len(file) / 8) + ) + for power, i, window_size in tqdm(window_positions): + window_start = find_similar_sample_index(file, i, i + window_size) + window_end = find_similar_sample_index(file, i, i + (window_size * 2)) + effective_size = window_end - window_start + + difference = compare_windows( + file[i:i + effective_size], + file[window_start:window_end] + ) / effective_size + if winner is None or difference < winner[0]: + winner = ( + difference, + effective_size, + i, + abs(file[i] - file[window_start]) + ) + print 'new winner', winner + + lowest_difference, winning_window_size, winning_index, gap = winner + + print "Best loop match:", lowest_difference + print "window size", winning_window_size + print "winning index", winning_index + print "winning gap", gap + return winning_index, winning_window_size + + +def slope_at_index(file, i): + return (file[i + 1] - file[i - 1]) / 2 + + +def find_similar_sample_index( + file, + reference_index, + search_around_index, + search_size=100 # samples +): + reference_slope = slope_at_index(file, reference_index) > 0 + best_match = None + search_range = xrange( + search_around_index - search_size, + search_around_index + search_size + ) + for i in search_range: + slope = slope_at_index(file, i) > 0 + if slope != 
reference_slope: + continue + + abs_diff = abs(file[i] - file[reference_index]) + + if best_match is not None: + _, best_abs_diff = best_match + if abs_diff < best_abs_diff: + best_match = (i, abs_diff) + else: + best_match = (i, abs_diff) + return best_match[0] if best_match is not None else search_around_index + + +def zero_crossing_match(file): + period = (1.0 / fundamental_frequency(file, 1)) * 2 + print period, 'period in samples' + + period_multiple = 64 + period = period * period_multiple + + for i in reversed(xrange(2 * len(file) / 3, 5 * len(file) / 6)): + if file[i] >= 0 and file[i + 1] < 0 and \ + file[int(i + period)] >= 0 and \ + file[int(i + 1 + period)] < 0 and \ + file[int(i + period * 2)] >= 0 and \ + file[int(i + 1 + period * 2)] < 0: + return i, int(period) + + +def fast_autocorrelate(x): + """ + Compute the autocorrelation of the signal, based on the properties of the + power spectral density of the signal. + + Note that the input's length may be reduced before the correlation is performed + due to a pathological case in numpy.fft: http://stackoverflow.com/a/23531074/679081 + + > The FFT algorithm used in np.fft performs very well (meaning O(n log n)) + > when the input length has many small prime factors, and very bad + > (meaning a naive DFT requiring O(n^2)) when the input size is a prime number.
+ """ + + # This is one simple way to ensure that the input array + # has a length with many small prime factors, although it + # doesn't guarantee that (also hopefully we don't chop too much) + optimal_input_length = int(numpy.sqrt(len(x))) ** 2 + x = x[:optimal_input_length] + xp = x - numpy.mean(x) + f = numpy.fft.fft(xp) + p = numpy.absolute(numpy.power(f, 2)) + pi = numpy.fft.ifft(p) + result = numpy.real(pi)[:x.size / 2] / numpy.sum(numpy.power(xp, 2)) + return result + + +def find_argmax_after(file, offset): + return numpy.argmax(file[offset:]) + offset + + +def autocorrelated_loop(file, search_start, min_loop_width_in_seconds=0.2): + # Strategy: + # 1) run an autocorrelation on the file. + # 3) Find argmax of the autocorrelation + # 4) define some peak_width and find the next highest peak after current + # 5) define the loop bounds as from the first peak to the second peak + # 6) massage the loop bounds using find_similar_sample_index + # 7) ??? + # 8) Profit! + autocorrelation = fast_autocorrelate(file) + return find_loop_from_autocorrelation( + file, + autocorrelation, + search_start, + min_loop_width_in_seconds + ) + + +def find_loop_from_autocorrelation( + file, + autocorrelation, + search_start, + min_loop_width_in_seconds=0.2, + sample_rate=48000 +): + search_start /= 2 + max_autocorrelation_peak_width = int( + min_loop_width_in_seconds * sample_rate + ) + loop_start = find_argmax_after(autocorrelation, search_start) + loop_end = find_argmax_after( + autocorrelation, + loop_start + max_autocorrelation_peak_width + ) + + loop_end = find_similar_sample_index(file, loop_start, loop_end) - 1 + return loop_start, (loop_end - loop_start) + + +def minimize(iterable, callable): + best_result = None + best_score = None + for x in iterable: + if x: + score = callable(*x) + if best_score is None or score < best_score: + best_score = score + best_result = x + return best_result + + +def autocorrelate_loops(file, sample_rate): + autocorrelation = 
fast_autocorrelate(file) + search_points = [ + 3 * len(file) / 4, + 2 * len(file) / 3, + len(file) / 2, + len(file) / 3, + ] + loop_widths = [0.2, 0.4, 0.6, 0.8, 1.0, 1.5, 2, 2.5, 3.] + for search_point in search_points: + for width in loop_widths: + try: + yield find_loop_from_autocorrelation( + file, autocorrelation, + search_point, width, sample_rate) + except ValueError: + # We couldn't search for a loop width of that size. + pass + yield None + + +def find_loop_points(data, sample_rate): + channel = data[0] + + result = minimize( + autocorrelate_loops(channel, sample_rate), + lambda start, length: abs(channel[start] - channel[start + length]) + ) + + if result: + loop_start, loop_size = result + return loop_start, loop_start + loop_size + + +def process(aif, sample_rate=48000): + file = read_wave_file(aif) + + # loop_start, loop_size = window_match(file) + # loop_start, loop_size = zero_crossing_match(file) + loop_start, loop_end = find_loop_points(file) + loop_size = loop_end - loop_start + + file = file[0] + + print 'start, end', loop_start, loop_end + + plt.plot(file[loop_start:loop_end]) + plt.plot(file[loop_end:loop_start + (2 * loop_size)]) + plt.show() + + plt.plot(file[ + loop_start - (sample_rate * 2): + loop_start + (sample_rate * 2) + ]) + plt.axvline(sample_rate * 2) + plt.axvline((sample_rate * 2) + loop_size) + plt.show() + +if __name__ == "__main__": + process(sys.argv[1]) diff --git a/lib/map_xfvel.py b/lib/map_xfvel.py new file mode 100644 index 0000000..babb756 --- /dev/null +++ b/lib/map_xfvel.py @@ -0,0 +1,11 @@ +def map_xfvel(regions): + for region in regions: + region.attributes.update({ + 'hivel': region.attributes['xfin_hivel'], + 'lovel': region.attributes['xfin_lovel'], + }) + del region.attributes['xfin_hivel'] + del region.attributes['xfin_lovel'] + del region.attributes['xfout_hivel'] + del region.attributes['xfout_lovel'] + yield region diff --git a/lib/midi_helpers.py b/lib/midi_helpers.py new file mode 100644 index 
0000000..50500a2 --- /dev/null +++ b/lib/midi_helpers.py @@ -0,0 +1,29 @@ +import time +import rtmidi + + +CHANNEL_OFFSET = 0x90 - 1 + + +def all_notes_off(midiout, midi_channel): + # All notes off + midiout.send_message([ + (0xB0 - 1) + midi_channel, 0x7B, 0 + ]) + # Reset all controllers + midiout.send_message([ + (0xB0 - 1) + midi_channel, 0x79, 0 + ]) + time.sleep(1.0) + + +def open_midi_port(midi_port_name): + midiout = rtmidi.MidiOut() + for i, port_name in enumerate(midiout.get_ports()): + if not midi_port_name or midi_port_name.lower() in port_name.lower(): + midiout.open_port(i) + return midiout + else: + raise Exception("Could not find port matching %s in ports %s!" % ( + midi_port_name, midiout.get_ports() + )) diff --git a/lib/numpy_helpers.py b/lib/numpy_helpers.py new file mode 100644 index 0000000..5b07df3 --- /dev/null +++ b/lib/numpy_helpers.py @@ -0,0 +1,74 @@ +import numpy +# from https://gist.github.com/nils-werner/9d321441006b112a4b116a8387c2280c + + +def sliding_window(data, size, stepsize=1, padded=False, axis=-1, copy=True): + """ + Calculate a sliding window over a signal + Parameters + ---------- + data : numpy array + The array to be slid over. + size : int + The sliding window size + stepsize : int + The sliding window stepsize. Defaults to 1. + axis : int + The axis to slide over. Defaults to the last axis. + copy : bool + Return strided array as copy to avoid side effects when manipulating the + output array. + Returns + ------- + data : numpy array + A matrix where row in last dimension consists of one instance + of the sliding window. + Notes + ----- + - Be wary of setting `copy` to `False` as undesired side effects with the + output values may occur.
+ Examples + -------- + >>> a = numpy.array([1, 2, 3, 4, 5]) + >>> sliding_window(a, size=3) + array([[1, 2, 3], + [2, 3, 4], + [3, 4, 5]]) + >>> sliding_window(a, size=3, stepsize=2) + array([[1, 2, 3], + [3, 4, 5]]) + See Also + -------- + pieces : Calculate number of pieces available by sliding + """ + if axis >= data.ndim: + raise ValueError( + "Axis value out of range" + ) + + if stepsize < 1: + raise ValueError( + "Stepsize may not be zero or negative" + ) + + if size > data.shape[axis]: + raise ValueError( + "Sliding window size may not exceed size of selected axis" + ) + + shape = list(data.shape) + shape[axis] = numpy.floor(data.shape[axis] / stepsize - size / stepsize + 1).astype(int) + shape.append(size) + + strides = list(data.strides) + strides[axis] *= stepsize + strides.append(data.strides[axis]) + + strided = numpy.lib.stride_tricks.as_strided( + data, shape=shape, strides=strides + ) + + if copy: + return strided.copy() + else: + return strided diff --git a/lib/quantize.py b/lib/quantize.py new file mode 100644 index 0000000..28f363d --- /dev/null +++ b/lib/quantize.py @@ -0,0 +1,116 @@ +import os +import sys +import argparse +from sfzparser import SFZFile +from utils import group_by_attr +import itertools +from collections import defaultdict + +parser = argparse.ArgumentParser( + description='quantize and compress SFZ files' +) +parser.add_argument('files', type=str, help='files to process', nargs='+') +args = parser.parse_args() + + +def quantize_pitch(regions, pitch_levels=25): + lowestkey = min(map(lambda x: int(x.attributes['key']), regions)) + highestkey = max(map(lambda x: int(x.attributes['key']), regions)) + + keyspan = highestkey - lowestkey + pitch_skip = keyspan / pitch_levels + + evenly_divided = \ + int(keyspan / pitch_levels) == float(keyspan) / float(pitch_levels) + + # a dict of sample_pitch -> [lokey, hikey, pitch_keycenter] + pitchmapping = {} + for key in xrange( + lowestkey + (pitch_skip / 2), + highestkey + 1 + (pitch_skip / 
2), + pitch_skip): + pitchmapping[key] = { + 'lokey': key - (pitch_skip / 2), + 'pitch_keycenter': key, + 'hikey': key + (pitch_skip / 2) - (0 if evenly_divided else 1), + } + + for key, regions in group_by_attr(regions, 'key').iteritems(): + if int(key) in pitchmapping: + for region in regions: + region.attributes.update(pitchmapping[int(key)]) + del region.attributes['key'] + yield region + + +def quantize_velocity(regions, velocity_levels=5): + lowestvel = min(map(lambda x: int(x.attributes['xfin_loivel']), regions)) + highestvel = max(map(lambda x: int(x.attributes['xfin_hivel']), regions)) + + velspan = 127 + pitch_skip = velspan / velocity_levels + + evenly_divided = \ + int(keyspan / pitch_levels) == float(keyspan) / float(pitch_levels) + + # a dict of sample_pitch -> [lokey, hikey, pitch_keycenter] + pitchmapping = {} + for key in xrange( + lowestkey + (pitch_skip / 2), + highestkey + 1 + (pitch_skip / 2), + pitch_skip): + pitchmapping[key] = { + 'lokey': key - (pitch_skip / 2), + 'pitch_keycenter': key, + 'hikey': key + (pitch_skip / 2) - (0 if evenly_divided else 1), + } + + for key, regions in group_by_attr(regions, 'key').iteritems(): + if int(key) in pitchmapping: + for region in regions: + region.attributes.update(pitchmapping[int(key)]) + del region.attributes['key'] + yield region + + +def compute_sample_size(filename, regions): + size = 0 + + for region in regions: + fullpath = os.path.join( + os.path.dirname(filename), + region.attributes['sample'] + ) + size += os.stat(fullpath).st_size + return size + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description='flac-ize SFZ files into one sprite sample' + ) + parser.add_argument('files', type=str, help='files to process', nargs='+') + args = parser.parse_args() + + for filename in args.files: + groups = SFZFile(open(filename).read()).groups + sys.stderr.write( + "Original sample size: %d bytes\n" % + compute_sample_size( + filename, + sum([group.regions for group in groups], 
[]) + ) + ) + regions = sum([group.regions for group in groups], []) + output = list(quantize_pitch(regions)) + sys.stderr.write( + "Quantized sample size: %d bytes\n" % + compute_sample_size( + filename, + output + ) + ) + for region in output: + print region + # for group in groups: + # print group diff --git a/lib/record.py b/lib/record.py new file mode 100644 index 0000000..dae0b96 --- /dev/null +++ b/lib/record.py @@ -0,0 +1,231 @@ +import sys +import numpy +from struct import pack +from math import sqrt +from constants import bit_depth, NUMPY_DTYPE, SAMPLE_RATE + +import pyaudio +import wave + +CHUNK_SIZE = 1024 +NUM_CHANNELS = 2 +FORMAT = pyaudio.paInt16 if bit_depth == 16 else pyaudio.paInt24 + +GO_UP = "\033[F" +ERASE = "\033[2K" + + +def is_silent(snd_data, threshold): + maxval = max( + abs(numpy.amax(snd_data)), + abs(numpy.amin(snd_data)) + ) / float(2 ** (bit_depth - 1)) + return maxval < threshold + + +def get_input_device_index(py_audio, audio_interface_name=None): + info = py_audio.get_host_api_info_by_index(0) + + input_interface_names = {} + for i in range(0, info.get('deviceCount')): + device_info = py_audio.get_device_info_by_host_api_device_index(0, i) + if device_info.get('maxInputChannels') > 0: + input_interface_names[i] = device_info.get('name') + + if audio_interface_name: + for index, name in input_interface_names.iteritems(): + if audio_interface_name.lower() in name.lower(): + return index + else: + raise Exception("Could not find audio input %s in inputs %s!" 
% ( + audio_interface_name, input_interface_names + )) + + +def record( + limit=None, + after_start=None, + on_time_up=None, + threshold=0.00025, + print_progress=True, + allow_empty_return=False, + audio_interface_name=None, + sample_rate=SAMPLE_RATE, +): + p = pyaudio.PyAudio() + input_device_index = get_input_device_index(p, audio_interface_name) + + stream = p.open( + format=FORMAT, + channels=NUM_CHANNELS, + rate=sample_rate, + input=True, + output=False, + frames_per_buffer=CHUNK_SIZE, + input_device_index=input_device_index + ) + + num_silent = 0 + silence_timeout = sample_rate * 2.0 + snd_started = False + in_tail = False + release_time = None + + if print_progress: + sys.stderr.write("\n") + + peak_value = None + peak_index = None + + data = [] + total_length = 0 + + while 1: + if total_length > 0 and after_start is not None: + after_start() + after_start = None # don't call back again + array = stream.read(CHUNK_SIZE) + snd_data = numpy.fromstring(array, dtype=NUMPY_DTYPE) + snd_data = numpy.reshape(snd_data, (2, -1), 'F') + + peak_in_buffer = numpy.amax(numpy.absolute(snd_data), 1) + peak_in_buffer_idx = numpy.argmax(numpy.absolute(snd_data)) + mono_peak_in_buffer = max(peak_in_buffer) + + if peak_value is None or peak_value < mono_peak_in_buffer: + peak_value = mono_peak_in_buffer + peak_index = total_length + peak_in_buffer_idx + + data.append(snd_data) + total_length += len(snd_data[0]) + total_duration_seconds = float(total_length) / sample_rate + + time_since_peak = total_length - peak_index + peak_pct = mono_peak_in_buffer / peak_value + if time_since_peak: + estimated_remaining_duration = peak_pct / time_since_peak + else: + estimated_remaining_duration = 1 + + if print_progress: + pct_loudness = [sqrt(x) + for x in peak_in_buffer.astype(numpy.float) / + float(2 ** (bit_depth - 1))] + sys.stderr.write(ERASE) + sys.stderr.write("\t%2.2f secs\t" % total_duration_seconds) + sys.stderr.write("%2.2f%% loudness\t\t|%s%s|\n" % ( + 100 * pct_loudness[0], 
+ int(40 * pct_loudness[0]) * '=', + int(40 * (1 - pct_loudness[0])) * ' ', + )) + sys.stderr.write(ERASE) + sys.stderr.write("\t\t\t%2.2f%% loudness\t\t|%s%s|\n" % ( + 100 * pct_loudness[1], + int(40 * pct_loudness[1]) * '=', + int(40 * (1 - pct_loudness[1])) * ' ', + )) + pct_silence_end = float(num_silent) / silence_timeout + estimated_remaining_duration_string = \ + "est. remaining duration: %2.2f secs" % ( + estimated_remaining_duration + ) + if in_tail: + sys.stderr.write(ERASE) + sys.stderr.write("\t\treleasing\t\tsilence:|%s%s| %s" % ( + int(40 * pct_silence_end) * '=', + int(40 * (1 - pct_silence_end)) * ' ', + estimated_remaining_duration_string, + )) + else: + sys.stderr.write(ERASE) + sys.stderr.write("\t\t\t\t\tsilence:|%s%s| %s" % ( + int(40 * pct_silence_end) * '=', + int(40 * (1 - pct_silence_end)) * ' ', + estimated_remaining_duration_string, + )) + sys.stderr.write(GO_UP) + sys.stderr.write(GO_UP) + + silent = is_silent(snd_data, threshold) + + if silent: + num_silent += CHUNK_SIZE + elif not snd_started: + snd_started = True + else: + num_silent = 0 + + if num_silent > silence_timeout: + if on_time_up is not None: + on_time_up() + break + elif not in_tail \ + and limit is not None \ + and total_duration_seconds >= limit: + if on_time_up is not None: + if on_time_up(): + num_silent = 0 + in_tail = True + release_time = total_duration_seconds + else: + break + else: + break + + if print_progress: + sys.stderr.write("\n\n\n") + + # TODO this is inefficient, should preallocate a huge + # array up front and then just copy into it maybe? + # but not in the tight loop, what if that causes the clicks? 
+ r = numpy.empty([NUM_CHANNELS, 0], dtype=NUMPY_DTYPE) + for chunk in data: + r = numpy.concatenate((r, chunk), axis=1) + + sample_width = p.get_sample_size(FORMAT) + stream.stop_stream() + stream.close() + p.terminate() + + if snd_started or allow_empty_return: + return sample_width, r, release_time + else: + return sample_width, None, release_time + + +def record_to_file( + path, + limit, + after_start=None, + on_time_up=None, + sample_rate=SAMPLE_RATE +): + sample_width, data, release_time = record( + limit, + after_start, + on_time_up, + sample_rate, + ) + if data is not None: + save_to_file(path, sample_width, data, sample_rate) + return path + else: + return None + + +def save_to_file(path, sample_width, data, sample_rate=SAMPLE_RATE): + flattened = numpy.asarray(data.flatten('F'), dtype=NUMPY_DTYPE) + packstring = '<' + ('h' * len(flattened)) + data = pack(packstring, *flattened) + + wf = wave.open(path, 'wb') + wf.setnchannels(NUM_CHANNELS) + wf.setsampwidth(sample_width) + wf.setframerate(sample_rate) + wf.writeframes(data) + wf.close() + + +if __name__ == '__main__': + print record_to_file('./demo.wav', sys.argv[1] if sys.argv[1] else None) + print("done - result written to demo.wav") diff --git a/lib/send_notes.py b/lib/send_notes.py new file mode 100644 index 0000000..e6bb452 --- /dev/null +++ b/lib/send_notes.py @@ -0,0 +1,340 @@ +import os +import time +from tqdm import tqdm +from record import save_to_file +from sfzparser import SFZFile, Region +from utils import trim_data, \ + note_name, \ + first_non_none, \ + warn_on_clipping +from constants import bit_depth, SAMPLE_RATE +from volume_leveler import level_volume +from flacize import flacize_after_sampling +from loop import find_loop_points +from click_detector import find_clicks +from collections import defaultdict +from midi_helpers import all_notes_off, open_midi_port, CHANNEL_OFFSET +from audio_helpers import sample_threshold_from_noise_floor, \ + generate_sample, \ + check_for_clipping + 
+VELOCITIES = [ + 15, 44, + 63, 79, 95, 111, + 127 +] +MAX_ATTEMPTS = 8 +PRINT_SILENCE_WARNINGS = False + +PORTAMENTO_PRESAMPLE_LIMIT = 2.0 +PORTAMENTO_PRESAMPLE_WAIT = 1.0 + +# percentage - how much left/right delta can we tolerate? +VOLUME_DIFF_THRESHOLD = 0.01 + + +def filename_for(note, velocity): + return '%s_v%s.aif' % (note_name(note), velocity) + + +def generate_region(note, velocity, velocities, keys=None, loop=None): + velocity_index = velocities.index(velocity) + if velocity_index > 0: + lovel = velocities[velocity_index - 1] + 1 + else: + lovel = 1 + hivel = velocity + + # Note: the velcurve should be: + # Velocity | Amplitude + # --------------------- + # hivel | 1.0 (sample at full volume) + # ... | linear mapping + # lovel + 1 | (next lowest layer's dB / this layer's dB) + + attributes = { + 'lovel': lovel, + 'hivel': hivel, + 'ampeg_release': 1, + 'sample': filename_for(note, velocity), + 'offset': 0, + } + + if loop is not None: + attributes.update({ + 'loop_mode': 'loop_continuous', + 'loop_start': loop[0], + 'loop_end': loop[1], + }) + + if keys is None or len(keys) == 1: + attributes['key'] = note + else: + attributes.update({ + 'lokey': min(keys), + 'hikey': max(keys), + 'pitch_keycenter': note, + }) + + return Region(attributes) + + +def all_notes(notes, velocities, ascending=False): + for note in (notes if ascending else reversed(notes)): + for i, velocity in enumerate(velocities): + yield note, velocity, (i == len(velocities) - 1) + + +CLICK_RETRIES = 5 + + +def generate_and_save_sample( + limit, + midiout, + note, + velocity, + midi_channel, + filename, + threshold, + velocity_levels, + keys, + looping_enabled=False, + print_progress=False, + ignore_clicks=False, + audio_interface_name=None, + sample_rate=SAMPLE_RATE, +): + tries = 0 + attempt_map = defaultdict(list) + already_seen_clicks = set() + while True: + sample_width, data, release_time = generate_sample( + limit=limit, + midiout=midiout, + note=note, + velocity=velocity, + 
midi_channel=midi_channel, + threshold=threshold, + print_progress=print_progress, + audio_interface_name=audio_interface_name, + sample_rate=sample_rate, + ) + + if data is not None: + data = trim_data(data, threshold * 10, threshold) + clicks = find_clicks(data[0], data[1]) + if clicks and not ignore_clicks: + if all([click in already_seen_clicks for click in clicks]): + print "Same clicks found, assuming click misdetection." + else: + already_seen_clicks.update(clicks) + print "WARNING: %s clicks detected (@ %s), resampling." % ( + len(clicks), + clicks + ) + save_to_file( + filename + ".try_" + str(tries) + ".aif", + sample_width, + data, + sample_rate, + ) + tries += 1 + attempt_map[len(clicks)].append(data) + if tries < CLICK_RETRIES: + continue + else: + num_clicks = min(attempt_map.keys()) + data = attempt_map[num_clicks][-1] + print ("WARNING: Using sample with %d clicks " + "after %d retries. (Saved to %s)") % ( + num_clicks, + tries, + filename + ) + + warn_on_clipping(data) + loop = find_loop_points(data, SAMPLE_RATE) if looping_enabled else None + save_to_file(filename, sample_width, data, sample_rate) + return generate_region( + note, velocity, velocity_levels, + keys, loop + ) + else: + return None + + +def sample_program( + output_folder='foo', + low_key=21, + high_key=109, + max_attempts=8, + midi_channel=1, + midi_port_name=None, + audio_interface_name=None, + program_number=None, + flac=True, + velocity_levels=VELOCITIES, + key_range=1, + cleanup_aif_files=True, + limit=None, + looping_enabled=False, + print_progress=False, + has_portamento=False, + sample_asc=False, + ignore_clicks=False, + sample_rate=SAMPLE_RATE, +): + if (key_range % 2) != 1: + raise NotImplementedError("Key skip must be an odd number for now.") + + midiout = open_midi_port(midi_port_name) + + path_prefix = output_folder + if program_number is not None: + print "Sampling program number %d into path %s" % ( + program_number, output_folder + ) + else: + print "Sampling into 
path %s" % (output_folder) + + try: + os.mkdir(path_prefix) + except OSError: + pass + + sfzfile = os.path.join(path_prefix, 'file.sfz') + try: + regions = sum([group.regions + for group in SFZFile(open(sfzfile).read()).groups], + []) + regions = [region for region in regions if region.exists(path_prefix)] + except IOError: + regions = [] + + if program_number is not None: + print "Sending program change to program %d..." % program_number + midiout.send_message([ + CHANNEL_OFFSET + midi_channel, 0xC0, program_number + ]) + + # All notes off, but like, a lot + for _ in xrange(0, 2): + all_notes_off(midiout, midi_channel) + + threshold = sample_threshold_from_noise_floor( + bit_depth, + audio_interface_name + ) + + check_for_clipping( + midiout, + midi_channel, + threshold, + bit_depth, + audio_interface_name + ) + + groups = [] + note_regions = [] + + key_range_under = key_range / 2 + key_range_over = key_range / 2 + notes_to_sample = range( + low_key, + (high_key - key_range_over) + 1, + key_range + ) + + for note, velocity, done_note in tqdm(list(all_notes( + notes_to_sample, + velocity_levels, + sample_asc + ))): + keys = range(note + key_range_under, note + key_range_over + 1) + if not keys: + keys = [note] + already_sampled_region = first_non_none([ + region for region in regions + if region.attributes['hivel'] == str(velocity) and + region.attributes.get( + 'key', region.attributes.get( + 'pitch_keycenter', None + )) == str(note)]) + if already_sampled_region is None: + filename = os.path.join(path_prefix, filename_for(note, velocity)) + + if print_progress: + print "Sampling %s at velocity %s..." 
% ( + note_name(note), velocity + ) + + if has_portamento: + sample_width, data, release_time = generate_sample( + limit=PORTAMENTO_PRESAMPLE_LIMIT, + midiout=midiout, + note=note, + velocity=velocity, + midi_channel=midi_channel, + threshold=threshold, + print_progress=print_progress, + audio_interface_name=audio_interface_name, + sample_rate=sample_rate, + ) + time.sleep(PORTAMENTO_PRESAMPLE_WAIT) + + for attempt in xrange(0, MAX_ATTEMPTS): + try: + region = generate_and_save_sample( + limit=limit, + midiout=midiout, + note=note, + velocity=velocity, + midi_channel=midi_channel, + filename=filename, + threshold=threshold, + velocity_levels=velocity_levels, + keys=keys, + looping_enabled=looping_enabled, + print_progress=print_progress, + ignore_clicks=ignore_clicks, + audio_interface_name=audio_interface_name, + sample_rate=sample_rate, + ) + if region: + regions.append(region) + note_regions.append(region) + with open(sfzfile, 'w') as file: + file.write("\n".join([str(r) for r in regions])) + elif PRINT_SILENCE_WARNINGS: + print "Got no sound for %s at velocity %s." % ( + note_name(note), velocity + ) + except IOError: + pass + else: + break + else: + print "Could not sample %s at vel %s: too many IOErrors." 
% ( + note_name(note), velocity + ) + else: + note_regions.append(already_sampled_region) + + if done_note and len(note_regions) > 0: + groups.append(level_volume(note_regions, output_folder)) + note_regions = [] + + # Write the volume-leveled output: + with open(sfzfile + '.leveled.sfz', 'w') as file: + file.write("\n".join([str(group) for group in groups])) + + if flac: + # Do a FLAC compression pass afterwards + # TODO: Do this after each note if possible + # would require graceful re-parsing of FLAC-combined regions + flacize_after_sampling( + output_folder, + groups, + sfzfile, + cleanup_aif_files=True + ) diff --git a/lib/sfzparser.py b/lib/sfzparser.py new file mode 100644 index 0000000..98ee7d1 --- /dev/null +++ b/lib/sfzparser.py @@ -0,0 +1,127 @@ +import os +import re + +comments = re.compile(r'//.*$', re.M) +lookfor = re.compile(r'<(\w+)>|(\w+)=([^\s]+)') + + +class SFZFile(object): + def __init__(self, text=None): + if text: + self.groups = self.parse(text) + else: + self.groups = [] + + def parse(self, text): + groups = [] + groupdata = {} + current_group_regions = [] + regiondata = {} + + text = re.sub(comments, '', text) + + current = None + + for m in re.finditer(lookfor, text): + if m.group(1) in ['group', 'region']: + if m.group(1) == 'group': + if groupdata != {}: + if regiondata != {}: + region = Region(regiondata) + current_group_regions.append(region) + regiondata = {} + group = Group(groupdata, current_group_regions) + groups.append(group) + groupdata = {} + current_group_regions = [] + current = groupdata + elif m.group(1) == 'region': + if regiondata != {}: + region = Region(regiondata) + current_group_regions.append(region) + regiondata = {} + current = regiondata + else: + current[m.group(2)] = m.group(3) + + if len(groups) == 0: + if regiondata != {}: + current_group_regions.append(Region(regiondata)) + return [Group({}, current_group_regions)] + else: + groups[-1].regions.append(Region(regiondata)) + return groups + + +class 
Group(object): + def __init__(self, attributes, regions): + self.attributes = attributes + self.regions = regions + + def flattened_regions(self): + return [region.merge(self.attributes) for region in self.regions] + + def just_group(self): + return "\n".join( + [""] + + ['%s=%s' % (k, v) for k, v in self.attributes.iteritems()] + ) + + def __repr__(self): + return "" % ( + repr(self.attributes), + repr(self.regions) + ) + + def __str__(self): + return self.just_group() + "\n" + "\n\n".join([ + str(r) for r in self.regions + ]) + + +class Region(object): + def __init__(self, attributes): + self.attributes = attributes + + def __repr__(self): + return "" % ( + repr(self.attributes) + ) + + def __str__(self): + return "\n".join( + [""] + + ['%s=%s' % (k, v) for k, v in self.attributes.iteritems()] + ) + + def exists(self, root=None): + sample_path = self.attributes['sample'] + if root: + sample_path = os.path.join(root, sample_path) + return os.path.isfile(sample_path) + + def without_attributes(self, discard=lambda x: False): + return Region(dict([ + (k, v) + for k, v in self.attributes.iteritems() + if not discard(k) + ])) + + def merge(self, other_attrs): + return Region(dict( + (k, v) + for d in [self.attributes, other_attrs] + for k, v in d.iteritems() + )) + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser(description='parse SFZ files') + parser.add_argument('files', type=str, help='files to parse', nargs='+') + args = parser.parse_args() + + for fn in args.files: + file = SFZFile(open(fn).read()) + for group in file.groups: + print group diff --git a/lib/spectrogram.py b/lib/spectrogram.py new file mode 100644 index 0000000..b10c853 --- /dev/null +++ b/lib/spectrogram.py @@ -0,0 +1,99 @@ +#!/usr/bin/env python +#coding: utf-8 +""" This work is licensed under a Creative Commons Attribution 3.0 Unported License. 
+ Frank Zalkow, 2012-2013 """ + +import sys +import pdb +import numpy as np +from matplotlib import pyplot as plt +from numpy.lib import stride_tricks +from wavio import read_wave_file + + +def stft(sig, frame_size, overlap_fac=0.5, window=np.hanning): + """ short time fourier transform of audio signal """ + win = window(frame_size) + hop_size = int(frame_size - np.floor(overlap_fac * frame_size)) + + # zeros at beginning (thus center of 1st window should be for sample nr. 0) + samples = np.append(np.zeros(np.floor(frame_size / 2.0)), sig) + # cols for windowing + cols = np.ceil((len(samples) - frame_size) / float(hop_size)) + 1 + # zeros at end (thus samples can be fully covered by frames) + samples = np.append(samples, np.zeros(frame_size)) + + frames = stride_tricks.as_strided( + samples, + shape=(cols, frame_size), + strides=( + samples.strides[0] * hop_size, + samples.strides[0] + ) + ).copy() + + frames *= win + + return np.fft.rfft(frames) + +""" scale frequency axis logarithmically """ +def logscale_spec(spec, sr=44100, factor=20.): + timebins, freqbins = np.shape(spec) + + scale = np.linspace(0, 1, freqbins) ** factor + scale *= (freqbins-1)/max(scale) + scale = np.unique(np.round(scale)) + + # create spectrogram with new freq bins + newspec = np.complex128(np.zeros([timebins, len(scale)])) + for i in range(0, len(scale)): + if i == len(scale)-1: + newspec[:,i] = np.sum(spec[:,scale[i]:], axis=1) + else: + newspec[:,i] = np.sum(spec[:,scale[i]:scale[i+1]], axis=1) + + # list center freq of bins + allfreqs = np.abs(np.fft.fftfreq(freqbins*2, 1./sr)[:freqbins+1]) + freqs = [] + for i in range(0, len(scale)): + if i == len(scale)-1: + freqs += [np.mean(allfreqs[scale[i]:])] + else: + freqs += [np.mean(allfreqs[scale[i]:scale[i+1]])] + + return newspec, freqs + +""" plot spectrogram""" +def plotstft(samplerate, samples, binsize=2**10, plotpath=None, colormap="jet"): + s = stft(samples, binsize) + + sshow, freq = logscale_spec(s, factor=1.0, sr=samplerate) + 
ims = 20.*np.log10(np.abs(sshow)/10e-6) # amplitude to decibel + + timebins, freqbins = np.shape(ims) + + plt.figure(figsize=(15, 7.5)) + plt.imshow(np.transpose(ims), origin="lower", aspect="auto", cmap=colormap, interpolation="none") + plt.colorbar() + + plt.xlabel("time (s)") + plt.ylabel("frequency (hz)") + plt.xlim([0, timebins-1]) + plt.ylim([0, freqbins]) + + xlocs = np.float32(np.linspace(0, timebins-1, 5)) + plt.xticks(xlocs, ["%.02f" % l for l in ((xlocs*len(samples)/timebins)+(0.5*binsize))/samplerate]) + ylocs = np.int16(np.round(np.linspace(0, freqbins-1, 10))) + plt.yticks(ylocs, ["%.02f" % freq[i] for i in ylocs]) + + if plotpath: + plt.savefig(plotpath, bbox_inches="tight") + else: + plt.show() + + plt.clf() + +if __name__ == "__main__": + stereo = read_wave_file(sys.argv[1]) + left = stereo[0] + plotstft(48000, left) diff --git a/lib/starts_with_click.py b/lib/starts_with_click.py new file mode 100644 index 0000000..f83a0af --- /dev/null +++ b/lib/starts_with_click.py @@ -0,0 +1,17 @@ +import sys +from wavio import read_wave_file +from constants import bit_depth + +default_threshold_samples = (0.001 * float(2 ** (bit_depth - 1))) + + +def starts_with_click(filename, threshold_samples=default_threshold_samples): + sample_data = read_wave_file(filename) + return (abs(sample_data[0][0]) > threshold_samples or + abs(sample_data[0][1]) > threshold_samples) + +if __name__ == "__main__": + if starts_with_click(sys.argv[1]): + sys.exit(0) + else: + sys.exit(1) diff --git a/lib/truncate.py b/lib/truncate.py new file mode 100644 index 0000000..73884bd --- /dev/null +++ b/lib/truncate.py @@ -0,0 +1,34 @@ +import sys +from wavio import read_wave_file +from utils import start_of, end_of + + +def chop(aif): + file = read_wave_file(aif) + start, end = min([start_of(chan) for chan in file]), \ + max([end_of(chan) for chan in file]) + print aif, start, end, float(end) / len(file[0]) + + # outfile = aif + '.chopped.aif' + # r = wave.open(aif, 'rb') + # w = 
wave.open(outfile, 'wb') + # w.setnchannels(r.getnchannels()) + # w.setsampwidth(r.getsampwidth()) + # w.setframerate(r.getframerate()) + + # # Seek forward to the start point + # r.readframes(start) + + # # Copy the frames from in to out + # w.writeframes(r.readframes(end - start)) + # r.close() + # w.close() + + # plt.plot(file[0][:(44100 * 2)]) + # plt.axvline(start) + # plt.axvline(end) + # plt.show() + + +if __name__ == "__main__": + chop(sys.argv[1]) diff --git a/lib/utils.py b/lib/utils.py new file mode 100644 index 0000000..bacdb70 --- /dev/null +++ b/lib/utils.py @@ -0,0 +1,96 @@ +import itertools +import numpy + +from constants import default_silence_threshold, bit_depth +from collections import defaultdict + + +NOTE_NAMES = [ + 'A', 'Bb', 'B', 'C', 'Db', 'D', + 'Eb', 'E', 'F', 'Gb', 'G', 'Ab' +] + + +def note_name(note): + from_c = int(int(note) - 21) + note_name = NOTE_NAMES[(from_c % 12)] + octave_number = (from_c / 12) + return "%s%d" % (note_name, octave_number) + + +def note_number(note_name): + octave_number = int(note_name[-1]) + note = note_name[:-1] + return 21 + NOTE_NAMES.index(note) + (12 * octave_number) + + +def warn_on_clipping(data, threshold=0.9999): + if numpy.amax(numpy.absolute(data)) > ((2 ** (bit_depth - 1)) * threshold): + print "WARNING: Clipping detected!" 
+ + +def trim_data( + data, + start_threshold=default_silence_threshold, + end_threshold=default_silence_threshold +): + start, end = min([start_of(chan, start_threshold) for chan in data]), \ + max([end_of(chan, end_threshold) for chan in data]) + + return data[0:, start:end] + + +def trim_mono_data( + data, + start_threshold=default_silence_threshold, + end_threshold=default_silence_threshold +): + start, end = start_of(data, start_threshold), end_of(data, end_threshold) + return data[start:end] + + +def normalized(list): + return list.astype(numpy.float32) / float(numpy.amax(numpy.abs(list))) + + +def start_of(list, threshold=default_silence_threshold, samples_before=1): + if int(threshold) != threshold: + threshold = threshold * float(2 ** (bit_depth - 1)) + index = numpy.argmax(numpy.absolute(list) > threshold) + if index > (samples_before - 1): + return index - samples_before + else: + return 0 + + +def end_of(list, threshold=default_silence_threshold, samples_after=1): + if int(threshold) != threshold: + threshold = threshold * float(2 ** (bit_depth - 1)) + rev_index = numpy.argmax( + numpy.flipud(numpy.absolute(list)) > threshold + ) + if rev_index > (samples_after - 1): + return len(list) - (rev_index - samples_after) + else: + return len(list) + + +def first_non_none(list): + try: + return next(item for item in list if item is not None) + except StopIteration: + return None + + +def group_by_attr(data, attrs): + if not isinstance(attrs, list): + attrs = [attrs] + groups = defaultdict(list) + for k, g in itertools.groupby( + data, + lambda x: first_non_none([ + x.attributes.get(attr, None) for attr in attrs + ]) + ): + groups[k].extend(list(g)) + return groups diff --git a/lib/volume_leveler.py b/lib/volume_leveler.py new file mode 100644 index 0000000..43ca856 --- /dev/null +++ b/lib/volume_leveler.py @@ -0,0 +1,114 @@ +import math +import numpy +import argparse +from constants import bit_depth +from sfzparser import SFZFile, Group +from wavio import 
read_wave_file +from utils import group_by_attr +from flacize import full_path +from itertools import tee, izip + + +def pairwise(iterable): + "s -> (s0,s1), (s1,s2), (s2, s3), ..." + a, b = tee(iterable) + next(b, None) + return izip(a, b) + + +def percent_to_db(percent): + return 20. * math.log(percent) + + +def max_amp(filename): + return numpy.amax(read_wave_file(filename)) / (2. ** (bit_depth - 1)) + + +def peak_rms(data, window_size=480, limits=960): + index = max([numpy.argmax(channel) for channel in data]) + maxlimit = max([len(channel) for channel in data]) + max_so_far = 0 + for i in xrange( + max(index - limits, (window_size / 2)), + min(index + limits, maxlimit - (window_size / 2)) + ): + for channel in data: + window = channel[i - (window_size / 2):i + (window_size / 2)] + if len(window) == 0: + raise Exception("Cannot take mean of empty slice! Channel " + "size %d, index %d, window size %d" % ( + len(channel), i, window_size + )) + it_max = numpy.sqrt( + numpy.mean(window.astype(numpy.float) ** 2) + ) / (2. 
** (bit_depth - 1)) + if it_max > max_so_far: + max_so_far = it_max + return max_so_far + + +REMOVE_ATTRS = ['amp_velcurve_127', 'amp_velcurve_0', 'amp_veltrack'] + + +def level_volume(regions, dirname): + if len(regions) == 0: + return None + + velcurve = {} + + velsorted = list(reversed( + sorted(regions, key=lambda x: int(x.attributes['hivel'])) + )) + for high, low in pairwise(velsorted): + try: + diff = ( + peak_rms( + read_wave_file( + full_path(dirname, low.attributes['sample']) + ) + ) / + peak_rms( + read_wave_file( + full_path(dirname, high.attributes['sample']) + ) + ) + ) + except ZeroDivisionError: + print "Got ZeroDivisionError with high sample path: %s" % \ + high.attributes['sample'] + raise + for attr in REMOVE_ATTRS: + if attr in high.attributes: + del high.attributes[attr] + velcurve.update({ + ('amp_velcurve_%d' % + int(high.attributes['hivel'])): 1, + ('amp_velcurve_%d' % + (int(high.attributes['lovel']) + 1)): diff, + }) + # print the last region that didn't have a lower counterpart + low = velsorted[-1] + for attr in REMOVE_ATTRS: + if attr in low.attributes: + del low.attributes[attr] + velcurve.update({ + ('amp_velcurve_%d' % + int(low.attributes['hivel'])): 1, + ('amp_velcurve_%d' % + (int(low.attributes['lovel']) + 1)): 0, + }) + return Group(velcurve, velsorted) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description='volume-level sfz files with non-normalized samples' + ) + parser.add_argument('files', type=str, help='files to process', nargs='+') + args = parser.parse_args() + + for filename in args.files: + sfz = SFZFile(open(filename).read()) + regions = sum([group.regions for group in sfz.groups], []) + for key, regions in group_by_attr(regions, 'key').iteritems(): + print level_volume(regions) diff --git a/lib/wavio.py b/lib/wavio.py new file mode 100644 index 0000000..d8f78ff --- /dev/null +++ b/lib/wavio.py @@ -0,0 +1,42 @@ +import os +import wave +import numpy +import subprocess + +from constants import 
NUMPY_DTYPE + + +def read_flac_file(filename, use_numpy=False): + tempfile = filename + '.tmp.wav' + commandline = [ + 'ffmpeg', + '-y', + '-i', + filename, + tempfile + ] + # sys.stderr.write("Calling '%s'...\n" % ' '.join(commandline)) + subprocess.call( + commandline, + stdout=open('/dev/null', 'w'), + stderr=open('/dev/null', 'w') + ) + result = read_wave_file(tempfile, use_numpy) + os.unlink(tempfile) + return result + + +def read_wave_file(filename, use_numpy=False): + try: + w = wave.open(filename) + a = numpy.fromstring(w.readframes(9999999999), dtype=NUMPY_DTYPE) + if use_numpy: + return numpy.reshape(a, (w.getnchannels(), -1), 'F') + else: + return [ + a[i::w.getnchannels()] + for i in xrange(w.getnchannels()) + ] + except wave.Error: + print "Could not open %s" % filename + raise diff --git a/record.py b/record.py new file mode 100755 index 0000000..892fd7a --- /dev/null +++ b/record.py @@ -0,0 +1,101 @@ +#!/usr/bin/env python +import argparse +from lib.utils import note_number +from lib.send_notes import sample_program, VELOCITIES, MAX_ATTEMPTS + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description='create SFZ files from external audio devices' + ) + sampling_options = parser.add_argument_group('Sampling Options') + sampling_options.add_argument( + '--program-number', type=int, + help='switch to a program number before recording') + sampling_options.add_argument( + '--low-key', type=note_number, default=21, + help='key to start sampling from (key name, octave number)') + sampling_options.add_argument( + '--high-key', type=note_number, default=109, + help='key to stop sampling at (key name, octave number)') + sampling_options.add_argument( + '--velocity-levels', type=int, default=VELOCITIES, nargs='+', + help='velocity levels (in [1, 127]) to sample') + sampling_options.add_argument( + '--key-skip', type=int, default=1, dest='key_range', + help='number of keys covered by one sample') + sampling_options.add_argument( + 
'--max-attempts', type=int, default=MAX_ATTEMPTS, + help='maximum number of tries to resample a note') + sampling_options.add_argument( + '--limit', type=float, default=45, + help='length in seconds of longest sample') + sampling_options.add_argument( + '--has-portamento', action='store_true', dest='has_portamento', + help='play each note once before sampling to avoid ' + 'portamento sweeps between notes') + sampling_options.add_argument( + '--sample-asc', action='store_true', dest='sample_asc', + help='sample notes from low to high (default false)') + + output_options = parser.add_argument_group('Output Options') + output_options.add_argument( + 'output_folder', type=str, + help='name of output folder') + output_options.add_argument( + '--no-flac', action='store_false', dest='flac', + help="don't compress output to flac samples") + output_options.add_argument( + '--no-delete', action='store_false', dest='cleanup_aif_files', + help='leave temporary .aif files in place after flac compression') + output_options.add_argument( + '--loop', action='store_true', dest='looping_enabled', + help='attempt to loop sounds (should only be used ' + 'with sounds with infinite sustain)') + output_options.add_argument( + '--ignore-clicks', action='store_true', dest='ignore_clicks', + help='turn off click/audio corruption checks (default false)') + + io_options = parser.add_argument_group('MIDI/Audio IO Options') + io_options.add_argument( + '--midi-port-name', type=str, + help='name of MIDI device to use') + io_options.add_argument( + '--midi-channel', type=int, default=1, + help='MIDI channel to send messages on') + io_options.add_argument( + '--audio-interface-name', type=str, + help='name of audio input device to use') + io_options.add_argument( + '--sample-rate', type=int, default=48000, + help='sample rate to use. 
audio interface must support this rate.') + + misc_options = parser.add_argument_group('Misc Options') + misc_options.add_argument( + '--print-progress', action='store_true', dest='print_progress', + help='show text-based VU meters in terminal (default false, ' + 'can cause audio artifacts)') + + args = parser.parse_args() + + sample_program( + output_folder=args.output_folder, + low_key=args.low_key, + high_key=args.high_key, + max_attempts=args.max_attempts, + midi_channel=args.midi_channel, + midi_port_name=args.midi_port_name, + audio_interface_name=args.audio_interface_name, + program_number=args.program_number, + flac=args.flac, + velocity_levels=args.velocity_levels, + key_range=args.key_range, + cleanup_aif_files=args.cleanup_aif_files, + limit=args.limit, + looping_enabled=args.looping_enabled, + print_progress=args.print_progress, + has_portamento=args.has_portamento, + sample_asc=args.sample_asc, + ignore_clicks=args.ignore_clicks, + sample_rate=args.sample_rate, + ) diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..6ee95ba --- /dev/null +++ b/requirements.txt @@ -0,0 +1,6 @@ +PyAudio==0.2.9 +midi==0.2.2 +python-rtmidi==1.0.0 +numpy==1.9.1 +tqdm==4.8.4 +tabulate==0.7.7 diff --git a/samplescanner b/samplescanner new file mode 120000 index 0000000..c020662 --- /dev/null +++ b/samplescanner @@ -0,0 +1 @@ +record.py \ No newline at end of file