Skip to content

Commit

Permalink
Merge pull request #33 from Julie-Fabre/bleeding_edge
Browse files Browse the repository at this point in the history
Handle duplicate spikes + a better back-compatibility management system
  • Loading branch information
Julie-Fabre authored Oct 29, 2023
2 parents 62175d5 + bb82695 commit 5d1c184
Show file tree
Hide file tree
Showing 14 changed files with 339 additions and 527 deletions.
12 changes: 8 additions & 4 deletions bc_qualityMetrics_pipeline.asv
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,15 @@ ephysRawDir = dir('/home/netshare/zaru/JF093/2023-03-06/ephys/site1/*ap*.*bin');
ephysMetaDir = dir('/home/netshare/zaru/JF093/2023-03-06/ephys/site1/*ap*.*meta'); % path to your .meta or .oebin meta file
savePath = '/media/julie/ExtraHD/JF093/qMetrics'; % where you want to save the quality metrics
decompressDataLocal = '/media/julie/ExtraHD/decompressedData'; % where to save raw decompressed ephys data
gain_to_uV = 0.195; % use this if you are not using spikeGLX or openEphys to record your data. You can then leave ephysMetaDir
gain_to_uV = 0.195; % use this if you are not using spikeGLX or openEphys to record your data. You then must leave the ephysMetaDir
% empty(e.g. ephysMetaDir = '')

%% check MATLAB version
oldMATLAB = isMATLABReleaseOlderThan("R2019a");
of
if oldMATLAB
error('This MATLAB version is older than 2019a - download a more recent version before continuing')
end

%% load data
[spikeTimes_samples, spikeTemplates, templateWaveforms, templateAmplitudes, pcFeatures, ...
pcFeatureIdx, channelPositions] = bc_loadEphysData(ephysKilosortPath);
Expand All @@ -33,8 +36,9 @@ of
rawFile = bc_manageDataCompression(ephysRawDir, decompressDataLocal);

%% which quality metric parameters to extract and thresholds
param = bc_qualityParamValues(ephysMetaDir, rawFile, ephysKilosortPath, gain_to_uV);
% param = bc_qualityParamValuesForUnitMatch(ephysMetaDir, rawFile) % Run this if you want to use UnitMatch after
param = bc_qualityParamValues(ephysMetaDir, rawFile, ephysKilosortPath, gain_to_uV); %for unitmatch, run this:
% param = bc_qualityParamValuesForUnitMatch(ephysMetaDir, rawFile, ephysKilosortPath, gain_to_uV)


%% compute quality metrics
rerun = 0;
Expand Down
14 changes: 8 additions & 6 deletions bc_qualityMetrics_pipeline.m
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,11 @@


%% set paths - EDIT THESE
ephysKilosortPath = '/home/netshare/zaru/JF093/2023-03-06/ephys/kilosort2/site1/';% path to your kilosort output files
ephysRawDir = dir('/home/netshare/zaru/JF093/2023-03-06/ephys/site1/*ap*.*bin'); % path to yourraw .bin or .dat data
ephysMetaDir = dir('/home/netshare/zaru/JF093/2023-03-06/ephys/site1/*ap*.*meta'); % path to your .meta or .oebin meta file
savePath = '/media/julie/ExtraHD/JF093/qMetrics'; % where you want to save the quality metrics
% '/home/netshare/zaru/JF093/2023-03-06/ephys/kilosort2/site1
ephysKilosortPath = '/home/netshare/zaru/JF093/2023-03-08/ephys/pykilosort/site2/output';% path to your kilosort output files
ephysRawDir = dir('/home/netshare/zaru/JF093/2023-03-08/ephys/site2/*ap*.*bin'); % path to your raw .bin or .dat data
ephysMetaDir = dir('/home/netshare/zaru/JF093/2023-03-08/ephys/site2/*ap*.*meta'); % path to your .meta or .oebin meta file
savePath = '/media/julie/ExtraHD/JF093/2023-03-08/ephys/site2/qMetrics'; % where you want to save the quality metrics
decompressDataLocal = '/media/julie/ExtraHD/decompressedData'; % where to save raw decompressed ephys data
gain_to_uV = 0.195; % use this if you are not using spikeGLX or openEphys to record your data. You then must leave the ephysMetaDir
% empty(e.g. ephysMetaDir = '')
Expand All @@ -36,8 +37,9 @@
rawFile = bc_manageDataCompression(ephysRawDir, decompressDataLocal);

%% which quality metric parameters to extract and thresholds
param = bc_qualityParamValues(ephysMetaDir, rawFile, ephysKilosortPath, gain_to_uV);
% param = bc_qualityParamValuesForUnitMatch(ephysMetaDir, rawFile) % Run this if you want to use UnitMatch after
param = bc_qualityParamValues(ephysMetaDir, rawFile, ephysKilosortPath, gain_to_uV); %for unitmatch, run this:
% param = bc_qualityParamValuesForUnitMatch(ephysMetaDir, rawFile, ephysKilosortPath, gain_to_uV)


%% compute quality metrics
rerun = 0;
Expand Down
8 changes: 8 additions & 0 deletions loading/bc_loadMetricsForGUI.m
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,14 @@
rawWaveforms.average = readNPY([fullfile(savePath, 'templates._bc_rawWaveforms.npy')]);
rawWaveforms.peakChan = readNPY([fullfile(savePath, 'templates._bc_rawWaveformPeakChannels.npy')]);

% remove any duplicate spikes
[uniqueTemplates, ~, ephysData.spike_times_samples, ephysData.spike_templates, ephysData.template_amplitudes, ...
~, rawWaveforms.average, rawWaveforms.peakChan, signalToNoiseRatio] = ...
bc_removeDuplicateSpikes(ephysData.spike_times_samples, ephysData.spike_templates, ephysData.template_amplitudes,...
[], rawWaveforms.average, rawWaveforms.peakChan,[],...
qMetric.maxChannels, param.removeDuplicateSpikes, param.duplicateSpikeWindow_s, ...
param.ephys_sample_rate, param.saveSpikes_withoutDuplicates, savePath, param.recomputeDuplicateSpikes);

% load other gui stuffs
if ~exist('forGUI', 'var') || ~isempty(dir([savePath, filesep, 'templates.qualityMetricDetailsforGUI.mat']))
load([savePath, filesep, 'templates.qualityMetricDetailsforGUI.mat'])
Expand Down
2 changes: 1 addition & 1 deletion personal_work_in_progress/bc_qualityMetricsPipeline_JF.m
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@

end
%% run qmetrics
param = bc_qualityParamValues(ephysMetaDir, rawFile);
param = bc_qualityParamValues_JF(ephysMetaDir, rawFile);
%param.computeDistanceMetrics = 1;

%% compute quality metrics
Expand Down
6 changes: 6 additions & 0 deletions personal_work_in_progress/bc_qualityParamValues_JF.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
function param = bc_qualityParamValues_JF(ephysMetaDir, rawFile, ephysKilosortPath, gain_to_uV)

param = bc_qualityParamValues(ephysMetaDir, rawFile, ephysKilosortPath, gain_to_uV);
param.removeDuplicateSpikes = 0;

end
34 changes: 34 additions & 0 deletions qualityMetrics/bc_addMissingFieldsWithDefault.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
function [data, missingFields] = bc_addMissingFieldsWithDefault(data, defaultValues)
% JF, Check input structure has all necessary fields + add them with
% defualt values if not.
% ------
% Inputs
% ------
if ~isstruct(data) && ~istable(data)
error('Input must be a structure or table');
end

if ~isstruct(defaultValues)
error('Default values must be provided as a structure');
end

fieldnames = fields(defaultValues);

if isstruct(data)
missingFields = fieldnames(~isfield(data, fieldnames));

for i = 1:length(missingFields)
fieldName = missingFields{i};
data.(fieldName) = defaultValues.(fieldName);
end
else % data is a table
existingFields = data.Properties.VariableNames;
missingFields = setdiff(fieldnames, existingFields);

for i = 1:length(missingFields)
fieldName = missingFields{i};
data.(fieldName) = repmat(defaultValues.(fieldName), height(data), 1);
end
end
end

41 changes: 41 additions & 0 deletions qualityMetrics/bc_checkParameterFields.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
function param_complete = bc_checkParameterFields(param)
% JF, Check input structure has all necessary fields + add them with
% defualt values if not. This is to ensure backcompatibility when any new
% paramaters are introduced. By default, any parameters not already present
% will be set so that the quality metrics are calculated in the same way as
% they were before these new parameters were introduced.
% ------
% Inputs
% ------



%% Default values for fields
% duplicate spikes
defaultValues.removeDuplicateSpikes = 0;
defaultValues.duplicateSpikeWindow_s = 0.0001;
defaultValues.saveSpikes_withoutDuplicates = 1;
defaultValues.recomputeDuplicateSpikes = 0;

% raw waveforms
defaultValues.detrendWaveforms = 0;
defaultValues.extractRaw = 1;

% amplitude
defaultValues.gain_to_uV = NaN;

% phy saving
defaultValues.saveAsTSV = 0;
defaultValues.unitType_for_phy = 0;


%% Check for missing fields and add them with default value
[param_complete, missingFields] = bc_addMissingFieldsWithDefault(param, defaultValues);

%% Display result
if ~isempty(missingFields)
disp('Missing param fields filled in with default values');
disp(missingFields);
end

end
4 changes: 4 additions & 0 deletions qualityMetrics/bc_getQualityUnitType.m
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@
% ------
% Outputs
% ------

% check paramaters
param = bc_checkParameterFields(param);

if nargin < 3 && param.unitType_for_phy == 1
savePath = pwd;
warning('no save path specified. using current working directory')
Expand Down
7 changes: 7 additions & 0 deletions qualityMetrics/bc_qualityParamValues.m
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,12 @@
end
param.saveMatFileForGUI = 1; % save certain outputs at .mat file - useful for GUI

% duplicate spikes parameters
param.removeDuplicateSpikes = 1;
param.duplicateSpikeWindow_s = 0.00001; % in seconds
param.saveSpikes_withoutDuplicates = 1;
param.recomputeDuplicateSpikes = 0;

% amplitude / raw waveform parameters
param.detrendWaveform = 1; % If this is set to 1, each raw extracted spike is
% detrended (we remove the best straight-fit line from the spike)
Expand All @@ -56,6 +62,7 @@
% For additional probe types, make a pull request with more
% information. If your spikeGLX meta file contains information about your probe
% type, or if you are using open ephys, this paramater wil be ignored.
param.detrendWaveforms = 0;

% signal to noise ratio
param.waveformBaselineNoiseWindow = 20; %time in samples at beginning of times
Expand Down
160 changes: 160 additions & 0 deletions qualityMetrics/bc_removeDuplicateSpikes.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
function [nonEmptyUnits, duplicateSpikes_idx, spikeTimes_samples, spikeTemplates, templateAmplitudes, ...
pcFeatures, rawWaveformsFull, rawWaveformsPeakChan, signalToNoiseRatio, maxChannels] = ...
bc_removeDuplicateSpikes(spikeTimes_samples, spikeTemplates, templateAmplitudes, ...
pcFeatures, rawWaveformsFull, rawWaveformsPeakChan, signalToNoiseRatio, ...
maxChannels, removeDuplicateSpikes_flag, ...
duplicateSpikeWindow_s, ephys_sample_rate, saveSpikes_withoutDuplicates_flag, savePath, recompute)
% JF, Remove any duplicate spikes
% Some spike sorters (including kilosort) can sometimes count spikes twice
% if for instance the residuals are re-fitted. see https://github.com/MouseLand/Kilosort/issues/29
% ------
% Inputs
% ------
%
% ------
% Outputs
% ------
%

if removeDuplicateSpikes_flag
% Check if we need to extract duplicate spikes
if recompute || isempty(dir([savePath, filesep, 'spikes._bc_duplicateSpikes.npy']))
% Parameters
duplicateSpikeWindow_samples = duplicateSpikeWindow_s * ephys_sample_rate;
batch_size = 10000;
overlap_size = 100;
numSpikes_full = length(spikeTimes_samples);

% initialize and re-allocate
duplicateSpikes_idx = false(1, numSpikes_full);

% Rename the spike templates according to the remaining templates
good_templates_idx = unique(spikeTemplates);
new_spike_idx = nan(max(spikeTemplates), 1);
new_spike_idx(good_templates_idx) = 1:length(good_templates_idx);
spikeTemplates_flat = new_spike_idx(spikeTemplates);

% check for duplicate spikes in batches
for start_idx = 1:batch_size - overlap_size:numSpikes_full
end_idx = min(start_idx+batch_size-1, numSpikes_full);
batch_spikeTimes_samples = spikeTimes_samples(start_idx:end_idx);
batch_spikeTemplates = spikeTemplates(start_idx:end_idx);
batch_templateAmplitudes = templateAmplitudes(start_idx:end_idx);

[~, ~, batch_removeIdx] = removeDuplicates(batch_spikeTimes_samples, ...
batch_spikeTemplates, batch_templateAmplitudes, duplicateSpikeWindow_samples, ...
maxChannels, spikeTemplates_flat);

duplicateSpikes_idx(start_idx:end_idx) = batch_removeIdx;

if end_idx == numSpikes_full
break;
end
end
% save data if required
if saveSpikes_withoutDuplicates_flag
writeNPY(duplicateSpikes_idx, [savePath, filesep, 'spikes._bc_duplicateSpikes.npy'])
end

else
duplicateSpikes_idx = readNPY([savePath, filesep, 'spikes._bc_duplicateSpikes.npy']);
end

% check if there are any empty units
unique_templates = unique(spikeTemplates);
nonEmptyUnits = unique(spikeTemplates(~duplicateSpikes_idx));
emptyUnits_idx = ~ismember(unique_templates, nonEmptyUnits);

% remove any empty units from ephys data
spikeTimes_samples = spikeTimes_samples(~duplicateSpikes_idx);
spikeTemplates = spikeTemplates(~duplicateSpikes_idx);
templateAmplitudes = templateAmplitudes(~duplicateSpikes_idx);
if ~isempty(pcFeatures)
pcFeatures = pcFeatures(~duplicateSpikes_idx, :, :);
end
if ~isempty(rawWaveformsFull)
rawWaveformsFull = rawWaveformsFull(~emptyUnits_idx, :, :);
rawWaveformsPeakChan = rawWaveformsPeakChan(~emptyUnits_idx);
end

if ~isempty(signalToNoiseRatio)
signalToNoiseRatio = signalToNoiseRatio(~emptyUnits_idx);
end

fprintf('\n Removed %.0f spike duplicates out of %.0f. \n', sum(duplicateSpikes_idx), length(duplicateSpikes_idx))

else
nonEmptyUnits = unique(spikeTemplates);
duplicateSpikes_idx = zeros(size(spikeTimes_samples, 1), 1);

end


function [spikeTimes_samples, spikeTemplates, removeIdx] = removeDuplicates(spikeTimes_samples, ...
spikeTemplates, templateAmplitudes, duplicateSpikeWindow_samples, maxChannels, spikeTemplates_flat)
numSpikes = length(spikeTimes_samples);
removeIdx = false(1, numSpikes);

% Intra-unit duplicate removal
for iSpike1 = 1:numSpikes
if removeIdx(iSpike1)
continue;
end

for iSpike2 = iSpike1 + 1:numSpikes
if removeIdx(iSpike2)
continue;
end
if maxChannels(spikeTemplates_flat(iSpike2)) ~= maxChannels(spikeTemplates_flat(iSpike1)) % spikes are not on same channel
continue;
end

if spikeTemplates(iSpike1) == spikeTemplates(iSpike2)
if abs(spikeTimes_samples(iSpike1)-spikeTimes_samples(iSpike2)) <= duplicateSpikeWindow_samples
if templateAmplitudes(iSpike1) < templateAmplitudes(iSpike2)
spikeTimes_samples(iSpike1) = NaN;
removeIdx(iSpike1) = true;
break;
else
spikeTimes_samples(iSpike2) = NaN;
removeIdx(iSpike2) = true;
end
end
end
end
end


% Inter-unit duplicate removal
unitSpikeCounts = accumarray(spikeTemplates, 1);
for iSpike1 = 1:length(spikeTimes_samples)
if removeIdx(iSpike1)
continue;
end

for iSpike2 = iSpike1 + 1:length(spikeTimes_samples)
if removeIdx(iSpike2)
continue;
end
if maxChannels(spikeTemplates_flat(iSpike2)) ~= maxChannels(spikeTemplates_flat(iSpike1)) % spikes are not on same channel
continue;
end

if spikeTemplates(iSpike1) ~= spikeTemplates(iSpike2)
if abs(spikeTimes_samples(iSpike1)-spikeTimes_samples(iSpike2)) <= duplicateSpikeWindow_samples
if unitSpikeCounts(spikeTemplates(iSpike1)) < unitSpikeCounts(spikeTemplates(iSpike2))
spikeTimes_samples(iSpike1) = NaN;
removeIdx(iSpike1) = true;
break;
else
spikeTimes_samples(iSpike2) = NaN;
removeIdx(iSpike2) = true;
end
end
end
end
end
end


end
Loading

0 comments on commit 5d1c184

Please sign in to comment.