Skip to content

Commit

Permalink
Merge pull request NeuralEnsemble#1425 from samuelgarcia/fix_neuralyn…
Browse files Browse the repository at this point in the history
…x_gaps

Neuralynx : new approach gap tolerance
  • Loading branch information
samuelgarcia authored Mar 6, 2024
2 parents 6cd8158 + 1ed52bd commit 8ec58f7
Show file tree
Hide file tree
Showing 3 changed files with 118 additions and 263 deletions.
352 changes: 95 additions & 257 deletions neo/rawio/neuralynxrawio/ncssections.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,264 +172,64 @@ def calc_sample_time(sampFr, startTime, posn):
return round(startTime + NcsSectionsFactory.get_micros_per_samp_for_freq(sampFr) * posn)

@staticmethod
def _parseGivenActualFrequency(ncsMemMap, ncsSects, chanNum, reqFreq, blkOnePredTime):
def _buildNcsSections(ncsMemMap, sampFreq, gapTolerance=0):
"""
Parse sections in memory mapped file when microsPerSampUsed and sampFreqUsed are known,
filling in an NcsSections object.
Parameters
----------
ncsMemMap:
memmap of Ncs file
ncsSections:
NcsSections with actual sampFreqUsed correct, first NcsSection with proper startSect
and startTime already added.
chanNum:
channel number that should be present in all records
reqFreq:
rounded frequency that all records should contain
blkOnePredTime:
predicted starting time of second record in block
Returns
-------
NcsSections object with block locations marked
Construct NcsSections with fast mode when no gaps or parsing the file to detect gaps.
"""

# New code numpy vector based (speedup X50)
delta = (ncsMemMap["timestamp"][1:] - ncsMemMap["timestamp"][:-1]).astype(np.int64)
delta_prediction = ((ncsMemMap["nb_valid"][:-1] / ncsSects.sampFreqUsed) * 1e6).astype(np.int64)
gap_inds = np.flatnonzero((delta - delta_prediction) != 0)
gap_inds += 1
sections_limits = [0] + gap_inds.tolist() + [len(ncsMemMap)]

for i in range(len(gap_inds) + 1):
start = sections_limits[i]
stop = sections_limits[i + 1]
ncsSects.sects.append(
NcsSection(
startRec=start,
startTime=ncsMemMap["timestamp"][start],
endRec=stop - 1,
endTime=ncsMemMap["timestamp"][stop - 1]
+ np.uint64(ncsMemMap["nb_valid"][stop - 1] / ncsSects.sampFreqUsed * 1e6),
n_samples=np.sum(ncsMemMap["nb_valid"][start:stop]),
)
)

return ncsSects

@staticmethod
def _buildGivenActualFrequency(ncsMemMap, actualSampFreq, reqFreq):
"""
Build NcsSections object for file given actual sampling frequency.
Requires that frequency in each record agrees with requested frequency. This is
normally obtained by rounding the header frequency; however, this value may be different
from the rounded actual frequency used in the recording, since the underlying
requirement in older Ncs files was that the number of microseconds per sample in the
records is the inverse of the sampling frequency stated in the header truncated to
whole microseconds.
Parameters
----------
ncsMemMap:
memmap of Ncs file
actualSampFreq:
actual sampling frequency used
reqFreq:
frequency to require in records
Returns
-------
NcsSections object
"""
# check frequency in first record
if ncsMemMap["sample_rate"][0] != reqFreq:
raise IOError("Sampling frequency in first record doesn't agree with header.")
chanNum = ncsMemMap["channel_id"][0]
channel_id = ncsMemMap["channel_id"][0]

ncsSects = NcsSections()
ncsSects.sampFreqUsed = actualSampFreq
ncsSects.microsPerSampUsed = NcsSectionsFactory.get_micros_per_samp_for_freq(actualSampFreq)

# check if file is one block of records, which is often the case, and avoid full parse
lastBlkI = ncsMemMap.shape[0] - 1
ts0 = ncsMemMap["timestamp"][0]
nb0 = ncsMemMap["nb_valid"][0]
# need that last timestamp match the predicted one.
predLastBlockStartTime = NcsSectionsFactory.calc_sample_time(
actualSampFreq, ts0, NcsSection._RECORD_SIZE * lastBlkI
sampFreq, ncsMemMap["timestamp"][0], NcsSection._RECORD_SIZE * (ncsMemMap.shape[0] - 1)
)
lts = ncsMemMap["timestamp"][lastBlkI]
lnb = ncsMemMap["nb_valid"][lastBlkI]
if (
ncsMemMap["channel_id"][lastBlkI] == chanNum
and ncsMemMap["sample_rate"][lastBlkI] == reqFreq
and lts == predLastBlockStartTime
channel_id == ncsMemMap["channel_id"][-1]
and ncsMemMap["sample_rate"][0] == ncsMemMap["sample_rate"][-1]
and ncsMemMap["timestamp"][-1] == predLastBlockStartTime
):
lastBlkEndTime = NcsSectionsFactory.calc_sample_time(actualSampFreq, lts, lnb)
n_samples = NcsSection._RECORD_SIZE * lastBlkI
curBlock = NcsSection(0, ts0, lastBlkI, lastBlkEndTime, n_samples)

ncsSects.sects.append(curBlock)
return ncsSects

lastBlkEndTime = NcsSectionsFactory.calc_sample_time(sampFreq, ncsMemMap["timestamp"][-1], ncsMemMap["nb_valid"][-1])
n_samples = NcsSection._RECORD_SIZE * (ncsMemMap.size - 1) + ncsMemMap["nb_valid"][-1]
section0 = NcsSection(
startRec=0,
startTime=ncsMemMap["timestamp"][0],
endRec=ncsMemMap.size - 1,
endTime=lastBlkEndTime,
n_samples=n_samples
)
ncsSects.sects.append(section0)

else:
# otherwise need to scan looking for data gaps
blkOnePredTime = NcsSectionsFactory.calc_sample_time(actualSampFreq, ts0, nb0)
# curBlock = NcsSection(0, ts0, -1, -1, -1)
# ncsSects.sects.append(curBlock)
ncsSects = NcsSectionsFactory._parseGivenActualFrequency(
ncsMemMap, ncsSects, chanNum, reqFreq, blkOnePredTime
)
return ncsSects

@staticmethod
def _parseForMaxGap(ncsMemMap, ncsSects, maxGapLen):
"""
Parse blocks of records from file, allowing a maximum gap in timestamps between records
in sections. Estimates frequency being used based on timestamps.
Parameters
----------
ncsMemMap:
memmap of Ncs file
ncsSects:
NcsSections object with sampFreqUsed set to nominal frequency to use in computing time
for samples (Hz)
maxGapLen:
maximum difference within a block between predicted time of start of record and
recorded time
Returns
-------
NcsSections object with sampFreqUsed and microsPerSamp set based on estimate from
largest block
"""

chanNum = ncsMemMap["channel_id"][0]
recFreq = ncsMemMap["sample_rate"][0]

# check for consistent channel_ids and sampling rates
ncsMemMap["channel_id"]
if not (ncsMemMap["channel_id"] == chanNum).all():
raise IOError("Channel number changed in records within file")

if not all(ncsMemMap["sample_rate"] == recFreq):
raise IOError("Sampling frequency changed in records within file")

# find most frequent number of samples
exp_nb_valid = np.argmax(np.bincount(ncsMemMap["nb_valid"]))
# detect records with incomplete number of samples
gap_rec_ids = list(np.where(ncsMemMap["nb_valid"] != exp_nb_valid)[0])

rec_duration = 1e6 / ncsSects.sampFreqUsed * ncsMemMap["nb_valid"]
pred_times = np.rint(ncsMemMap["timestamp"] + rec_duration).astype(np.int64)
max_pred_times = pred_times + maxGapLen
# data records that start later than the predicted time (including the
# maximal accepted gap length) are considered delayed and a gap is
# registered.
delayed_recs = list(np.where(max_pred_times[:-1] < ncsMemMap["timestamp"][1:])[0])
gap_rec_ids.extend(delayed_recs)

# cleaning extracted gap ids
# last record can not be the beginning of a gap
last_rec_id = len(ncsMemMap["timestamp"]) - 1
if last_rec_id in gap_rec_ids:
gap_rec_ids.remove(last_rec_id)

# gap ids can only be listed once
gap_rec_ids = sorted(set(gap_rec_ids))

# create recording segments from identified gaps
ncsSects.sects.append(NcsSection(0, ncsMemMap["timestamp"][0], -1, -1, -1))
for gap_rec_id in gap_rec_ids:
curr_sec = ncsSects.sects[-1]
curr_sec.endRec = gap_rec_id
curr_sec.endTime = pred_times[gap_rec_id]
n_samples = np.sum(ncsMemMap["nb_valid"][curr_sec.startRec : gap_rec_id + 1])
curr_sec.n_samples = n_samples

next_sec = NcsSection(gap_rec_id + 1, ncsMemMap["timestamp"][gap_rec_id + 1], -1, -1, -1)
ncsSects.sects.append(next_sec)

curr_sec = ncsSects.sects[-1]
curr_sec.endRec = len(ncsMemMap["timestamp"]) - 1
curr_sec.endTime = pred_times[-1]
n_samples = np.sum(ncsMemMap["nb_valid"][curr_sec.startRec :])
curr_sec.n_samples = n_samples

# calculate the estimated frequency of the block with the most samples
max_blk_idx = np.argmax([bl.endRec - bl.startRec for bl in ncsSects.sects])
max_blk = ncsSects.sects[max_blk_idx]

maxBlkFreqEstimate = (
(max_blk.n_samples - ncsMemMap["nb_valid"][max_blk.endRec])
* 1e6
/ (ncsMemMap["timestamp"][max_blk.endRec] - max_blk.startTime)
)
# need to parse all data block to detect gaps
# check when the predicted timestamp is outside the tolerance
delta = (ncsMemMap["timestamp"][1:] - ncsMemMap["timestamp"][:-1]).astype(np.int64)
delta_prediction = ((ncsMemMap["nb_valid"][:-1] / sampFreq) * 1e6).astype(np.int64)

gap_inds = np.flatnonzero(np.abs(delta - delta_prediction) > gapTolerance)
gap_inds += 1

sections_limits = [ 0 ] + gap_inds.tolist() + [len(ncsMemMap)]

for i in range(len(gap_inds) + 1):
start = sections_limits[i]
stop = sections_limits[i + 1]
duration = np.uint64(1e6 / sampFreq * ncsMemMap["nb_valid"][stop-1])
ncsSects.sects.append(
NcsSection(
startRec=start,
startTime=ncsMemMap["timestamp"][start],
endRec=stop-1,
endTime=ncsMemMap["timestamp"][stop-1] + duration,
n_samples=np.sum(ncsMemMap["nb_valid"][start:stop])
)
)

ncsSects.sampFreqUsed = maxBlkFreqEstimate
ncsSects.microsPerSampUsed = NcsSectionsFactory.get_micros_per_samp_for_freq(maxBlkFreqEstimate)
# free memory that is unnecessarily occupied by the memmap
# (see https://github.com/numpy/numpy/issues/19340)
del ncsMemMap
return ncsSects

@staticmethod
def _buildForMaxGap(ncsMemMap, nomFreq):
"""
Determine sections of records in memory mapped Ncs file given a nominal frequency of
the file, using the default values of frequency tolerance and maximum gap between blocks.
Parameters
----------
ncsMemMap:
memmap of Ncs file
nomFreq:
nominal sampling frequency used, normally from header of file
Returns
-------
NcsSections object
"""
nb = NcsSections()

numRecs = ncsMemMap.shape[0]
if numRecs < 1:
return nb

chanNum = ncsMemMap["channel_id"][0]
ts0 = ncsMemMap["timestamp"][0]

lastBlkI = numRecs - 1
lts = ncsMemMap["timestamp"][lastBlkI]
lcid = ncsMemMap["channel_id"][lastBlkI]
lnb = ncsMemMap["nb_valid"][lastBlkI]
lsr = ncsMemMap["sample_rate"][lastBlkI]

# check if file is one block of records, with exact timestamp match, which may be the case
numSampsForPred = NcsSection._RECORD_SIZE * lastBlkI
predLastBlockStartTime = NcsSectionsFactory.calc_sample_time(nomFreq, ts0, numSampsForPred)
freqInFile = math.floor(nomFreq)
if lts - predLastBlockStartTime == 0 and lcid == chanNum and lsr == freqInFile:
endTime = NcsSectionsFactory.calc_sample_time(nomFreq, lts, lnb)
curBlock = NcsSection(0, ts0, lastBlkI, endTime, numSampsForPred)
nb.sects.append(curBlock)
nb.sampFreqUsed = (numSampsForPred + lnb) / (endTime - ts0) * 1e6
nb.microsPerSampUsed = NcsSectionsFactory.get_micros_per_samp_for_freq(nb.sampFreqUsed)

# otherwise parse records to determine blocks using default maximum gap length
else:
nb.sampFreqUsed = nomFreq
nb.microsPerSampUsed = NcsSectionsFactory.get_micros_per_samp_for_freq(nb.sampFreqUsed)
maxGapToAllow = round(NcsSectionsFactory._maxGapSampFrac * 1e6 / nomFreq)
nb = NcsSectionsFactory._parseForMaxGap(ncsMemMap, nb, maxGapToAllow)

return nb

@staticmethod
def build_for_ncs_file(ncsMemMap, nlxHdr):
def build_for_ncs_file(ncsMemMap, nlxHdr, gapTolerance=None, strict_gap_mode=True):
"""
Build an NcsSections object for an NcsFile, given as a memmap and NlxHeader,
handling gap detection appropriately given the file type as specified by the header.
Expand All @@ -446,32 +246,70 @@ def build_for_ncs_file(ncsMemMap, nlxHdr):
An NcsSections corresponding to the provided ncsMemMap and nlxHdr
"""
acqType = nlxHdr.type_of_recording()
freq = nlxHdr["sampling_rate"]

# Old Neuralynx style with truncated whole microseconds for actual sampling. This
# restriction arose from the sampling being based on a master 1 MHz clock.
if acqType == "PRE4":
freq = nlxHdr["sampling_rate"]
# Old Neuralynx style with truncated whole microseconds for actual sampling. This
# restriction arose from the sampling being based on a master 1 MHz clock.
microsPerSampUsed = math.floor(NcsSectionsFactory.get_micros_per_samp_for_freq(freq))
sampFreqUsed = NcsSectionsFactory.get_freq_for_micros_per_samp(microsPerSampUsed)
nb = NcsSectionsFactory._buildGivenActualFrequency(ncsMemMap, sampFreqUsed, math.floor(freq))
nb.sampFreqUsed = sampFreqUsed
nb.microsPerSampUsed = microsPerSampUsed
if gapTolerance is None:
if strict_gap_mode:
# this is the old behavior, maybe we could put 0.9 sample interval no ?
gapTolerance = 0
else:
gapTolerance = 0

ncsSects = NcsSectionsFactory._buildNcsSections(ncsMemMap, sampFreqUsed, gapTolerance=gapTolerance)
ncsSects.sampFreqUsed = sampFreqUsed
ncsSects.microsPerSampUsed = microsPerSampUsed

# digital lynx style with fractional frequency and micros per samp determined from
# block times
elif acqType in ["DIGITALLYNX", "DIGITALLYNXSX", "CHEETAH64", "CHEETAH560", "RAWDATAFILE"]:
nomFreq = nlxHdr["sampling_rate"]
nb = NcsSectionsFactory._buildForMaxGap(ncsMemMap, nomFreq)
# digital lynx style with fractional frequency and micros per samp determined from block times
if gapTolerance is None:
if strict_gap_mode:
# this is the old behavior
gapTolerance = round(NcsSectionsFactory._maxGapSampFrac * 1e6 / freq)
else:
# quarter of paquet size is tolerate
gapTolerance = round(0.25 * NcsSection._RECORD_SIZE * 1e6 / freq)
ncsSects = NcsSectionsFactory._buildNcsSections(ncsMemMap, freq, gapTolerance=gapTolerance)


# take longer data block to compute reaal sampling rate
# ind_max = np.argmax([section.n_samples for section in ncsSects.sects])
ind_max = np.argmax([section.endRec - section.startRec for section in ncsSects.sects])
section = ncsSects.sects[ind_max]
if section.endRec != section.startRec:
# need several data block
sampFreqUsed = (
(section.n_samples - ncsMemMap["nb_valid"][section.endRec])
* 1e6
/ (ncsMemMap["timestamp"][section.endRec] - section.startTime)
)
else:
sampFreqUsed = freq
ncsSects.sampFreqUsed = sampFreqUsed
ncsSects.microsPerSampUsed = NcsSectionsFactory.get_micros_per_samp_for_freq(sampFreqUsed)

# BML & ATLAS style with fractional frequency and micros per samp

elif acqType == "BML" or acqType == "ATLAS":
sampFreqUsed = nlxHdr["sampling_rate"]
nb = NcsSectionsFactory._buildGivenActualFrequency(ncsMemMap, sampFreqUsed, math.floor(sampFreqUsed))
# BML & ATLAS style with fractional frequency and micros per samp
if strict_gap_mode:
# this is the old behavior, maybe we could put 0.9 sample interval no ?
gapTolerance = 0
else:
# quarter of paquet size is tolerate
gapTolerance = round(0.25 * NcsSection._RECORD_SIZE * 1e6 / freq)
ncsSects = NcsSectionsFactory._buildNcsSections(ncsMemMap, freq, gapTolerance=gapTolerance)
ncsSects.sampFreqUsed = freq
ncsSects.microsPerSampUsed = NcsSectionsFactory.get_micros_per_samp_for_freq(freq)


else:
raise TypeError("Unknown Ncs file type from header.")

return nb
return ncsSects

@staticmethod
def _verifySectionsStructure(ncsMemMap, ncsSects):
Expand Down
Loading

0 comments on commit 8ec58f7

Please sign in to comment.