diff --git a/neo/rawio/neuralynxrawio/ncssections.py b/neo/rawio/neuralynxrawio/ncssections.py index f3280c2ca..462913f72 100644 --- a/neo/rawio/neuralynxrawio/ncssections.py +++ b/neo/rawio/neuralynxrawio/ncssections.py @@ -172,264 +172,64 @@ def calc_sample_time(sampFr, startTime, posn): return round(startTime + NcsSectionsFactory.get_micros_per_samp_for_freq(sampFr) * posn) @staticmethod - def _parseGivenActualFrequency(ncsMemMap, ncsSects, chanNum, reqFreq, blkOnePredTime): + def _buildNcsSections(ncsMemMap, sampFreq, gapTolerance=0): """ - Parse sections in memory mapped file when microsPerSampUsed and sampFreqUsed are known, - filling in an NcsSections object. - - Parameters - ---------- - ncsMemMap: - memmap of Ncs file - ncsSections: - NcsSections with actual sampFreqUsed correct, first NcsSection with proper startSect - and startTime already added. - chanNum: - channel number that should be present in all records - reqFreq: - rounded frequency that all records should contain - blkOnePredTime: - predicted starting time of second record in block - - Returns - ------- - NcsSections object with block locations marked + Construct NcsSections with fast mode when no gaps or parsing the file to detect gaps. 
""" - - # New code numpy vector based (speedup X50) - delta = (ncsMemMap["timestamp"][1:] - ncsMemMap["timestamp"][:-1]).astype(np.int64) - delta_prediction = ((ncsMemMap["nb_valid"][:-1] / ncsSects.sampFreqUsed) * 1e6).astype(np.int64) - gap_inds = np.flatnonzero((delta - delta_prediction) != 0) - gap_inds += 1 - sections_limits = [0] + gap_inds.tolist() + [len(ncsMemMap)] - - for i in range(len(gap_inds) + 1): - start = sections_limits[i] - stop = sections_limits[i + 1] - ncsSects.sects.append( - NcsSection( - startRec=start, - startTime=ncsMemMap["timestamp"][start], - endRec=stop - 1, - endTime=ncsMemMap["timestamp"][stop - 1] - + np.uint64(ncsMemMap["nb_valid"][stop - 1] / ncsSects.sampFreqUsed * 1e6), - n_samples=np.sum(ncsMemMap["nb_valid"][start:stop]), - ) - ) - - return ncsSects - - @staticmethod - def _buildGivenActualFrequency(ncsMemMap, actualSampFreq, reqFreq): - """ - Build NcsSections object for file given actual sampling frequency. - - Requires that frequency in each record agrees with requested frequency. This is - normally obtained by rounding the header frequency; however, this value may be different - from the rounded actual frequency used in the recording, since the underlying - requirement in older Ncs files was that the number of microseconds per sample in the - records is the inverse of the sampling frequency stated in the header truncated to - whole microseconds. 
- - Parameters - ---------- - ncsMemMap: - memmap of Ncs file - actualSampFreq: - actual sampling frequency used - reqFreq: - frequency to require in records - - Returns - ------- - NcsSections object - """ - # check frequency in first record - if ncsMemMap["sample_rate"][0] != reqFreq: - raise IOError("Sampling frequency in first record doesn't agree with header.") - chanNum = ncsMemMap["channel_id"][0] + channel_id = ncsMemMap["channel_id"][0] ncsSects = NcsSections() - ncsSects.sampFreqUsed = actualSampFreq - ncsSects.microsPerSampUsed = NcsSectionsFactory.get_micros_per_samp_for_freq(actualSampFreq) # check if file is one block of records, which is often the case, and avoid full parse - lastBlkI = ncsMemMap.shape[0] - 1 - ts0 = ncsMemMap["timestamp"][0] - nb0 = ncsMemMap["nb_valid"][0] + # need that last timestamp match the predicted one. predLastBlockStartTime = NcsSectionsFactory.calc_sample_time( - actualSampFreq, ts0, NcsSection._RECORD_SIZE * lastBlkI + sampFreq, ncsMemMap["timestamp"][0], NcsSection._RECORD_SIZE * (ncsMemMap.shape[0] - 1) ) - lts = ncsMemMap["timestamp"][lastBlkI] - lnb = ncsMemMap["nb_valid"][lastBlkI] if ( - ncsMemMap["channel_id"][lastBlkI] == chanNum - and ncsMemMap["sample_rate"][lastBlkI] == reqFreq - and lts == predLastBlockStartTime + channel_id == ncsMemMap["channel_id"][-1] + and ncsMemMap["sample_rate"][0] == ncsMemMap["sample_rate"][-1] + and ncsMemMap["timestamp"][-1] == predLastBlockStartTime ): - lastBlkEndTime = NcsSectionsFactory.calc_sample_time(actualSampFreq, lts, lnb) - n_samples = NcsSection._RECORD_SIZE * lastBlkI - curBlock = NcsSection(0, ts0, lastBlkI, lastBlkEndTime, n_samples) - - ncsSects.sects.append(curBlock) - return ncsSects - + lastBlkEndTime = NcsSectionsFactory.calc_sample_time(sampFreq, ncsMemMap["timestamp"][-1], ncsMemMap["nb_valid"][-1]) + n_samples = NcsSection._RECORD_SIZE * (ncsMemMap.size - 1) + ncsMemMap["nb_valid"][-1] + section0 = NcsSection( + startRec=0, + 
startTime=ncsMemMap["timestamp"][0], + endRec=ncsMemMap.size - 1, + endTime=lastBlkEndTime, + n_samples=n_samples + ) + ncsSects.sects.append(section0) + else: - # otherwise need to scan looking for data gaps - blkOnePredTime = NcsSectionsFactory.calc_sample_time(actualSampFreq, ts0, nb0) - # curBlock = NcsSection(0, ts0, -1, -1, -1) - # ncsSects.sects.append(curBlock) - ncsSects = NcsSectionsFactory._parseGivenActualFrequency( - ncsMemMap, ncsSects, chanNum, reqFreq, blkOnePredTime - ) - return ncsSects - - @staticmethod - def _parseForMaxGap(ncsMemMap, ncsSects, maxGapLen): - """ - Parse blocks of records from file, allowing a maximum gap in timestamps between records - in sections. Estimates frequency being used based on timestamps. - - Parameters - ---------- - ncsMemMap: - memmap of Ncs file - ncsSects: - NcsSections object with sampFreqUsed set to nominal frequency to use in computing time - for samples (Hz) - maxGapLen: - maximum difference within a block between predicted time of start of record and - recorded time - - Returns - ------- - NcsSections object with sampFreqUsed and microsPerSamp set based on estimate from - largest block - """ - - chanNum = ncsMemMap["channel_id"][0] - recFreq = ncsMemMap["sample_rate"][0] - - # check for consistent channel_ids and sampling rates - ncsMemMap["channel_id"] - if not (ncsMemMap["channel_id"] == chanNum).all(): - raise IOError("Channel number changed in records within file") - - if not all(ncsMemMap["sample_rate"] == recFreq): - raise IOError("Sampling frequency changed in records within file") - - # find most frequent number of samples - exp_nb_valid = np.argmax(np.bincount(ncsMemMap["nb_valid"])) - # detect records with incomplete number of samples - gap_rec_ids = list(np.where(ncsMemMap["nb_valid"] != exp_nb_valid)[0]) - - rec_duration = 1e6 / ncsSects.sampFreqUsed * ncsMemMap["nb_valid"] - pred_times = np.rint(ncsMemMap["timestamp"] + rec_duration).astype(np.int64) - max_pred_times = pred_times + maxGapLen - # 
data records that start later than the predicted time (including the - # maximal accepted gap length) are considered delayed and a gap is - # registered. - delayed_recs = list(np.where(max_pred_times[:-1] < ncsMemMap["timestamp"][1:])[0]) - gap_rec_ids.extend(delayed_recs) - - # cleaning extracted gap ids - # last record can not be the beginning of a gap - last_rec_id = len(ncsMemMap["timestamp"]) - 1 - if last_rec_id in gap_rec_ids: - gap_rec_ids.remove(last_rec_id) - - # gap ids can only be listed once - gap_rec_ids = sorted(set(gap_rec_ids)) - - # create recording segments from identified gaps - ncsSects.sects.append(NcsSection(0, ncsMemMap["timestamp"][0], -1, -1, -1)) - for gap_rec_id in gap_rec_ids: - curr_sec = ncsSects.sects[-1] - curr_sec.endRec = gap_rec_id - curr_sec.endTime = pred_times[gap_rec_id] - n_samples = np.sum(ncsMemMap["nb_valid"][curr_sec.startRec : gap_rec_id + 1]) - curr_sec.n_samples = n_samples - - next_sec = NcsSection(gap_rec_id + 1, ncsMemMap["timestamp"][gap_rec_id + 1], -1, -1, -1) - ncsSects.sects.append(next_sec) - - curr_sec = ncsSects.sects[-1] - curr_sec.endRec = len(ncsMemMap["timestamp"]) - 1 - curr_sec.endTime = pred_times[-1] - n_samples = np.sum(ncsMemMap["nb_valid"][curr_sec.startRec :]) - curr_sec.n_samples = n_samples - - # calculate the estimated frequency of the block with the most samples - max_blk_idx = np.argmax([bl.endRec - bl.startRec for bl in ncsSects.sects]) - max_blk = ncsSects.sects[max_blk_idx] - - maxBlkFreqEstimate = ( - (max_blk.n_samples - ncsMemMap["nb_valid"][max_blk.endRec]) - * 1e6 - / (ncsMemMap["timestamp"][max_blk.endRec] - max_blk.startTime) - ) + # need to parse all data block to detect gaps + # check when the predicted timestamp is outside the tolerance + delta = (ncsMemMap["timestamp"][1:] - ncsMemMap["timestamp"][:-1]).astype(np.int64) + delta_prediction = ((ncsMemMap["nb_valid"][:-1] / sampFreq) * 1e6).astype(np.int64) + + gap_inds = np.flatnonzero(np.abs(delta - delta_prediction) > 
gapTolerance) + gap_inds += 1 + + sections_limits = [ 0 ] + gap_inds.tolist() + [len(ncsMemMap)] + + for i in range(len(gap_inds) + 1): + start = sections_limits[i] + stop = sections_limits[i + 1] + duration = np.uint64(1e6 / sampFreq * ncsMemMap["nb_valid"][stop-1]) + ncsSects.sects.append( + NcsSection( + startRec=start, + startTime=ncsMemMap["timestamp"][start], + endRec=stop-1, + endTime=ncsMemMap["timestamp"][stop-1] + duration, + n_samples=np.sum(ncsMemMap["nb_valid"][start:stop]) + ) + ) - ncsSects.sampFreqUsed = maxBlkFreqEstimate - ncsSects.microsPerSampUsed = NcsSectionsFactory.get_micros_per_samp_for_freq(maxBlkFreqEstimate) - # free memory that is unnecessarily occupied by the memmap - # (see https://github.com/numpy/numpy/issues/19340) - del ncsMemMap return ncsSects @staticmethod - def _buildForMaxGap(ncsMemMap, nomFreq): - """ - Determine sections of records in memory mapped Ncs file given a nominal frequency of - the file, using the default values of frequency tolerance and maximum gap between blocks. 
- - Parameters - ---------- - ncsMemMap: - memmap of Ncs file - nomFreq: - nominal sampling frequency used, normally from header of file - - Returns - ------- - NcsSections object - """ - nb = NcsSections() - - numRecs = ncsMemMap.shape[0] - if numRecs < 1: - return nb - - chanNum = ncsMemMap["channel_id"][0] - ts0 = ncsMemMap["timestamp"][0] - - lastBlkI = numRecs - 1 - lts = ncsMemMap["timestamp"][lastBlkI] - lcid = ncsMemMap["channel_id"][lastBlkI] - lnb = ncsMemMap["nb_valid"][lastBlkI] - lsr = ncsMemMap["sample_rate"][lastBlkI] - - # check if file is one block of records, with exact timestamp match, which may be the case - numSampsForPred = NcsSection._RECORD_SIZE * lastBlkI - predLastBlockStartTime = NcsSectionsFactory.calc_sample_time(nomFreq, ts0, numSampsForPred) - freqInFile = math.floor(nomFreq) - if lts - predLastBlockStartTime == 0 and lcid == chanNum and lsr == freqInFile: - endTime = NcsSectionsFactory.calc_sample_time(nomFreq, lts, lnb) - curBlock = NcsSection(0, ts0, lastBlkI, endTime, numSampsForPred) - nb.sects.append(curBlock) - nb.sampFreqUsed = (numSampsForPred + lnb) / (endTime - ts0) * 1e6 - nb.microsPerSampUsed = NcsSectionsFactory.get_micros_per_samp_for_freq(nb.sampFreqUsed) - - # otherwise parse records to determine blocks using default maximum gap length - else: - nb.sampFreqUsed = nomFreq - nb.microsPerSampUsed = NcsSectionsFactory.get_micros_per_samp_for_freq(nb.sampFreqUsed) - maxGapToAllow = round(NcsSectionsFactory._maxGapSampFrac * 1e6 / nomFreq) - nb = NcsSectionsFactory._parseForMaxGap(ncsMemMap, nb, maxGapToAllow) - - return nb - - @staticmethod - def build_for_ncs_file(ncsMemMap, nlxHdr): + def build_for_ncs_file(ncsMemMap, nlxHdr, gapTolerance=None, strict_gap_mode=True): """ Build an NcsSections object for an NcsFile, given as a memmap and NlxHeader, handling gap detection appropriately given the file type as specified by the header. 
@@ -446,32 +246,70 @@ def build_for_ncs_file(ncsMemMap, nlxHdr): An NcsSections corresponding to the provided ncsMemMap and nlxHdr """ acqType = nlxHdr.type_of_recording() + freq = nlxHdr["sampling_rate"] - # Old Neuralynx style with truncated whole microseconds for actual sampling. This - # restriction arose from the sampling being based on a master 1 MHz clock. if acqType == "PRE4": - freq = nlxHdr["sampling_rate"] + # Old Neuralynx style with truncated whole microseconds for actual sampling. This + # restriction arose from the sampling being based on a master 1 MHz clock. microsPerSampUsed = math.floor(NcsSectionsFactory.get_micros_per_samp_for_freq(freq)) sampFreqUsed = NcsSectionsFactory.get_freq_for_micros_per_samp(microsPerSampUsed) - nb = NcsSectionsFactory._buildGivenActualFrequency(ncsMemMap, sampFreqUsed, math.floor(freq)) - nb.sampFreqUsed = sampFreqUsed - nb.microsPerSampUsed = microsPerSampUsed + if gapTolerance is None: + if strict_gap_mode: + # this is the old behavior, maybe we could put 0.9 sample interval no ? 
+ gapTolerance = 0 + else: + gapTolerance = 0 + + ncsSects = NcsSectionsFactory._buildNcsSections(ncsMemMap, sampFreqUsed, gapTolerance=gapTolerance) + ncsSects.sampFreqUsed = sampFreqUsed + ncsSects.microsPerSampUsed = microsPerSampUsed - # digital lynx style with fractional frequency and micros per samp determined from - # block times elif acqType in ["DIGITALLYNX", "DIGITALLYNXSX", "CHEETAH64", "CHEETAH560", "RAWDATAFILE"]: - nomFreq = nlxHdr["sampling_rate"] - nb = NcsSectionsFactory._buildForMaxGap(ncsMemMap, nomFreq) + # digital lynx style with fractional frequency and micros per samp determined from block times + if gapTolerance is None: + if strict_gap_mode: + # this is the old behavior + gapTolerance = round(NcsSectionsFactory._maxGapSampFrac * 1e6 / freq) + else: + # a quarter of the packet size is tolerated + gapTolerance = round(0.25 * NcsSection._RECORD_SIZE * 1e6 / freq) + ncsSects = NcsSectionsFactory._buildNcsSections(ncsMemMap, freq, gapTolerance=gapTolerance) + + + # take the longest data block to compute the real sampling rate + # ind_max = np.argmax([section.n_samples for section in ncsSects.sects]) + ind_max = np.argmax([section.endRec - section.startRec for section in ncsSects.sects]) + section = ncsSects.sects[ind_max] + if section.endRec != section.startRec: + # need several data blocks + sampFreqUsed = ( + (section.n_samples - ncsMemMap["nb_valid"][section.endRec]) + * 1e6 + / (ncsMemMap["timestamp"][section.endRec] - section.startTime) + ) + else: + sampFreqUsed = freq + ncsSects.sampFreqUsed = sampFreqUsed + ncsSects.microsPerSampUsed = NcsSectionsFactory.get_micros_per_samp_for_freq(sampFreqUsed) - # BML & ATLAS style with fractional frequency and micros per samp + elif acqType == "BML" or acqType == "ATLAS": - sampFreqUsed = nlxHdr["sampling_rate"] - nb = NcsSectionsFactory._buildGivenActualFrequency(ncsMemMap, sampFreqUsed, math.floor(sampFreqUsed)) + # BML & ATLAS style with fractional frequency and micros per samp + if strict_gap_mode: + # this is 
the old behavior, maybe we could use a 0.9 sample interval tolerance instead? + gapTolerance = 0 + else: + # a quarter of the packet size is tolerated + gapTolerance = round(0.25 * NcsSection._RECORD_SIZE * 1e6 / freq) + ncsSects = NcsSectionsFactory._buildNcsSections(ncsMemMap, freq, gapTolerance=gapTolerance) + ncsSects.sampFreqUsed = freq + ncsSects.microsPerSampUsed = NcsSectionsFactory.get_micros_per_samp_for_freq(freq) + else: raise TypeError("Unknown Ncs file type from header.") - return nb + return ncsSects @staticmethod def _verifySectionsStructure(ncsMemMap, ncsSects): diff --git a/neo/rawio/neuralynxrawio/neuralynxrawio.py b/neo/rawio/neuralynxrawio/neuralynxrawio.py index 70306ff29..16ee1860e 100644 --- a/neo/rawio/neuralynxrawio/neuralynxrawio.py +++ b/neo/rawio/neuralynxrawio/neuralynxrawio.py @@ -85,7 +85,13 @@ class NeuralynxRawIO(BaseRawIO): keep_original_times: bool, default: False If True, keep original start time as in files, Otherwise set 0 of time to first time in dataset - + strict_gap_mode: bool, default: True + Detect gaps using strict mode or not. + * strict_gap_mode = True then a gap is considered when the timestamp difference between two + consecutive data packets is more than one sample interval. + * strict_gap_mode = False then a gap has an increased tolerance. 
Some newer systems with different clocks need this option, + otherwise too many gaps are detected + Notes ----- * This IO supports NCS, NEV, NSE and NTT file formats (not NVT or NRD yet) @@ -125,7 +131,7 @@ class NeuralynxRawIO(BaseRawIO): ("samples", "int16", (NcsSection._RECORD_SIZE)), ] - def __init__(self, dirname="", filename="", exclude_filename=None, keep_original_times=False, **kargs): + def __init__(self, dirname="", filename="", exclude_filename=None, keep_original_times=False, strict_gap_mode=True, **kargs): if dirname != "": self.dirname = dirname @@ -137,6 +143,7 @@ def __init__(self, dirname="", filename="", exclude_filename=None, keep_original raise ValueError("One of dirname or filename must be provided.") self.keep_original_times = keep_original_times + self.strict_gap_mode = strict_gap_mode self.exclude_filename = exclude_filename BaseRawIO.__init__(self, **kargs) @@ -790,7 +797,7 @@ def scan_stream_ncs_files(self, ncs_filenames): verify_sec_struct = NcsSectionsFactory._verifySectionsStructure if not chanSectMap or (not verify_sec_struct(data, chan_ncs_sections)): - chan_ncs_sections = NcsSectionsFactory.build_for_ncs_file(data, nlxHeader) + chan_ncs_sections = NcsSectionsFactory.build_for_ncs_file(data, nlxHeader, strict_gap_mode=self.strict_gap_mode) # register file section structure for all contained channels for chan_uid in zip(nlxHeader["channel_names"], np.asarray(nlxHeader["channel_ids"], dtype=str)): diff --git a/neo/test/rawiotest/test_neuralynxrawio.py b/neo/test/rawiotest/test_neuralynxrawio.py index 1fbe0796b..c8a8cdc6c 100644 --- a/neo/test/rawiotest/test_neuralynxrawio.py +++ b/neo/test/rawiotest/test_neuralynxrawio.py @@ -228,7 +228,9 @@ def test_build_given_actual_frequency(self): ncsBlocks = NcsSections() ncsBlocks.sampFreqUsed = 1 / (35e-6) ncsBlocks.microsPerSampUsed = 35 - ncsBlocks = NcsSectionsFactory._buildGivenActualFrequency(data0, ncsBlocks.sampFreqUsed, 27789) + + ncsBlocks = NcsSectionsFactory._buildNcsSections(data0, 
ncsBlocks.sampFreqUsed) + self.assertEqual(len(ncsBlocks.sects), 1) self.assertEqual(ncsBlocks.sects[0].startRec, 0) self.assertEqual(ncsBlocks.sects[0].endRec, 9) @@ -267,7 +269,7 @@ def test_block_start_and_end_times(self): hdr = NlxHeader(filename) nb = NcsSectionsFactory.build_for_ncs_file(data0, hdr) self.assertListEqual([blk.startTime for blk in nb.sects], [8408806811, 8427832053, 8487768561]) - self.assertListEqual([blk.endTime for blk in nb.sects], [8427831990, 8487768498, 8515816549]) + self.assertListEqual([blk.endTime for blk in nb.sects], [8427831990, 8487768497, 8515816549]) # digitallynxsx with single block of records to exercise path in _buildForMaxGap filename = self.get_local_path("neuralynx/Cheetah_v1.1.0/original_data/CSC67_trunc.Ncs") @@ -322,7 +324,7 @@ class TestNcsSections(TestNeuralynxRawIO, unittest.TestCase): """ Test building NcsBlocks for files of different revisions. """ - + entities_to_test = [] def test_equality(self): @@ -372,3 +374,11 @@ def test_equality(self): if __name__ == "__main__": unittest.main() + + # test = TestNeuralynxRawIO() + # test.test_scan_ncs_files() + # test.test_exclude_filenames() + + # test = TestNcsSectionsFactory() + # test.test_ncsblocks_partial() + # test.test_build_given_actual_frequency()