From 13f69535c16f004638bfc9ce11ce460554b39da6 Mon Sep 17 00:00:00 2001 From: Samuel Garcia Date: Wed, 28 Feb 2024 18:05:43 +0100 Subject: [PATCH 1/7] New proposal for gap tolerance --- neo/rawio/neuralynxrawio/ncssections.py | 99 +++++++++++++++++++++---- 1 file changed, 85 insertions(+), 14 deletions(-) diff --git a/neo/rawio/neuralynxrawio/ncssections.py b/neo/rawio/neuralynxrawio/ncssections.py index 4da0be177..44713d134 100644 --- a/neo/rawio/neuralynxrawio/ncssections.py +++ b/neo/rawio/neuralynxrawio/ncssections.py @@ -424,9 +424,76 @@ def _buildForMaxGap(ncsMemMap, nomFreq): nb = NcsSectionsFactory._parseForMaxGap(ncsMemMap, nb, maxGapToAllow) return nb + + @staticmethod + def _buildNcsGeneric(ncsMemMap, nlxHdr, sampFreq, gapTolerance=0): + """ + Build + + This replace: + _buildGivenActualFrequency + _buildForMaxGap + """ + + channel_id = ncsMemMap["channel_id"][0] + + ncsSects = NcsSections() + # TODO option to use this sampFreq or estimated one + ncsSects.sampFreqUsed = sampFreq + ncsSects.microsPerSampUsed = NcsSectionsFactory.get_micros_per_samp_for_freq(sampFreq) + + # check if file is one block of records, which is often the case, and avoid full parse + # need that last timestamp match the predicted one. 
+ predLastBlockStartTime = NcsSectionsFactory.calc_sample_time( + sampFreq, ncsMemMap["timestamp"][0], NcsSection._RECORD_SIZE * ncsMemMap.shape[0] - 1 + ) + if ( + channel_id == ncsMemMap["channel_id"][-1] + and ncsMemMap["sample_rate"][0] == ncsMemMap["sample_rate"][-1] + and ncsMemMap["timestamp"][-1] == predLastBlockStartTime + ): + lastBlkEndTime = NcsSectionsFactory.calc_sample_time(sampFreq, ncsMemMap["timestamp"][-1], ncsMemMap["nb_valid"][-1]) + n_samples = NcsSection._RECORD_SIZE * (ncsMemMap.size - 1) + ncsMemMap["nb_valid"][-1] + section0 = NcsSection( + startRec=0, + startTime=ncsMemMap["timestamp"][0], + endRec=ncsMemMap.size - 1, + endTime=lastBlkEndTime, + n_samples=n_samples + ) + ncsSects.sects.append(section0) + + else: + # need to parse all data block to detect gaps + # check when the predicted timestamp is outside the tolerance + + delta = (ncsMemMap["timestamp"][1:] - ncsMemMap["timestamp"][:-1]).astype(np.int64) + delta_prediction = ((ncsMemMap["nb_valid"][:-1] / ncsSects.sampFreqUsed) * 1e6).astype(np.int64) + gap_inds = np.flatnonzero(np.abs(delta - delta_prediction) > gapTolerance) + gap_inds += 1 + + sections_limits = [ 0 ] + gap_inds.tolist() + [len(ncsMemMap)] + + for i in range(len(gap_inds) + 1): + start = sections_limits[i] + stop = sections_limits[i + 1] + ncsSects.sects.append( + NcsSection( + startRec=start, + startTime=ncsMemMap["timestamp"][start], + endRec=stop-1, + endTime=ncsMemMap["timestamp"][stop-1] + np.uint64(ncsMemMap["nb_valid"][stop-1] / ncsSects.sampFreqUsed * 1e6), + n_samples=np.sum(ncsMemMap["nb_valid"][start:stop]) + ) + ) + + return ncsSects + + + @staticmethod - def build_for_ncs_file(ncsMemMap, nlxHdr): + def build_for_ncs_file(ncsMemMap, nlxHdr, gapTolerance=None): """ Build an NcsSections object for an NcsFile, given as a memmap and NlxHeader, handling gap detection appropriately given the file type as specified by the header. 
@@ -443,32 +510,36 @@ def build_for_ncs_file(ncsMemMap, nlxHdr): An NcsSections corresponding to the provided ncsMemMap and nlxHdr """ acqType = nlxHdr.type_of_recording() + freq = nlxHdr["sampling_rate"] - # Old Neuralynx style with truncated whole microseconds for actual sampling. This - # restriction arose from the sampling being based on a master 1 MHz clock. if acqType == "PRE4": - freq = nlxHdr["sampling_rate"] + # Old Neuralynx style with truncated whole microseconds for actual sampling. This + # restriction arose from the sampling being based on a master 1 MHz clock. + microsPerSampUsed = math.floor(NcsSectionsFactory.get_micros_per_samp_for_freq(freq)) sampFreqUsed = NcsSectionsFactory.get_freq_for_micros_per_samp(microsPerSampUsed) - nb = NcsSectionsFactory._buildGivenActualFrequency(ncsMemMap, sampFreqUsed, math.floor(freq)) - nb.sampFreqUsed = sampFreqUsed - nb.microsPerSampUsed = microsPerSampUsed + if gapTolerance is None: + gapTolerance = 0 + ncsSects = NcsSectionsFactory._buildNcsGeneric(ncsMemMap, nlxHdr, sampFreqUsed, gapTolerance=gapTolerance) + ncsSects.sampFreqUsed = sampFreqUsed + ncsSects.microsPerSampUsed = microsPerSampUsed - # digital lynx style with fractional frequency and micros per samp determined from - # block times elif acqType in ["DIGITALLYNX", "DIGITALLYNXSX", "CHEETAH64", "CHEETAH560", "RAWDATAFILE"]: - nomFreq = nlxHdr["sampling_rate"] - nb = NcsSectionsFactory._buildForMaxGap(ncsMemMap, nomFreq) + # digital lynx style with fractional frequency and micros per samp determined from block times + if gapTolerance is None: + gapTolerance = round(NcsSectionsFactory._maxGapSampFrac * 1e6 / freq) + ncsSects = NcsSectionsFactory._buildNcsGeneric(ncsMemMap, nlxHdr, sampFreqUsed, gapTolerance=gapTolerance) # BML & ATLAS style with fractional frequency and micros per samp elif acqType == "BML" or acqType == "ATLAS": + if gapTolerance is None: + gapTolerance = 0 sampFreqUsed = nlxHdr["sampling_rate"] - nb = 
NcsSectionsFactory._buildGivenActualFrequency(ncsMemMap, sampFreqUsed, math.floor(sampFreqUsed)) - + ncsSects = NcsSectionsFactory._buildNcsGeneric(ncsMemMap, nlxHdr, freq, gapTolerance=gapTolerance) else: raise TypeError("Unknown Ncs file type from header.") - return nb + return ncsSects @staticmethod def _verifySectionsStructure(ncsMemMap, ncsSects): From 23bc31646673a2f0d749e445a3abea1f3795b13b Mon Sep 17 00:00:00 2001 From: Samuel Garcia Date: Mon, 4 Mar 2024 17:55:22 +0100 Subject: [PATCH 2/7] NeuralynxRawIO fix real sampling rate. Remove _buildGivenActualFrequency and _buildForMaxGap. --- neo/rawio/neuralynxrawio/ncssections.py | 462 +++++++++++----------- neo/test/rawiotest/test_neuralynxrawio.py | 16 +- 2 files changed, 251 insertions(+), 227 deletions(-) diff --git a/neo/rawio/neuralynxrawio/ncssections.py b/neo/rawio/neuralynxrawio/ncssections.py index 44713d134..3e0c425c5 100644 --- a/neo/rawio/neuralynxrawio/ncssections.py +++ b/neo/rawio/neuralynxrawio/ncssections.py @@ -217,216 +217,216 @@ def _parseGivenActualFrequency(ncsMemMap, ncsSects, chanNum, reqFreq, blkOnePred return ncsSects - @staticmethod - def _buildGivenActualFrequency(ncsMemMap, actualSampFreq, reqFreq): - """ - Build NcsSections object for file given actual sampling frequency. - - Requires that frequency in each record agrees with requested frequency. This is - normally obtained by rounding the header frequency; however, this value may be different - from the rounded actual frequency used in the recording, since the underlying - requirement in older Ncs files was that the number of microseconds per sample in the - records is the inverse of the sampling frequency stated in the header truncated to - whole microseconds. 
- - Parameters - ---------- - ncsMemMap: - memmap of Ncs file - actualSampFreq: - actual sampling frequency used - reqFreq: - frequency to require in records - - Returns - ------- - NcsSections object - """ - # check frequency in first record - if ncsMemMap["sample_rate"][0] != reqFreq: - raise IOError("Sampling frequency in first record doesn't agree with header.") - chanNum = ncsMemMap["channel_id"][0] - - ncsSects = NcsSections() - ncsSects.sampFreqUsed = actualSampFreq - ncsSects.microsPerSampUsed = NcsSectionsFactory.get_micros_per_samp_for_freq(actualSampFreq) - - # check if file is one block of records, which is often the case, and avoid full parse - lastBlkI = ncsMemMap.shape[0] - 1 - ts0 = ncsMemMap["timestamp"][0] - nb0 = ncsMemMap["nb_valid"][0] - predLastBlockStartTime = NcsSectionsFactory.calc_sample_time( - actualSampFreq, ts0, NcsSection._RECORD_SIZE * lastBlkI - ) - lts = ncsMemMap["timestamp"][lastBlkI] - lnb = ncsMemMap["nb_valid"][lastBlkI] - if ( - ncsMemMap["channel_id"][lastBlkI] == chanNum - and ncsMemMap["sample_rate"][lastBlkI] == reqFreq - and lts == predLastBlockStartTime - ): - lastBlkEndTime = NcsSectionsFactory.calc_sample_time(actualSampFreq, lts, lnb) - n_samples = NcsSection._RECORD_SIZE * lastBlkI - curBlock = NcsSection(0, ts0, lastBlkI, lastBlkEndTime, n_samples) - - ncsSects.sects.append(curBlock) - return ncsSects + # @staticmethod + # def _buildGivenActualFrequency(ncsMemMap, actualSampFreq, reqFreq): + # """ + # Build NcsSections object for file given actual sampling frequency. + + # Requires that frequency in each record agrees with requested frequency. 
This is + # normally obtained by rounding the header frequency; however, this value may be different + # from the rounded actual frequency used in the recording, since the underlying + # requirement in older Ncs files was that the number of microseconds per sample in the + # records is the inverse of the sampling frequency stated in the header truncated to + # whole microseconds. + + # Parameters + # ---------- + # ncsMemMap: + # memmap of Ncs file + # actualSampFreq: + # actual sampling frequency used + # reqFreq: + # frequency to require in records + + # Returns + # ------- + # NcsSections object + # """ + # # check frequency in first record + # if ncsMemMap["sample_rate"][0] != reqFreq: + # raise IOError("Sampling frequency in first record doesn't agree with header.") + # chanNum = ncsMemMap["channel_id"][0] + + # ncsSects = NcsSections() + # ncsSects.sampFreqUsed = actualSampFreq + # ncsSects.microsPerSampUsed = NcsSectionsFactory.get_micros_per_samp_for_freq(actualSampFreq) + + # # check if file is one block of records, which is often the case, and avoid full parse + # lastBlkI = ncsMemMap.shape[0] - 1 + # ts0 = ncsMemMap["timestamp"][0] + # nb0 = ncsMemMap["nb_valid"][0] + # predLastBlockStartTime = NcsSectionsFactory.calc_sample_time( + # actualSampFreq, ts0, NcsSection._RECORD_SIZE * lastBlkI + # ) + # lts = ncsMemMap["timestamp"][lastBlkI] + # lnb = ncsMemMap["nb_valid"][lastBlkI] + # if ( + # ncsMemMap["channel_id"][lastBlkI] == chanNum + # and ncsMemMap["sample_rate"][lastBlkI] == reqFreq + # and lts == predLastBlockStartTime + # ): + # lastBlkEndTime = NcsSectionsFactory.calc_sample_time(actualSampFreq, lts, lnb) + # n_samples = NcsSection._RECORD_SIZE * lastBlkI + # curBlock = NcsSection(0, ts0, lastBlkI, lastBlkEndTime, n_samples) + + # ncsSects.sects.append(curBlock) + # return ncsSects - else: - # otherwise need to scan looking for data gaps - blkOnePredTime = NcsSectionsFactory.calc_sample_time(actualSampFreq, ts0, nb0) - # curBlock = NcsSection(0, 
ts0, -1, -1, -1) - # ncsSects.sects.append(curBlock) - ncsSects = NcsSectionsFactory._parseGivenActualFrequency(ncsMemMap, ncsSects, chanNum, reqFreq, blkOnePredTime) - return ncsSects - - @staticmethod - def _parseForMaxGap(ncsMemMap, ncsSects, maxGapLen): - """ - Parse blocks of records from file, allowing a maximum gap in timestamps between records - in sections. Estimates frequency being used based on timestamps. - - Parameters - ---------- - ncsMemMap: - memmap of Ncs file - ncsSects: - NcsSections object with sampFreqUsed set to nominal frequency to use in computing time - for samples (Hz) - maxGapLen: - maximum difference within a block between predicted time of start of record and - recorded time - - Returns - ------- - NcsSections object with sampFreqUsed and microsPerSamp set based on estimate from - largest block - """ - - chanNum = ncsMemMap["channel_id"][0] - recFreq = ncsMemMap["sample_rate"][0] - - # check for consistent channel_ids and sampling rates - ncsMemMap["channel_id"] - if not (ncsMemMap["channel_id"] == chanNum).all(): - raise IOError("Channel number changed in records within file") - - if not all(ncsMemMap["sample_rate"] == recFreq): - raise IOError("Sampling frequency changed in records within file") - - # find most frequent number of samples - exp_nb_valid = np.argmax(np.bincount(ncsMemMap["nb_valid"])) - # detect records with incomplete number of samples - gap_rec_ids = list(np.where(ncsMemMap["nb_valid"] != exp_nb_valid)[0]) - - rec_duration = 1e6 / ncsSects.sampFreqUsed * ncsMemMap["nb_valid"] - pred_times = np.rint(ncsMemMap["timestamp"] + rec_duration).astype(np.int64) - max_pred_times = pred_times + maxGapLen - # data records that start later than the predicted time (including the - # maximal accepted gap length) are considered delayed and a gap is - # registered. 
- delayed_recs = list(np.where(max_pred_times[:-1] < ncsMemMap["timestamp"][1:])[0]) - gap_rec_ids.extend(delayed_recs) - - # cleaning extracted gap ids - # last record can not be the beginning of a gap - last_rec_id = len(ncsMemMap["timestamp"]) - 1 - if last_rec_id in gap_rec_ids: - gap_rec_ids.remove(last_rec_id) - - # gap ids can only be listed once - gap_rec_ids = sorted(set(gap_rec_ids)) - - # create recording segments from identified gaps - ncsSects.sects.append(NcsSection(0, ncsMemMap["timestamp"][0], -1, -1, -1)) - for gap_rec_id in gap_rec_ids: - curr_sec = ncsSects.sects[-1] - curr_sec.endRec = gap_rec_id - curr_sec.endTime = pred_times[gap_rec_id] - n_samples = np.sum(ncsMemMap["nb_valid"][curr_sec.startRec : gap_rec_id + 1]) - curr_sec.n_samples = n_samples - - next_sec = NcsSection(gap_rec_id + 1, ncsMemMap["timestamp"][gap_rec_id + 1], -1, -1, -1) - ncsSects.sects.append(next_sec) - - curr_sec = ncsSects.sects[-1] - curr_sec.endRec = len(ncsMemMap["timestamp"]) - 1 - curr_sec.endTime = pred_times[-1] - n_samples = np.sum(ncsMemMap["nb_valid"][curr_sec.startRec :]) - curr_sec.n_samples = n_samples - - # calculate the estimated frequency of the block with the most samples - max_blk_idx = np.argmax([bl.endRec - bl.startRec for bl in ncsSects.sects]) - max_blk = ncsSects.sects[max_blk_idx] - - maxBlkFreqEstimate = ( - (max_blk.n_samples - ncsMemMap["nb_valid"][max_blk.endRec]) - * 1e6 - / (ncsMemMap["timestamp"][max_blk.endRec] - max_blk.startTime) - ) - - ncsSects.sampFreqUsed = maxBlkFreqEstimate - ncsSects.microsPerSampUsed = NcsSectionsFactory.get_micros_per_samp_for_freq(maxBlkFreqEstimate) - # free memory that is unnecessarily occupied by the memmap - # (see https://github.com/numpy/numpy/issues/19340) - del ncsMemMap - return ncsSects - - @staticmethod - def _buildForMaxGap(ncsMemMap, nomFreq): - """ - Determine sections of records in memory mapped Ncs file given a nominal frequency of - the file, using the default values of frequency tolerance 
and maximum gap between blocks. - - Parameters - ---------- - ncsMemMap: - memmap of Ncs file - nomFreq: - nominal sampling frequency used, normally from header of file - - Returns - ------- - NcsSections object - """ - nb = NcsSections() - - numRecs = ncsMemMap.shape[0] - if numRecs < 1: - return nb - - chanNum = ncsMemMap["channel_id"][0] - ts0 = ncsMemMap["timestamp"][0] - - lastBlkI = numRecs - 1 - lts = ncsMemMap["timestamp"][lastBlkI] - lcid = ncsMemMap["channel_id"][lastBlkI] - lnb = ncsMemMap["nb_valid"][lastBlkI] - lsr = ncsMemMap["sample_rate"][lastBlkI] - - # check if file is one block of records, with exact timestamp match, which may be the case - numSampsForPred = NcsSection._RECORD_SIZE * lastBlkI - predLastBlockStartTime = NcsSectionsFactory.calc_sample_time(nomFreq, ts0, numSampsForPred) - freqInFile = math.floor(nomFreq) - if lts - predLastBlockStartTime == 0 and lcid == chanNum and lsr == freqInFile: - endTime = NcsSectionsFactory.calc_sample_time(nomFreq, lts, lnb) - curBlock = NcsSection(0, ts0, lastBlkI, endTime, numSampsForPred) - nb.sects.append(curBlock) - nb.sampFreqUsed = (numSampsForPred + lnb) / (endTime - ts0) * 1e6 - nb.microsPerSampUsed = NcsSectionsFactory.get_micros_per_samp_for_freq(nb.sampFreqUsed) - - # otherwise parse records to determine blocks using default maximum gap length - else: - nb.sampFreqUsed = nomFreq - nb.microsPerSampUsed = NcsSectionsFactory.get_micros_per_samp_for_freq(nb.sampFreqUsed) - maxGapToAllow = round(NcsSectionsFactory._maxGapSampFrac * 1e6 / nomFreq) - nb = NcsSectionsFactory._parseForMaxGap(ncsMemMap, nb, maxGapToAllow) - - return nb + # else: + # # otherwise need to scan looking for data gaps + # blkOnePredTime = NcsSectionsFactory.calc_sample_time(actualSampFreq, ts0, nb0) + # # curBlock = NcsSection(0, ts0, -1, -1, -1) + # # ncsSects.sects.append(curBlock) + # ncsSects = NcsSectionsFactory._parseGivenActualFrequency(ncsMemMap, ncsSects, chanNum, reqFreq, blkOnePredTime) + # return ncsSects + + # 
@staticmethod + # def _parseForMaxGap(ncsMemMap, ncsSects, maxGapLen): + # """ + # Parse blocks of records from file, allowing a maximum gap in timestamps between records + # in sections. Estimates frequency being used based on timestamps. + + # Parameters + # ---------- + # ncsMemMap: + # memmap of Ncs file + # ncsSects: + # NcsSections object with sampFreqUsed set to nominal frequency to use in computing time + # for samples (Hz) + # maxGapLen: + # maximum difference within a block between predicted time of start of record and + # recorded time + + # Returns + # ------- + # NcsSections object with sampFreqUsed and microsPerSamp set based on estimate from + # largest block + # """ + + # chanNum = ncsMemMap["channel_id"][0] + # recFreq = ncsMemMap["sample_rate"][0] + + # # check for consistent channel_ids and sampling rates + # ncsMemMap["channel_id"] + # if not (ncsMemMap["channel_id"] == chanNum).all(): + # raise IOError("Channel number changed in records within file") + + # if not all(ncsMemMap["sample_rate"] == recFreq): + # raise IOError("Sampling frequency changed in records within file") + + # # find most frequent number of samples + # exp_nb_valid = np.argmax(np.bincount(ncsMemMap["nb_valid"])) + # # detect records with incomplete number of samples + # gap_rec_ids = list(np.where(ncsMemMap["nb_valid"] != exp_nb_valid)[0]) + + # rec_duration = 1e6 / ncsSects.sampFreqUsed * ncsMemMap["nb_valid"] + # pred_times = np.rint(ncsMemMap["timestamp"] + rec_duration).astype(np.int64) + # max_pred_times = pred_times + maxGapLen + # # data records that start later than the predicted time (including the + # # maximal accepted gap length) are considered delayed and a gap is + # # registered. 
+ # delayed_recs = list(np.where(max_pred_times[:-1] < ncsMemMap["timestamp"][1:])[0]) + # gap_rec_ids.extend(delayed_recs) + + # # cleaning extracted gap ids + # # last record can not be the beginning of a gap + # last_rec_id = len(ncsMemMap["timestamp"]) - 1 + # if last_rec_id in gap_rec_ids: + # gap_rec_ids.remove(last_rec_id) + + # # gap ids can only be listed once + # gap_rec_ids = sorted(set(gap_rec_ids)) + + # # create recording segments from identified gaps + # ncsSects.sects.append(NcsSection(0, ncsMemMap["timestamp"][0], -1, -1, -1)) + # for gap_rec_id in gap_rec_ids: + # curr_sec = ncsSects.sects[-1] + # curr_sec.endRec = gap_rec_id + # curr_sec.endTime = pred_times[gap_rec_id] + # n_samples = np.sum(ncsMemMap["nb_valid"][curr_sec.startRec : gap_rec_id + 1]) + # curr_sec.n_samples = n_samples + + # next_sec = NcsSection(gap_rec_id + 1, ncsMemMap["timestamp"][gap_rec_id + 1], -1, -1, -1) + # ncsSects.sects.append(next_sec) + + # curr_sec = ncsSects.sects[-1] + # curr_sec.endRec = len(ncsMemMap["timestamp"]) - 1 + # curr_sec.endTime = pred_times[-1] + # n_samples = np.sum(ncsMemMap["nb_valid"][curr_sec.startRec :]) + # curr_sec.n_samples = n_samples + + # # calculate the estimated frequency of the block with the most samples + # max_blk_idx = np.argmax([bl.endRec - bl.startRec for bl in ncsSects.sects]) + # max_blk = ncsSects.sects[max_blk_idx] + + # maxBlkFreqEstimate = ( + # (max_blk.n_samples - ncsMemMap["nb_valid"][max_blk.endRec]) + # * 1e6 + # / (ncsMemMap["timestamp"][max_blk.endRec] - max_blk.startTime) + # ) + + # ncsSects.sampFreqUsed = maxBlkFreqEstimate + # ncsSects.microsPerSampUsed = NcsSectionsFactory.get_micros_per_samp_for_freq(maxBlkFreqEstimate) + # # free memory that is unnecessarily occupied by the memmap + # # (see https://github.com/numpy/numpy/issues/19340) + # del ncsMemMap + # return ncsSects + + # @staticmethod + # def _buildForMaxGap(ncsMemMap, nomFreq): + # """ + # Determine sections of records in memory mapped Ncs file given a 
nominal frequency of + # the file, using the default values of frequency tolerance and maximum gap between blocks. + + # Parameters + # ---------- + # ncsMemMap: + # memmap of Ncs file + # nomFreq: + # nominal sampling frequency used, normally from header of file + + # Returns + # ------- + # NcsSections object + # """ + # nb = NcsSections() + + # numRecs = ncsMemMap.shape[0] + # if numRecs < 1: + # return nb + + # chanNum = ncsMemMap["channel_id"][0] + # ts0 = ncsMemMap["timestamp"][0] + + # lastBlkI = numRecs - 1 + # lts = ncsMemMap["timestamp"][lastBlkI] + # lcid = ncsMemMap["channel_id"][lastBlkI] + # lnb = ncsMemMap["nb_valid"][lastBlkI] + # lsr = ncsMemMap["sample_rate"][lastBlkI] + + # # check if file is one block of records, with exact timestamp match, which may be the case + # numSampsForPred = NcsSection._RECORD_SIZE * lastBlkI + # predLastBlockStartTime = NcsSectionsFactory.calc_sample_time(nomFreq, ts0, numSampsForPred) + # freqInFile = math.floor(nomFreq) + # if lts - predLastBlockStartTime == 0 and lcid == chanNum and lsr == freqInFile: + # endTime = NcsSectionsFactory.calc_sample_time(nomFreq, lts, lnb) + # curBlock = NcsSection(0, ts0, lastBlkI, endTime, numSampsForPred) + # nb.sects.append(curBlock) + # nb.sampFreqUsed = (numSampsForPred + lnb) / (endTime - ts0) * 1e6 + # nb.microsPerSampUsed = NcsSectionsFactory.get_micros_per_samp_for_freq(nb.sampFreqUsed) + + # # otherwise parse records to determine blocks using default maximum gap length + # else: + # nb.sampFreqUsed = nomFreq + # nb.microsPerSampUsed = NcsSectionsFactory.get_micros_per_samp_for_freq(nb.sampFreqUsed) + # maxGapToAllow = round(NcsSectionsFactory._maxGapSampFrac * 1e6 / nomFreq) + # nb = NcsSectionsFactory._parseForMaxGap(ncsMemMap, nb, maxGapToAllow) + + # return nb @staticmethod - def _buildNcsGeneric(ncsMemMap, nlxHdr, sampFreq, gapTolerance=0): + def _buildNcsGeneric(ncsMemMap, sampFreq, gapTolerance=0): """ Build @@ -434,18 +434,14 @@ def _buildNcsGeneric(ncsMemMap, nlxHdr, 
sampFreq, gapTolerance=0): _buildGivenActualFrequency _buildForMaxGap """ - channel_id = ncsMemMap["channel_id"][0] ncsSects = NcsSections() - # TODO option to use this sampFreq or estimated one - ncsSects.sampFreqUsed = sampFreq - ncsSects.microsPerSampUsed = NcsSectionsFactory.get_micros_per_samp_for_freq(sampFreq) # check if file is one block of records, which is often the case, and avoid full parse # need that last timestamp match the predicted one. predLastBlockStartTime = NcsSectionsFactory.calc_sample_time( - sampFreq, ncsMemMap["timestamp"][0], NcsSection._RECORD_SIZE * ncsMemMap.shape[0] - 1 + sampFreq, ncsMemMap["timestamp"][0], NcsSection._RECORD_SIZE * (ncsMemMap.shape[0] - 1) ) if ( channel_id == ncsMemMap["channel_id"][-1] @@ -466,9 +462,9 @@ def _buildNcsGeneric(ncsMemMap, nlxHdr, sampFreq, gapTolerance=0): else: # need to parse all data block to detect gaps # check when the predicted timestamp is outside the tolerance - delta = (ncsMemMap["timestamp"][1:] - ncsMemMap["timestamp"][:-1]).astype(np.int64) - delta_prediction = ((ncsMemMap["nb_valid"][:-1] / ncsSects.sampFreqUsed) * 1e6).astype(np.int64) + delta_prediction = ((ncsMemMap["nb_valid"][:-1] / sampFreq) * 1e6).astype(np.int64) + gap_inds = np.flatnonzero(np.abs(delta - delta_prediction) > gapTolerance) gap_inds += 1 @@ -477,20 +473,18 @@ def _buildNcsGeneric(ncsMemMap, nlxHdr, sampFreq, gapTolerance=0): for i in range(len(gap_inds) + 1): start = sections_limits[i] stop = sections_limits[i + 1] + duration = np.uint64(1e6 / sampFreq * ncsMemMap["nb_valid"][stop-1]) ncsSects.sects.append( NcsSection( startRec=start, startTime=ncsMemMap["timestamp"][start], endRec=stop-1, - endTime=ncsMemMap["timestamp"][stop-1] + np.uint64(ncsMemMap["nb_valid"][stop-1] / ncsSects.sampFreqUsed * 1e6), + endTime=ncsMemMap["timestamp"][stop-1] + duration, n_samples=np.sum(ncsMemMap["nb_valid"][start:stop]) ) ) - - return ncsSects - - + return ncsSects @staticmethod def build_for_ncs_file(ncsMemMap, nlxHdr, 
gapTolerance=None): @@ -515,12 +509,11 @@ def build_for_ncs_file(ncsMemMap, nlxHdr, gapTolerance=None): if acqType == "PRE4": # Old Neuralynx style with truncated whole microseconds for actual sampling. This # restriction arose from the sampling being based on a master 1 MHz clock. - microsPerSampUsed = math.floor(NcsSectionsFactory.get_micros_per_samp_for_freq(freq)) sampFreqUsed = NcsSectionsFactory.get_freq_for_micros_per_samp(microsPerSampUsed) if gapTolerance is None: gapTolerance = 0 - ncsSects = NcsSectionsFactory._buildNcsGeneric(ncsMemMap, nlxHdr, sampFreqUsed, gapTolerance=gapTolerance) + ncsSects = NcsSectionsFactory._buildNcsGeneric(ncsMemMap, sampFreqUsed, gapTolerance=gapTolerance) ncsSects.sampFreqUsed = sampFreqUsed ncsSects.microsPerSampUsed = microsPerSampUsed @@ -528,14 +521,35 @@ def build_for_ncs_file(ncsMemMap, nlxHdr, gapTolerance=None): # digital lynx style with fractional frequency and micros per samp determined from block times if gapTolerance is None: gapTolerance = round(NcsSectionsFactory._maxGapSampFrac * 1e6 / freq) - ncsSects = NcsSectionsFactory._buildNcsGeneric(ncsMemMap, nlxHdr, sampFreqUsed, gapTolerance=gapTolerance) + ncsSects = NcsSectionsFactory._buildNcsGeneric(ncsMemMap, freq, gapTolerance=gapTolerance) + + + # take longer data block to compute reaal sampling rate + # ind_max = np.argmax([section.n_samples for section in ncsSects.sects]) + ind_max = np.argmax([section.endRec - section.startRec for section in ncsSects.sects]) + section = ncsSects.sects[ind_max] + if section.endRec != section.startRec: + # need several data block + sampFreqUsed = ( + (section.n_samples - ncsMemMap["nb_valid"][section.endRec]) + * 1e6 + / (ncsMemMap["timestamp"][section.endRec] - section.startTime) + ) + else: + sampFreqUsed = freq + ncsSects.sampFreqUsed = sampFreqUsed + ncsSects.microsPerSampUsed = NcsSectionsFactory.get_micros_per_samp_for_freq(sampFreqUsed) - # BML & ATLAS style with fractional frequency and micros per samp + elif acqType 
== "BML" or acqType == "ATLAS": + # BML & ATLAS style with fractional frequency and micros per samp if gapTolerance is None: gapTolerance = 0 - sampFreqUsed = nlxHdr["sampling_rate"] - ncsSects = NcsSectionsFactory._buildNcsGeneric(ncsMemMap, nlxHdr, freq, gapTolerance=gapTolerance) + ncsSects = NcsSectionsFactory._buildNcsGeneric(ncsMemMap, freq, gapTolerance=gapTolerance) + ncsSects.sampFreqUsed = freq + ncsSects.microsPerSampUsed = NcsSectionsFactory.get_micros_per_samp_for_freq(freq) + + else: raise TypeError("Unknown Ncs file type from header.") diff --git a/neo/test/rawiotest/test_neuralynxrawio.py b/neo/test/rawiotest/test_neuralynxrawio.py index a4ad2f4ea..2072b497e 100644 --- a/neo/test/rawiotest/test_neuralynxrawio.py +++ b/neo/test/rawiotest/test_neuralynxrawio.py @@ -228,7 +228,9 @@ def test_build_given_actual_frequency(self): ncsBlocks = NcsSections() ncsBlocks.sampFreqUsed = 1 / (35e-6) ncsBlocks.microsPerSampUsed = 35 - ncsBlocks = NcsSectionsFactory._buildGivenActualFrequency(data0, ncsBlocks.sampFreqUsed, 27789) + + ncsBlocks = NcsSectionsFactory._buildNcsGeneric(data0, ncsBlocks.sampFreqUsed) + self.assertEqual(len(ncsBlocks.sects), 1) self.assertEqual(ncsBlocks.sects[0].startRec, 0) self.assertEqual(ncsBlocks.sects[0].endRec, 9) @@ -267,7 +269,7 @@ def test_block_start_and_end_times(self): hdr = NlxHeader(filename) nb = NcsSectionsFactory.build_for_ncs_file(data0, hdr) self.assertListEqual([blk.startTime for blk in nb.sects], [8408806811, 8427832053, 8487768561]) - self.assertListEqual([blk.endTime for blk in nb.sects], [8427831990, 8487768498, 8515816549]) + self.assertListEqual([blk.endTime for blk in nb.sects], [8427831990, 8487768497, 8515816549]) # digitallynxsx with single block of records to exercise path in _buildForMaxGap filename = self.get_local_path("neuralynx/Cheetah_v1.1.0/original_data/CSC67_trunc.Ncs") @@ -322,7 +324,7 @@ class TestNcsSections(TestNeuralynxRawIO, unittest.TestCase): """ Test building NcsBlocks for files of 
different revisions. """ - + entities_to_test = [] def test_equality(self): @@ -372,3 +374,11 @@ def test_equality(self): if __name__ == "__main__": unittest.main() + + # test = TestNeuralynxRawIO() + # test.test_scan_ncs_files() + # test.test_exclude_filenames() + + # test = TestNcsSectionsFactory() + # test.test_ncsblocks_partial() + # test.test_build_given_actual_frequency() From 28cc1a59b66a4899344fe68d6d1c0071d0fe2fb2 Mon Sep 17 00:00:00 2001 From: Samuel Garcia Date: Mon, 4 Mar 2024 18:08:43 +0100 Subject: [PATCH 3/7] Add strict_gap_mode=True/False to handle new clock sustem in neuralynx --- neo/rawio/neuralynxrawio/ncssections.py | 22 ++++++++++++++++++---- neo/rawio/neuralynxrawio/neuralynxrawio.py | 13 ++++++++++--- 2 files changed, 28 insertions(+), 7 deletions(-) diff --git a/neo/rawio/neuralynxrawio/ncssections.py b/neo/rawio/neuralynxrawio/ncssections.py index 3e0c425c5..b40a67cb0 100644 --- a/neo/rawio/neuralynxrawio/ncssections.py +++ b/neo/rawio/neuralynxrawio/ncssections.py @@ -487,7 +487,7 @@ def _buildNcsGeneric(ncsMemMap, sampFreq, gapTolerance=0): return ncsSects @staticmethod - def build_for_ncs_file(ncsMemMap, nlxHdr, gapTolerance=None): + def build_for_ncs_file(ncsMemMap, nlxHdr, gapTolerance=None, strict_gap_mode=True): """ Build an NcsSections object for an NcsFile, given as a memmap and NlxHeader, handling gap detection appropriately given the file type as specified by the header. @@ -512,7 +512,12 @@ def build_for_ncs_file(ncsMemMap, nlxHdr, gapTolerance=None): microsPerSampUsed = math.floor(NcsSectionsFactory.get_micros_per_samp_for_freq(freq)) sampFreqUsed = NcsSectionsFactory.get_freq_for_micros_per_samp(microsPerSampUsed) if gapTolerance is None: - gapTolerance = 0 + if strict_gap_mode: + # this is the old behavior, maybe we could put 0.9 sample interval no ? 
+ gapTolerance = 0 + else: + gapTolerance = 0 + ncsSects = NcsSectionsFactory._buildNcsGeneric(ncsMemMap, sampFreqUsed, gapTolerance=gapTolerance) ncsSects.sampFreqUsed = sampFreqUsed ncsSects.microsPerSampUsed = microsPerSampUsed @@ -520,7 +525,12 @@ def build_for_ncs_file(ncsMemMap, nlxHdr, gapTolerance=None): elif acqType in ["DIGITALLYNX", "DIGITALLYNXSX", "CHEETAH64", "CHEETAH560", "RAWDATAFILE"]: # digital lynx style with fractional frequency and micros per samp determined from block times if gapTolerance is None: - gapTolerance = round(NcsSectionsFactory._maxGapSampFrac * 1e6 / freq) + if strict_gap_mode: + # this is the old behavior + gapTolerance = round(NcsSectionsFactory._maxGapSampFrac * 1e6 / freq) + else: + # quarter of paquet size is tolerate + gapTolerance = round(0.25 * NcsSection._RECORD_SIZE * 1e6 / freq) ncsSects = NcsSectionsFactory._buildNcsGeneric(ncsMemMap, freq, gapTolerance=gapTolerance) @@ -543,8 +553,12 @@ def build_for_ncs_file(ncsMemMap, nlxHdr, gapTolerance=None): elif acqType == "BML" or acqType == "ATLAS": # BML & ATLAS style with fractional frequency and micros per samp - if gapTolerance is None: + if strict_gap_mode: + # this is the old behavior, maybe we could put 0.9 sample interval no ? 
gapTolerance = 0 + else: + # quarter of paquet size is tolerate + gapTolerance = round(0.25 * NcsSection._RECORD_SIZE * 1e6 / freq) ncsSects = NcsSectionsFactory._buildNcsGeneric(ncsMemMap, freq, gapTolerance=gapTolerance) ncsSects.sampFreqUsed = freq ncsSects.microsPerSampUsed = NcsSectionsFactory.get_micros_per_samp_for_freq(freq) diff --git a/neo/rawio/neuralynxrawio/neuralynxrawio.py b/neo/rawio/neuralynxrawio/neuralynxrawio.py index f4c2b27bf..5d4fb316a 100644 --- a/neo/rawio/neuralynxrawio/neuralynxrawio.py +++ b/neo/rawio/neuralynxrawio/neuralynxrawio.py @@ -85,7 +85,13 @@ class NeuralynxRawIO(BaseRawIO): keep_original_times: bool, default: False If True, keep original start time as in files, Otherwise set 0 of time to first time in dataset - + strict_gap_mode: bool, default: True + Detect gaps using strict mode or not. + * strict_gap_mode = True then a gap is consider when timstamp difference between two + consequtive data packet is more than one sample interval. + * strict_gap_mode = False then a gap has an increased tolerance. 
Some newer systems with a different clock need this option, + otherwise too many gaps are detected + Notes ----- * This IO supports NCS, NEV, NSE and NTT file formats (not NVT or NRD yet) @@ -125,7 +131,7 @@ class NeuralynxRawIO(BaseRawIO): ("samples", "int16", (NcsSection._RECORD_SIZE)), ] - def __init__(self, dirname="", filename="", exclude_filename=None, keep_original_times=False, **kargs): + def __init__(self, dirname="", filename="", exclude_filename=None, keep_original_times=False, strict_gap_mode=True, **kargs): if dirname != "": self.dirname = dirname @@ -137,6 +143,7 @@ def __init__(self, dirname="", filename="", exclude_filename=None, keep_original raise ValueError("One of dirname or filename must be provided.") self.keep_original_times = keep_original_times + self.strict_gap_mode = strict_gap_mode self.exclude_filename = exclude_filename BaseRawIO.__init__(self, **kargs) @@ -790,7 +797,7 @@ def scan_stream_ncs_files(self, ncs_filenames): verify_sec_struct = NcsSectionsFactory._verifySectionsStructure if not chanSectMap or (not verify_sec_struct(data, chan_ncs_sections)): - chan_ncs_sections = NcsSectionsFactory.build_for_ncs_file(data, nlxHeader) + chan_ncs_sections = NcsSectionsFactory.build_for_ncs_file(data, nlxHeader, strict_gap_mode=self.strict_gap_mode) # register file section structure for all contained channels for chan_uid in zip(nlxHeader["channel_names"], np.asarray(nlxHeader["channel_ids"], dtype=str)): From 482abca4ced9cd39408122ad14c5a244e9617132 Mon Sep 17 00:00:00 2001 From: Samuel Garcia Date: Mon, 4 Mar 2024 18:11:13 +0100 Subject: [PATCH 4/7] put back function to avoid git conflict --- neo/rawio/neuralynxrawio/ncssections.py | 410 ++++++++++++------------ 1 file changed, 205 insertions(+), 205 deletions(-) diff --git a/neo/rawio/neuralynxrawio/ncssections.py b/neo/rawio/neuralynxrawio/ncssections.py index b40a67cb0..33570d333 100644 --- a/neo/rawio/neuralynxrawio/ncssections.py +++ b/neo/rawio/neuralynxrawio/ncssections.py @@ -217,213
+217,213 @@ def _parseGivenActualFrequency(ncsMemMap, ncsSects, chanNum, reqFreq, blkOnePred return ncsSects - # @staticmethod - # def _buildGivenActualFrequency(ncsMemMap, actualSampFreq, reqFreq): - # """ - # Build NcsSections object for file given actual sampling frequency. - - # Requires that frequency in each record agrees with requested frequency. This is - # normally obtained by rounding the header frequency; however, this value may be different - # from the rounded actual frequency used in the recording, since the underlying - # requirement in older Ncs files was that the number of microseconds per sample in the - # records is the inverse of the sampling frequency stated in the header truncated to - # whole microseconds. - - # Parameters - # ---------- - # ncsMemMap: - # memmap of Ncs file - # actualSampFreq: - # actual sampling frequency used - # reqFreq: - # frequency to require in records - - # Returns - # ------- - # NcsSections object - # """ - # # check frequency in first record - # if ncsMemMap["sample_rate"][0] != reqFreq: - # raise IOError("Sampling frequency in first record doesn't agree with header.") - # chanNum = ncsMemMap["channel_id"][0] - - # ncsSects = NcsSections() - # ncsSects.sampFreqUsed = actualSampFreq - # ncsSects.microsPerSampUsed = NcsSectionsFactory.get_micros_per_samp_for_freq(actualSampFreq) - - # # check if file is one block of records, which is often the case, and avoid full parse - # lastBlkI = ncsMemMap.shape[0] - 1 - # ts0 = ncsMemMap["timestamp"][0] - # nb0 = ncsMemMap["nb_valid"][0] - # predLastBlockStartTime = NcsSectionsFactory.calc_sample_time( - # actualSampFreq, ts0, NcsSection._RECORD_SIZE * lastBlkI - # ) - # lts = ncsMemMap["timestamp"][lastBlkI] - # lnb = ncsMemMap["nb_valid"][lastBlkI] - # if ( - # ncsMemMap["channel_id"][lastBlkI] == chanNum - # and ncsMemMap["sample_rate"][lastBlkI] == reqFreq - # and lts == predLastBlockStartTime - # ): - # lastBlkEndTime = NcsSectionsFactory.calc_sample_time(actualSampFreq, 
lts, lnb) - # n_samples = NcsSection._RECORD_SIZE * lastBlkI - # curBlock = NcsSection(0, ts0, lastBlkI, lastBlkEndTime, n_samples) - - # ncsSects.sects.append(curBlock) - # return ncsSects + @staticmethod + def _buildGivenActualFrequency(ncsMemMap, actualSampFreq, reqFreq): + """ + Build NcsSections object for file given actual sampling frequency. + + Requires that frequency in each record agrees with requested frequency. This is + normally obtained by rounding the header frequency; however, this value may be different + from the rounded actual frequency used in the recording, since the underlying + requirement in older Ncs files was that the number of microseconds per sample in the + records is the inverse of the sampling frequency stated in the header truncated to + whole microseconds. + + Parameters + ---------- + ncsMemMap: + memmap of Ncs file + actualSampFreq: + actual sampling frequency used + reqFreq: + frequency to require in records + + Returns + ------- + NcsSections object + """ + # check frequency in first record + if ncsMemMap["sample_rate"][0] != reqFreq: + raise IOError("Sampling frequency in first record doesn't agree with header.") + chanNum = ncsMemMap["channel_id"][0] + + ncsSects = NcsSections() + ncsSects.sampFreqUsed = actualSampFreq + ncsSects.microsPerSampUsed = NcsSectionsFactory.get_micros_per_samp_for_freq(actualSampFreq) + + # check if file is one block of records, which is often the case, and avoid full parse + lastBlkI = ncsMemMap.shape[0] - 1 + ts0 = ncsMemMap["timestamp"][0] + nb0 = ncsMemMap["nb_valid"][0] + predLastBlockStartTime = NcsSectionsFactory.calc_sample_time( + actualSampFreq, ts0, NcsSection._RECORD_SIZE * lastBlkI + ) + lts = ncsMemMap["timestamp"][lastBlkI] + lnb = ncsMemMap["nb_valid"][lastBlkI] + if ( + ncsMemMap["channel_id"][lastBlkI] == chanNum + and ncsMemMap["sample_rate"][lastBlkI] == reqFreq + and lts == predLastBlockStartTime + ): + lastBlkEndTime = NcsSectionsFactory.calc_sample_time(actualSampFreq, lts, 
lnb) + n_samples = NcsSection._RECORD_SIZE * lastBlkI + curBlock = NcsSection(0, ts0, lastBlkI, lastBlkEndTime, n_samples) + + ncsSects.sects.append(curBlock) + return ncsSects - # else: - # # otherwise need to scan looking for data gaps - # blkOnePredTime = NcsSectionsFactory.calc_sample_time(actualSampFreq, ts0, nb0) - # # curBlock = NcsSection(0, ts0, -1, -1, -1) - # # ncsSects.sects.append(curBlock) - # ncsSects = NcsSectionsFactory._parseGivenActualFrequency(ncsMemMap, ncsSects, chanNum, reqFreq, blkOnePredTime) - # return ncsSects - - # @staticmethod - # def _parseForMaxGap(ncsMemMap, ncsSects, maxGapLen): - # """ - # Parse blocks of records from file, allowing a maximum gap in timestamps between records - # in sections. Estimates frequency being used based on timestamps. - - # Parameters - # ---------- - # ncsMemMap: - # memmap of Ncs file - # ncsSects: - # NcsSections object with sampFreqUsed set to nominal frequency to use in computing time - # for samples (Hz) - # maxGapLen: - # maximum difference within a block between predicted time of start of record and - # recorded time - - # Returns - # ------- - # NcsSections object with sampFreqUsed and microsPerSamp set based on estimate from - # largest block - # """ - - # chanNum = ncsMemMap["channel_id"][0] - # recFreq = ncsMemMap["sample_rate"][0] - - # # check for consistent channel_ids and sampling rates - # ncsMemMap["channel_id"] - # if not (ncsMemMap["channel_id"] == chanNum).all(): - # raise IOError("Channel number changed in records within file") - - # if not all(ncsMemMap["sample_rate"] == recFreq): - # raise IOError("Sampling frequency changed in records within file") - - # # find most frequent number of samples - # exp_nb_valid = np.argmax(np.bincount(ncsMemMap["nb_valid"])) - # # detect records with incomplete number of samples - # gap_rec_ids = list(np.where(ncsMemMap["nb_valid"] != exp_nb_valid)[0]) - - # rec_duration = 1e6 / ncsSects.sampFreqUsed * ncsMemMap["nb_valid"] - # pred_times = 
np.rint(ncsMemMap["timestamp"] + rec_duration).astype(np.int64) - # max_pred_times = pred_times + maxGapLen - # # data records that start later than the predicted time (including the - # # maximal accepted gap length) are considered delayed and a gap is - # # registered. - # delayed_recs = list(np.where(max_pred_times[:-1] < ncsMemMap["timestamp"][1:])[0]) - # gap_rec_ids.extend(delayed_recs) - - # # cleaning extracted gap ids - # # last record can not be the beginning of a gap - # last_rec_id = len(ncsMemMap["timestamp"]) - 1 - # if last_rec_id in gap_rec_ids: - # gap_rec_ids.remove(last_rec_id) - - # # gap ids can only be listed once - # gap_rec_ids = sorted(set(gap_rec_ids)) - - # # create recording segments from identified gaps - # ncsSects.sects.append(NcsSection(0, ncsMemMap["timestamp"][0], -1, -1, -1)) - # for gap_rec_id in gap_rec_ids: - # curr_sec = ncsSects.sects[-1] - # curr_sec.endRec = gap_rec_id - # curr_sec.endTime = pred_times[gap_rec_id] - # n_samples = np.sum(ncsMemMap["nb_valid"][curr_sec.startRec : gap_rec_id + 1]) - # curr_sec.n_samples = n_samples - - # next_sec = NcsSection(gap_rec_id + 1, ncsMemMap["timestamp"][gap_rec_id + 1], -1, -1, -1) - # ncsSects.sects.append(next_sec) - - # curr_sec = ncsSects.sects[-1] - # curr_sec.endRec = len(ncsMemMap["timestamp"]) - 1 - # curr_sec.endTime = pred_times[-1] - # n_samples = np.sum(ncsMemMap["nb_valid"][curr_sec.startRec :]) - # curr_sec.n_samples = n_samples - - # # calculate the estimated frequency of the block with the most samples - # max_blk_idx = np.argmax([bl.endRec - bl.startRec for bl in ncsSects.sects]) - # max_blk = ncsSects.sects[max_blk_idx] - - # maxBlkFreqEstimate = ( - # (max_blk.n_samples - ncsMemMap["nb_valid"][max_blk.endRec]) - # * 1e6 - # / (ncsMemMap["timestamp"][max_blk.endRec] - max_blk.startTime) - # ) - - # ncsSects.sampFreqUsed = maxBlkFreqEstimate - # ncsSects.microsPerSampUsed = NcsSectionsFactory.get_micros_per_samp_for_freq(maxBlkFreqEstimate) - # # free memory that is 
unnecessarily occupied by the memmap - # # (see https://github.com/numpy/numpy/issues/19340) - # del ncsMemMap - # return ncsSects - - # @staticmethod - # def _buildForMaxGap(ncsMemMap, nomFreq): - # """ - # Determine sections of records in memory mapped Ncs file given a nominal frequency of - # the file, using the default values of frequency tolerance and maximum gap between blocks. - - # Parameters - # ---------- - # ncsMemMap: - # memmap of Ncs file - # nomFreq: - # nominal sampling frequency used, normally from header of file - - # Returns - # ------- - # NcsSections object - # """ - # nb = NcsSections() - - # numRecs = ncsMemMap.shape[0] - # if numRecs < 1: - # return nb - - # chanNum = ncsMemMap["channel_id"][0] - # ts0 = ncsMemMap["timestamp"][0] - - # lastBlkI = numRecs - 1 - # lts = ncsMemMap["timestamp"][lastBlkI] - # lcid = ncsMemMap["channel_id"][lastBlkI] - # lnb = ncsMemMap["nb_valid"][lastBlkI] - # lsr = ncsMemMap["sample_rate"][lastBlkI] - - # # check if file is one block of records, with exact timestamp match, which may be the case - # numSampsForPred = NcsSection._RECORD_SIZE * lastBlkI - # predLastBlockStartTime = NcsSectionsFactory.calc_sample_time(nomFreq, ts0, numSampsForPred) - # freqInFile = math.floor(nomFreq) - # if lts - predLastBlockStartTime == 0 and lcid == chanNum and lsr == freqInFile: - # endTime = NcsSectionsFactory.calc_sample_time(nomFreq, lts, lnb) - # curBlock = NcsSection(0, ts0, lastBlkI, endTime, numSampsForPred) - # nb.sects.append(curBlock) - # nb.sampFreqUsed = (numSampsForPred + lnb) / (endTime - ts0) * 1e6 - # nb.microsPerSampUsed = NcsSectionsFactory.get_micros_per_samp_for_freq(nb.sampFreqUsed) - - # # otherwise parse records to determine blocks using default maximum gap length - # else: - # nb.sampFreqUsed = nomFreq - # nb.microsPerSampUsed = NcsSectionsFactory.get_micros_per_samp_for_freq(nb.sampFreqUsed) - # maxGapToAllow = round(NcsSectionsFactory._maxGapSampFrac * 1e6 / nomFreq) - # nb = 
NcsSectionsFactory._parseForMaxGap(ncsMemMap, nb, maxGapToAllow) - - # return nb + else: + # otherwise need to scan looking for data gaps + blkOnePredTime = NcsSectionsFactory.calc_sample_time(actualSampFreq, ts0, nb0) + # curBlock = NcsSection(0, ts0, -1, -1, -1) + # ncsSects.sects.append(curBlock) + ncsSects = NcsSectionsFactory._parseGivenActualFrequency(ncsMemMap, ncsSects, chanNum, reqFreq, blkOnePredTime) + return ncsSects + + @staticmethod + def _parseForMaxGap(ncsMemMap, ncsSects, maxGapLen): + """ + Parse blocks of records from file, allowing a maximum gap in timestamps between records + in sections. Estimates frequency being used based on timestamps. + + Parameters + ---------- + ncsMemMap: + memmap of Ncs file + ncsSects: + NcsSections object with sampFreqUsed set to nominal frequency to use in computing time + for samples (Hz) + maxGapLen: + maximum difference within a block between predicted time of start of record and + recorded time + + Returns + ------- + NcsSections object with sampFreqUsed and microsPerSamp set based on estimate from + largest block + """ + + chanNum = ncsMemMap["channel_id"][0] + recFreq = ncsMemMap["sample_rate"][0] + + # check for consistent channel_ids and sampling rates + ncsMemMap["channel_id"] + if not (ncsMemMap["channel_id"] == chanNum).all(): + raise IOError("Channel number changed in records within file") + + if not all(ncsMemMap["sample_rate"] == recFreq): + raise IOError("Sampling frequency changed in records within file") + + # find most frequent number of samples + exp_nb_valid = np.argmax(np.bincount(ncsMemMap["nb_valid"])) + # detect records with incomplete number of samples + gap_rec_ids = list(np.where(ncsMemMap["nb_valid"] != exp_nb_valid)[0]) + + rec_duration = 1e6 / ncsSects.sampFreqUsed * ncsMemMap["nb_valid"] + pred_times = np.rint(ncsMemMap["timestamp"] + rec_duration).astype(np.int64) + max_pred_times = pred_times + maxGapLen + # data records that start later than the predicted time (including the + # 
maximal accepted gap length) are considered delayed and a gap is + # registered. + delayed_recs = list(np.where(max_pred_times[:-1] < ncsMemMap["timestamp"][1:])[0]) + gap_rec_ids.extend(delayed_recs) + + # cleaning extracted gap ids + # last record can not be the beginning of a gap + last_rec_id = len(ncsMemMap["timestamp"]) - 1 + if last_rec_id in gap_rec_ids: + gap_rec_ids.remove(last_rec_id) + + # gap ids can only be listed once + gap_rec_ids = sorted(set(gap_rec_ids)) + + # create recording segments from identified gaps + ncsSects.sects.append(NcsSection(0, ncsMemMap["timestamp"][0], -1, -1, -1)) + for gap_rec_id in gap_rec_ids: + curr_sec = ncsSects.sects[-1] + curr_sec.endRec = gap_rec_id + curr_sec.endTime = pred_times[gap_rec_id] + n_samples = np.sum(ncsMemMap["nb_valid"][curr_sec.startRec : gap_rec_id + 1]) + curr_sec.n_samples = n_samples + + next_sec = NcsSection(gap_rec_id + 1, ncsMemMap["timestamp"][gap_rec_id + 1], -1, -1, -1) + ncsSects.sects.append(next_sec) + + curr_sec = ncsSects.sects[-1] + curr_sec.endRec = len(ncsMemMap["timestamp"]) - 1 + curr_sec.endTime = pred_times[-1] + n_samples = np.sum(ncsMemMap["nb_valid"][curr_sec.startRec :]) + curr_sec.n_samples = n_samples + + # calculate the estimated frequency of the block with the most samples + max_blk_idx = np.argmax([bl.endRec - bl.startRec for bl in ncsSects.sects]) + max_blk = ncsSects.sects[max_blk_idx] + + maxBlkFreqEstimate = ( + (max_blk.n_samples - ncsMemMap["nb_valid"][max_blk.endRec]) + * 1e6 + / (ncsMemMap["timestamp"][max_blk.endRec] - max_blk.startTime) + ) + + ncsSects.sampFreqUsed = maxBlkFreqEstimate + ncsSects.microsPerSampUsed = NcsSectionsFactory.get_micros_per_samp_for_freq(maxBlkFreqEstimate) + # free memory that is unnecessarily occupied by the memmap + # (see https://github.com/numpy/numpy/issues/19340) + del ncsMemMap + return ncsSects + + @staticmethod + def _buildForMaxGap(ncsMemMap, nomFreq): + """ + Determine sections of records in memory mapped Ncs file given a 
nominal frequency of + the file, using the default values of frequency tolerance and maximum gap between blocks. + + Parameters + ---------- + ncsMemMap: + memmap of Ncs file + nomFreq: + nominal sampling frequency used, normally from header of file + + Returns + ------- + NcsSections object + """ + nb = NcsSections() + + numRecs = ncsMemMap.shape[0] + if numRecs < 1: + return nb + + chanNum = ncsMemMap["channel_id"][0] + ts0 = ncsMemMap["timestamp"][0] + + lastBlkI = numRecs - 1 + lts = ncsMemMap["timestamp"][lastBlkI] + lcid = ncsMemMap["channel_id"][lastBlkI] + lnb = ncsMemMap["nb_valid"][lastBlkI] + lsr = ncsMemMap["sample_rate"][lastBlkI] + + # check if file is one block of records, with exact timestamp match, which may be the case + numSampsForPred = NcsSection._RECORD_SIZE * lastBlkI + predLastBlockStartTime = NcsSectionsFactory.calc_sample_time(nomFreq, ts0, numSampsForPred) + freqInFile = math.floor(nomFreq) + if lts - predLastBlockStartTime == 0 and lcid == chanNum and lsr == freqInFile: + endTime = NcsSectionsFactory.calc_sample_time(nomFreq, lts, lnb) + curBlock = NcsSection(0, ts0, lastBlkI, endTime, numSampsForPred) + nb.sects.append(curBlock) + nb.sampFreqUsed = (numSampsForPred + lnb) / (endTime - ts0) * 1e6 + nb.microsPerSampUsed = NcsSectionsFactory.get_micros_per_samp_for_freq(nb.sampFreqUsed) + + # otherwise parse records to determine blocks using default maximum gap length + else: + nb.sampFreqUsed = nomFreq + nb.microsPerSampUsed = NcsSectionsFactory.get_micros_per_samp_for_freq(nb.sampFreqUsed) + maxGapToAllow = round(NcsSectionsFactory._maxGapSampFrac * 1e6 / nomFreq) + nb = NcsSectionsFactory._parseForMaxGap(ncsMemMap, nb, maxGapToAllow) + + return nb @staticmethod def _buildNcsGeneric(ncsMemMap, sampFreq, gapTolerance=0): From 6385b0fd5e1e91f30275872a9a3bafef0569cfa5 Mon Sep 17 00:00:00 2001 From: Samuel Garcia Date: Mon, 4 Mar 2024 18:14:41 +0100 Subject: [PATCH 5/7] final clean --- neo/rawio/neuralynxrawio/ncssections.py | 255 
------------------------ 1 file changed, 255 deletions(-) diff --git a/neo/rawio/neuralynxrawio/ncssections.py b/neo/rawio/neuralynxrawio/ncssections.py index 33570d333..a9bc47c7d 100644 --- a/neo/rawio/neuralynxrawio/ncssections.py +++ b/neo/rawio/neuralynxrawio/ncssections.py @@ -170,261 +170,6 @@ def calc_sample_time(sampFr, startTime, posn): """ return round(startTime + NcsSectionsFactory.get_micros_per_samp_for_freq(sampFr) * posn) - @staticmethod - def _parseGivenActualFrequency(ncsMemMap, ncsSects, chanNum, reqFreq, blkOnePredTime): - """ - Parse sections in memory mapped file when microsPerSampUsed and sampFreqUsed are known, - filling in an NcsSections object. - - Parameters - ---------- - ncsMemMap: - memmap of Ncs file - ncsSections: - NcsSections with actual sampFreqUsed correct, first NcsSection with proper startSect - and startTime already added. - chanNum: - channel number that should be present in all records - reqFreq: - rounded frequency that all records should contain - blkOnePredTime: - predicted starting time of second record in block - - Returns - ------- - NcsSections object with block locations marked - """ - - # New code numpy vector based (speedup X50) - delta = (ncsMemMap["timestamp"][1:] - ncsMemMap["timestamp"][:-1]).astype(np.int64) - delta_prediction = ((ncsMemMap["nb_valid"][:-1] / ncsSects.sampFreqUsed) * 1e6).astype(np.int64) - gap_inds = np.flatnonzero((delta - delta_prediction) != 0) - gap_inds += 1 - sections_limits = [ 0 ] + gap_inds.tolist() + [len(ncsMemMap)] - - for i in range(len(gap_inds) + 1): - start = sections_limits[i] - stop = sections_limits[i + 1] - ncsSects.sects.append( - NcsSection( - startRec=start, - startTime=ncsMemMap["timestamp"][start], - endRec=stop-1, - endTime=ncsMemMap["timestamp"][stop-1] + np.uint64(ncsMemMap["nb_valid"][stop-1] / ncsSects.sampFreqUsed * 1e6), - n_samples=np.sum(ncsMemMap["nb_valid"][start:stop]) - ) - ) - - return ncsSects - - @staticmethod - def _buildGivenActualFrequency(ncsMemMap, 
actualSampFreq, reqFreq): - """ - Build NcsSections object for file given actual sampling frequency. - - Requires that frequency in each record agrees with requested frequency. This is - normally obtained by rounding the header frequency; however, this value may be different - from the rounded actual frequency used in the recording, since the underlying - requirement in older Ncs files was that the number of microseconds per sample in the - records is the inverse of the sampling frequency stated in the header truncated to - whole microseconds. - - Parameters - ---------- - ncsMemMap: - memmap of Ncs file - actualSampFreq: - actual sampling frequency used - reqFreq: - frequency to require in records - - Returns - ------- - NcsSections object - """ - # check frequency in first record - if ncsMemMap["sample_rate"][0] != reqFreq: - raise IOError("Sampling frequency in first record doesn't agree with header.") - chanNum = ncsMemMap["channel_id"][0] - - ncsSects = NcsSections() - ncsSects.sampFreqUsed = actualSampFreq - ncsSects.microsPerSampUsed = NcsSectionsFactory.get_micros_per_samp_for_freq(actualSampFreq) - - # check if file is one block of records, which is often the case, and avoid full parse - lastBlkI = ncsMemMap.shape[0] - 1 - ts0 = ncsMemMap["timestamp"][0] - nb0 = ncsMemMap["nb_valid"][0] - predLastBlockStartTime = NcsSectionsFactory.calc_sample_time( - actualSampFreq, ts0, NcsSection._RECORD_SIZE * lastBlkI - ) - lts = ncsMemMap["timestamp"][lastBlkI] - lnb = ncsMemMap["nb_valid"][lastBlkI] - if ( - ncsMemMap["channel_id"][lastBlkI] == chanNum - and ncsMemMap["sample_rate"][lastBlkI] == reqFreq - and lts == predLastBlockStartTime - ): - lastBlkEndTime = NcsSectionsFactory.calc_sample_time(actualSampFreq, lts, lnb) - n_samples = NcsSection._RECORD_SIZE * lastBlkI - curBlock = NcsSection(0, ts0, lastBlkI, lastBlkEndTime, n_samples) - - ncsSects.sects.append(curBlock) - return ncsSects - - - else: - # otherwise need to scan looking for data gaps - 
blkOnePredTime = NcsSectionsFactory.calc_sample_time(actualSampFreq, ts0, nb0) - # curBlock = NcsSection(0, ts0, -1, -1, -1) - # ncsSects.sects.append(curBlock) - ncsSects = NcsSectionsFactory._parseGivenActualFrequency(ncsMemMap, ncsSects, chanNum, reqFreq, blkOnePredTime) - return ncsSects - - @staticmethod - def _parseForMaxGap(ncsMemMap, ncsSects, maxGapLen): - """ - Parse blocks of records from file, allowing a maximum gap in timestamps between records - in sections. Estimates frequency being used based on timestamps. - - Parameters - ---------- - ncsMemMap: - memmap of Ncs file - ncsSects: - NcsSections object with sampFreqUsed set to nominal frequency to use in computing time - for samples (Hz) - maxGapLen: - maximum difference within a block between predicted time of start of record and - recorded time - - Returns - ------- - NcsSections object with sampFreqUsed and microsPerSamp set based on estimate from - largest block - """ - - chanNum = ncsMemMap["channel_id"][0] - recFreq = ncsMemMap["sample_rate"][0] - - # check for consistent channel_ids and sampling rates - ncsMemMap["channel_id"] - if not (ncsMemMap["channel_id"] == chanNum).all(): - raise IOError("Channel number changed in records within file") - - if not all(ncsMemMap["sample_rate"] == recFreq): - raise IOError("Sampling frequency changed in records within file") - - # find most frequent number of samples - exp_nb_valid = np.argmax(np.bincount(ncsMemMap["nb_valid"])) - # detect records with incomplete number of samples - gap_rec_ids = list(np.where(ncsMemMap["nb_valid"] != exp_nb_valid)[0]) - - rec_duration = 1e6 / ncsSects.sampFreqUsed * ncsMemMap["nb_valid"] - pred_times = np.rint(ncsMemMap["timestamp"] + rec_duration).astype(np.int64) - max_pred_times = pred_times + maxGapLen - # data records that start later than the predicted time (including the - # maximal accepted gap length) are considered delayed and a gap is - # registered. 
- delayed_recs = list(np.where(max_pred_times[:-1] < ncsMemMap["timestamp"][1:])[0]) - gap_rec_ids.extend(delayed_recs) - - # cleaning extracted gap ids - # last record can not be the beginning of a gap - last_rec_id = len(ncsMemMap["timestamp"]) - 1 - if last_rec_id in gap_rec_ids: - gap_rec_ids.remove(last_rec_id) - - # gap ids can only be listed once - gap_rec_ids = sorted(set(gap_rec_ids)) - - # create recording segments from identified gaps - ncsSects.sects.append(NcsSection(0, ncsMemMap["timestamp"][0], -1, -1, -1)) - for gap_rec_id in gap_rec_ids: - curr_sec = ncsSects.sects[-1] - curr_sec.endRec = gap_rec_id - curr_sec.endTime = pred_times[gap_rec_id] - n_samples = np.sum(ncsMemMap["nb_valid"][curr_sec.startRec : gap_rec_id + 1]) - curr_sec.n_samples = n_samples - - next_sec = NcsSection(gap_rec_id + 1, ncsMemMap["timestamp"][gap_rec_id + 1], -1, -1, -1) - ncsSects.sects.append(next_sec) - - curr_sec = ncsSects.sects[-1] - curr_sec.endRec = len(ncsMemMap["timestamp"]) - 1 - curr_sec.endTime = pred_times[-1] - n_samples = np.sum(ncsMemMap["nb_valid"][curr_sec.startRec :]) - curr_sec.n_samples = n_samples - - # calculate the estimated frequency of the block with the most samples - max_blk_idx = np.argmax([bl.endRec - bl.startRec for bl in ncsSects.sects]) - max_blk = ncsSects.sects[max_blk_idx] - - maxBlkFreqEstimate = ( - (max_blk.n_samples - ncsMemMap["nb_valid"][max_blk.endRec]) - * 1e6 - / (ncsMemMap["timestamp"][max_blk.endRec] - max_blk.startTime) - ) - - ncsSects.sampFreqUsed = maxBlkFreqEstimate - ncsSects.microsPerSampUsed = NcsSectionsFactory.get_micros_per_samp_for_freq(maxBlkFreqEstimate) - # free memory that is unnecessarily occupied by the memmap - # (see https://github.com/numpy/numpy/issues/19340) - del ncsMemMap - return ncsSects - - @staticmethod - def _buildForMaxGap(ncsMemMap, nomFreq): - """ - Determine sections of records in memory mapped Ncs file given a nominal frequency of - the file, using the default values of frequency tolerance 
and maximum gap between blocks. - - Parameters - ---------- - ncsMemMap: - memmap of Ncs file - nomFreq: - nominal sampling frequency used, normally from header of file - - Returns - ------- - NcsSections object - """ - nb = NcsSections() - - numRecs = ncsMemMap.shape[0] - if numRecs < 1: - return nb - - chanNum = ncsMemMap["channel_id"][0] - ts0 = ncsMemMap["timestamp"][0] - - lastBlkI = numRecs - 1 - lts = ncsMemMap["timestamp"][lastBlkI] - lcid = ncsMemMap["channel_id"][lastBlkI] - lnb = ncsMemMap["nb_valid"][lastBlkI] - lsr = ncsMemMap["sample_rate"][lastBlkI] - - # check if file is one block of records, with exact timestamp match, which may be the case - numSampsForPred = NcsSection._RECORD_SIZE * lastBlkI - predLastBlockStartTime = NcsSectionsFactory.calc_sample_time(nomFreq, ts0, numSampsForPred) - freqInFile = math.floor(nomFreq) - if lts - predLastBlockStartTime == 0 and lcid == chanNum and lsr == freqInFile: - endTime = NcsSectionsFactory.calc_sample_time(nomFreq, lts, lnb) - curBlock = NcsSection(0, ts0, lastBlkI, endTime, numSampsForPred) - nb.sects.append(curBlock) - nb.sampFreqUsed = (numSampsForPred + lnb) / (endTime - ts0) * 1e6 - nb.microsPerSampUsed = NcsSectionsFactory.get_micros_per_samp_for_freq(nb.sampFreqUsed) - - # otherwise parse records to determine blocks using default maximum gap length - else: - nb.sampFreqUsed = nomFreq - nb.microsPerSampUsed = NcsSectionsFactory.get_micros_per_samp_for_freq(nb.sampFreqUsed) - maxGapToAllow = round(NcsSectionsFactory._maxGapSampFrac * 1e6 / nomFreq) - nb = NcsSectionsFactory._parseForMaxGap(ncsMemMap, nb, maxGapToAllow) - - return nb - @staticmethod def _buildNcsGeneric(ncsMemMap, sampFreq, gapTolerance=0): """ From 8e633c500d46755e720889d255e29410c59ec10f Mon Sep 17 00:00:00 2001 From: Samuel Garcia Date: Wed, 6 Mar 2024 10:32:04 +0100 Subject: [PATCH 6/7] rename --- neo/rawio/neuralynxrawio/ncssections.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git 
a/neo/rawio/neuralynxrawio/ncssections.py b/neo/rawio/neuralynxrawio/ncssections.py index 1b57338b7..462913f72 100644 --- a/neo/rawio/neuralynxrawio/ncssections.py +++ b/neo/rawio/neuralynxrawio/ncssections.py @@ -172,13 +172,9 @@ def calc_sample_time(sampFr, startTime, posn): return round(startTime + NcsSectionsFactory.get_micros_per_samp_for_freq(sampFr) * posn) @staticmethod - def _buildNcsGeneric(ncsMemMap, sampFreq, gapTolerance=0): + def _buildNcsSections(ncsMemMap, sampFreq, gapTolerance=0): """ - Build - - This replace: - _buildGivenActualFrequency - _buildForMaxGap + Construct NcsSections with fast mode when no gaps or parsing the file to detect gaps. """ channel_id = ncsMemMap["channel_id"][0] @@ -264,7 +260,7 @@ def build_for_ncs_file(ncsMemMap, nlxHdr, gapTolerance=None, strict_gap_mode=Tru else: gapTolerance = 0 - ncsSects = NcsSectionsFactory._buildNcsGeneric(ncsMemMap, sampFreqUsed, gapTolerance=gapTolerance) + ncsSects = NcsSectionsFactory._buildNcsSections(ncsMemMap, sampFreqUsed, gapTolerance=gapTolerance) ncsSects.sampFreqUsed = sampFreqUsed ncsSects.microsPerSampUsed = microsPerSampUsed @@ -277,7 +273,7 @@ def build_for_ncs_file(ncsMemMap, nlxHdr, gapTolerance=None, strict_gap_mode=Tru else: # quarter of paquet size is tolerate gapTolerance = round(0.25 * NcsSection._RECORD_SIZE * 1e6 / freq) - ncsSects = NcsSectionsFactory._buildNcsGeneric(ncsMemMap, freq, gapTolerance=gapTolerance) + ncsSects = NcsSectionsFactory._buildNcsSections(ncsMemMap, freq, gapTolerance=gapTolerance) # take longer data block to compute reaal sampling rate @@ -305,7 +301,7 @@ def build_for_ncs_file(ncsMemMap, nlxHdr, gapTolerance=None, strict_gap_mode=Tru else: # quarter of paquet size is tolerate gapTolerance = round(0.25 * NcsSection._RECORD_SIZE * 1e6 / freq) - ncsSects = NcsSectionsFactory._buildNcsGeneric(ncsMemMap, freq, gapTolerance=gapTolerance) + ncsSects = NcsSectionsFactory._buildNcsSections(ncsMemMap, freq, gapTolerance=gapTolerance) ncsSects.sampFreqUsed = 
freq ncsSects.microsPerSampUsed = NcsSectionsFactory.get_micros_per_samp_for_freq(freq) From 1ed52bd8cd1042d0769d4454683318250429f128 Mon Sep 17 00:00:00 2001 From: Samuel Garcia Date: Wed, 6 Mar 2024 11:41:05 +0100 Subject: [PATCH 7/7] oups --- neo/test/rawiotest/test_neuralynxrawio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/neo/test/rawiotest/test_neuralynxrawio.py b/neo/test/rawiotest/test_neuralynxrawio.py index 8674db748..c8a8cdc6c 100644 --- a/neo/test/rawiotest/test_neuralynxrawio.py +++ b/neo/test/rawiotest/test_neuralynxrawio.py @@ -229,7 +229,7 @@ def test_build_given_actual_frequency(self): ncsBlocks.sampFreqUsed = 1 / (35e-6) ncsBlocks.microsPerSampUsed = 35 - ncsBlocks = NcsSectionsFactory._buildNcsGeneric(data0, ncsBlocks.sampFreqUsed) + ncsBlocks = NcsSectionsFactory._buildNcsSections(data0, ncsBlocks.sampFreqUsed) self.assertEqual(len(ncsBlocks.sects), 1) self.assertEqual(ncsBlocks.sects[0].startRec, 0)