Skip to content

Commit

Permalink
Merge pull request #119 from hed-standard/bids-tabular-file
Browse files Browse the repository at this point in the history
Implement validation for non-event TSV files
  • Loading branch information
happy5214 authored Aug 16, 2023
2 parents f7a0fb6 + b37d891 commit cb13607
Show file tree
Hide file tree
Showing 7 changed files with 228 additions and 161 deletions.
13 changes: 12 additions & 1 deletion bids/index.js
Original file line number Diff line number Diff line change
@@ -1,10 +1,20 @@
import { BidsDataset, BidsEventFile, BidsHedIssue, BidsTsvFile, BidsIssue, BidsJsonFile, BidsSidecar } from './types'
import {
BidsDataset,
BidsEventFile,
BidsTabularFile,
BidsHedIssue,
BidsTsvFile,
BidsIssue,
BidsJsonFile,
BidsSidecar,
} from './types'
import { validateBidsDataset } from './validate'

export {
BidsDataset,
BidsTsvFile,
BidsEventFile,
BidsTabularFile,
BidsJsonFile,
BidsSidecar,
BidsIssue,
Expand All @@ -16,6 +26,7 @@ export default {
BidsDataset,
BidsTsvFile,
BidsEventFile,
BidsTabularFile,
BidsJsonFile,
BidsSidecar,
BidsIssue,
Expand Down
31 changes: 31 additions & 0 deletions bids/tsvParser.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
/**
* Module for parsing TSV files.
*
* Copied from https://github.com/bids-standard/bids-validator/blob/6fc6d152b52266934575442e61f1477ba18f42ec/bids-validator/validators/tsv/tsvParser.js
*/

/**
 * Remove a byte order mark (U+FEFF) from the very start of a string.
 *
 * @param {string} text The text to clean.
 * @return {string} The text without a leading byte order mark.
 */
function stripBOM(text) {
  return text.replace(/^\uFEFF/, '')
}

/**
 * Rewrite CRLF pairs and lone CR characters as LF.
 *
 * @param {string} text The text to normalize.
 * @return {string} The text using only LF line endings.
 */
function normalizeEOL(text) {
  // CRLF pairs must be collapsed first so they do not turn into two line feeds.
  const withoutCrlf = text.replace(/\r\n/g, '\n')
  return withoutCrlf.replace(/\r/g, '\n')
}

/**
 * Check whether a row contains anything other than whitespace.
 *
 * @param {string} line A single line of text.
 * @return {boolean|*} The falsy input itself if the line is falsy, otherwise whether the line has non-whitespace content.
 */
function isContentfulRow(line) {
  if (!line) {
    // Falsy input (e.g. the empty string) is handed back unchanged.
    return line
  }
  return !/^\s*$/.test(line)
}

/**
 * Split the text of a TSV file into rows of cells.
 *
 * A leading byte order mark is dropped, line endings are normalized, and
 * blank or whitespace-only lines are discarded before splitting on tabs.
 * The header row is the first remaining row; it is also kept as the first
 * entry of the row list.
 *
 * @param {string} contents The contents of a TSV file.
 * @return {{headers: string[], rows: string[][]}} The parsed contents of the TSV file.
 */
function parseTSV(contents) {
  const lines = normalizeEOL(stripBOM(contents)).split('\n')
  const rows = []
  for (const line of lines) {
    if (isContentfulRow(line)) {
      rows.push(line.split('\t'))
    }
  }
  // With no contentful rows there is no header row either.
  const headers = rows.length ? rows[0] : []
  return { headers, rows }
}

export default parseTSV
119 changes: 97 additions & 22 deletions bids/types.js
Original file line number Diff line number Diff line change
@@ -1,20 +1,28 @@
import { sidecarValueHasHed } from './utils'
import { Issue } from '../common/issues/issues'
import parseTSV from './tsvParser'

/**
* Base class for BIDS data.
* @deprecated Will be removed in v4.0.0.
*/
class BidsData {
/**
* A mapping from unparsed HED strings to ParsedHedString objects.
* @deprecated Will be removed in v4.0.0.
* @type {Map<string, ParsedHedString>}
*/
parsedStringMapping
/**
* A mapping from definition names to their associated ParsedHedGroup objects.
* @deprecated Will be removed in v4.0.0.
* @type {Map<string, ParsedHedGroup>}
*/
definitions
/**
* A list of HED validation issues.
* This will be converted to BidsIssue objects later on.
* @deprecated Will be removed in v4.0.0.
* @type {Issue[]}
*/
hedIssues
Expand All @@ -26,6 +34,9 @@ class BidsData {
}
}

/**
* A BIDS file.
*/
class BidsFile extends BidsData {
/**
* The name of this file.
Expand All @@ -46,6 +57,9 @@ class BidsFile extends BidsData {
}
}

/**
* A BIDS JSON file.
*/
export class BidsJsonFile extends BidsFile {
/**
* This file's JSON data.
Expand All @@ -59,21 +73,57 @@ export class BidsJsonFile extends BidsFile {
}
}

/**
* A BIDS TSV file.
*/
export class BidsTsvFile extends BidsFile {
/**
* This file's parsed TSV data.
* @type {object}
* @type {{headers: string[], rows: string[][]}}
*/
parsedTsv
/**
* HED strings in the "HED" column of the TSV data.
* @type {string[]}
*/
hedColumnHedStrings
/**
* The list of potential JSON sidecars.
* @type {string[]}
*/
potentialSidecars
/**
* The pseudo-sidecar object representing the merged sidecar data.
* @type {BidsSidecar}
*/
mergedSidecar
/**
* The extracted HED data for the merged pseudo-sidecar.
* @type {Map<string, string|Object<string, string>>}
*/
sidecarHedData

constructor(name, parsedTsv, file) {
/**
* Constructor.
*
* @todo This interface is provisional and subject to modification in version 4.0.0.
*
* @param {string} name The name of the TSV file.
* @param {{headers: string[], rows: string[][]}|string} tsvData This file's TSV data.
* @param {object} file The file object representing this file.
* @param {string[]} potentialSidecars The list of potential JSON sidecars.
* @param {object} mergedDictionary The merged sidecar data.
*/
constructor(name, tsvData, file, potentialSidecars = [], mergedDictionary = {}) {
super(name, file)
this.parsedTsv = parsedTsv
if (typeof tsvData === 'string') {
tsvData = parseTSV(tsvData)
}
this.parsedTsv = tsvData
this.potentialSidecars = potentialSidecars

this.mergedSidecar = new BidsSidecar(name, mergedDictionary, null)
this.sidecarHedData = this.mergedSidecar.hedData
this._parseHedColumn()
}

Expand All @@ -90,29 +140,43 @@ export class BidsTsvFile extends BidsFile {
}
}

/**
* A BIDS events.tsv file.
*/
export class BidsEventFile extends BidsTsvFile {
/**
* The potential JSON sidecar data.
* @type {string[]}
* Constructor.
*
* @todo This interface is subject to modification in version 4.0.0.
*
* @param {string} name The name of the event TSV file.
* @param {string[]} potentialSidecars The list of potential JSON sidecars.
* @param {object} mergedDictionary The merged sidecar data.
* @param {{headers: string[], rows: string[][]}|string} tsvData This file's TSV data.
* @param {object} file The file object representing this file.
*/
potentialSidecars
constructor(name, potentialSidecars, mergedDictionary, tsvData, file) {
super(name, tsvData, file, potentialSidecars, mergedDictionary)
}
}

/**
* A BIDS TSV file other than an events.tsv file.
*/
export class BidsTabularFile extends BidsTsvFile {
/**
* The pseudo-sidecar object representing the merged sidecar data.
* @type {BidsSidecar}
* Constructor.
*
* @todo This interface is subject to modification in version 4.0.0.
*
* @param {string} name The name of the TSV file.
* @param {string[]} potentialSidecars The list of potential JSON sidecars.
* @param {object} mergedDictionary The merged sidecar data.
* @param {{headers: string[], rows: string[][]}|string} tsvData This file's TSV data.
* @param {object} file The file object representing this file.
*/
mergedSidecar
/**
* The extracted HED data for the merged pseudo-sidecar.
* @type {Map<string, string|Object<string, string>>}
*/
sidecarHedData

constructor(name, potentialSidecars, mergedDictionary, parsedTsv, file) {
super(name, parsedTsv, file)
this.potentialSidecars = potentialSidecars

this.mergedSidecar = new BidsSidecar(name, mergedDictionary, null)
this.sidecarHedData = this.mergedSidecar.hedData
constructor(name, potentialSidecars, mergedDictionary, tsvData, file) {
super(name, tsvData, file, potentialSidecars, mergedDictionary)
}
}

Expand All @@ -133,6 +197,13 @@ export class BidsSidecar extends BidsJsonFile {
*/
hedCategoricalStrings

/**
* Constructor.
*
* @param {string} name The name of the sidecar file.
* @param {Object} sidecarData The raw JSON data.
* @param {Object} file The file object representing this file.
*/
constructor(name, sidecarData = {}, file) {
super(name, sidecarData, file)

Expand Down Expand Up @@ -182,7 +253,11 @@ export class BidsSidecar extends BidsJsonFile {
}
}

// TODO: Remove in v4.0.0.
/**
* Fallback default dataset_description.json file.
* @deprecated Will be removed in v4.0.0.
* @type {BidsJsonFile}
*/
const fallbackDatasetDescription = new BidsJsonFile('./dataset_description.json', null)

export class BidsDataset extends BidsData {
Expand Down
56 changes: 27 additions & 29 deletions bids/validate.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { validateHedDatasetWithContext } from '../validator/dataset'
import { validateHedString } from '../validator/event'
import { BidsDataset, BidsHedIssue, BidsIssue } from './types'
import { BidsDataset, BidsEventFile, BidsHedIssue, BidsIssue } from './types'
import { buildBidsSchemas } from './schema'
import { generateIssue, Issue, IssueError } from '../common/issues/issues'

Expand Down Expand Up @@ -39,7 +39,7 @@ function validateFullDataset(dataset, hedSchemas) {
return Promise.resolve([...sidecarIssues, ...hedColumnIssues])
}
const eventFileIssues = dataset.eventData.map((eventFileData) => {
return validateBidsEventFile(eventFileData, hedSchemas)
return validateBidsTsvFile(eventFileData, hedSchemas)
})
return Promise.resolve([].concat(sidecarIssues, hedColumnIssues, ...eventFileIssues))
} catch (e) {
Expand All @@ -48,18 +48,18 @@ function validateFullDataset(dataset, hedSchemas) {
}

/**
* Validate a BIDS event TSV file.
* Validate a BIDS TSV file.
*
* @param {BidsEventFile} eventFileData A BIDS event TSV file.
* @param {BidsTsvFile} tsvFileData A BIDS TSV file.
* @param {Schemas} hedSchemas A HED schema collection.
* @return {BidsIssue[]} Any issues found.
*/
function validateBidsEventFile(eventFileData, hedSchemas) {
const [hedStrings, tsvIssues] = parseTsvHed(eventFileData)
function validateBidsTsvFile(tsvFileData, hedSchemas) {
const [hedStrings, tsvIssues] = parseTsvHed(tsvFileData)
if (!hedStrings) {
return []
} else {
const datasetIssues = validateCombinedDataset(hedStrings, hedSchemas, eventFileData)
const datasetIssues = validateCombinedDataset(hedStrings, hedSchemas, tsvFileData)
return [...tsvIssues, ...datasetIssues]
}
}
Expand Down Expand Up @@ -108,33 +108,33 @@ function validateHedColumn(eventData, hedSchemas) {
}

/**
* Combine the BIDS sidecar HED data into a BIDS event TSV file's HED data.
* Combine the BIDS sidecar HED data into a BIDS TSV file's HED data.
*
* @param {BidsEventFile} eventFileData A BIDS event TSV file.
* @return {[string[], BidsIssue[]]} The combined HED strings for this BIDS event TSV file, and all issues found during the combination.
* @param {BidsTsvFile} tsvFileData A BIDS TSV file.
* @return {[string[], BidsIssue[]]} The combined HED strings for this BIDS TSV file, and all issues found during the combination.
*/
function parseTsvHed(eventFileData) {
function parseTsvHed(tsvFileData) {
const hedStrings = []
const issues = []
const sidecarHedColumnIndices = {}
for (const sidecarHedColumn of eventFileData.sidecarHedData.keys()) {
const sidecarHedColumnHeader = eventFileData.parsedTsv.headers.indexOf(sidecarHedColumn)
for (const sidecarHedColumn of tsvFileData.sidecarHedData.keys()) {
const sidecarHedColumnHeader = tsvFileData.parsedTsv.headers.indexOf(sidecarHedColumn)
if (sidecarHedColumnHeader > -1) {
sidecarHedColumnIndices[sidecarHedColumn] = sidecarHedColumnHeader
}
}
if (eventFileData.hedColumnHedStrings.length + sidecarHedColumnIndices.length === 0) {
if (tsvFileData.hedColumnHedStrings.length + sidecarHedColumnIndices.length === 0) {
return [[], []]
}

eventFileData.parsedTsv.rows.slice(1).forEach((rowCells, rowIndex) => {
tsvFileData.parsedTsv.rows.slice(1).forEach((rowCells, rowIndex) => {
// get the 'HED' field
const hedStringParts = []
if (eventFileData.hedColumnHedStrings[rowIndex]) {
hedStringParts.push(eventFileData.hedColumnHedStrings[rowIndex])
if (tsvFileData.hedColumnHedStrings[rowIndex]) {
hedStringParts.push(tsvFileData.hedColumnHedStrings[rowIndex])
}
for (const [sidecarHedColumn, sidecarHedIndex] of Object.entries(sidecarHedColumnIndices)) {
const sidecarHedData = eventFileData.sidecarHedData.get(sidecarHedColumn)
const sidecarHedData = tsvFileData.sidecarHedData.get(sidecarHedColumn)
const rowCell = rowCells[sidecarHedIndex]
if (rowCell && rowCell !== 'n/a') {
let sidecarHedString
Expand All @@ -154,9 +154,9 @@ function parseTsvHed(eventFileData) {
generateIssue('sidecarKeyMissing', {
key: rowCell,
column: sidecarHedColumn,
file: eventFileData.file.relativePath,
file: tsvFileData.file.relativePath,
}),
eventFileData.file,
tsvFileData.file,
),
)
}
Expand All @@ -176,17 +176,15 @@ function parseTsvHed(eventFileData) {
*
* @param {string[]} hedStrings The HED strings in the data collection.
* @param {Schemas} hedSchemas The HED schema collection to validate against.
* @param {BidsEventFile} eventFileData The BIDS event TSV file being validated.
* @param {BidsTsvFile} tsvFileData The BIDS TSV file being validated.
* @return {BidsHedIssue[]} Any issues found.
*/
function validateCombinedDataset(hedStrings, hedSchemas, eventFileData) {
const [, hedIssues] = validateHedDatasetWithContext(
hedStrings,
eventFileData.mergedSidecar.hedStrings,
hedSchemas,
true,
)
return convertHedIssuesToBidsIssues(hedIssues, eventFileData.file)
function validateCombinedDataset(hedStrings, hedSchemas, tsvFileData) {
const [, hedIssues] = validateHedDatasetWithContext(hedStrings, tsvFileData.mergedSidecar.hedStrings, hedSchemas, {
checkForWarnings: true,
validateDatasetLevel: tsvFileData instanceof BidsEventFile,
})
return convertHedIssuesToBidsIssues(hedIssues, tsvFileData.file)
}

/**
Expand Down
Loading

0 comments on commit cb13607

Please sign in to comment.