diff --git a/bids/types/json.js b/bids/types/json.js index 36da7826..814b531c 100644 --- a/bids/types/json.js +++ b/bids/types/json.js @@ -1,7 +1,7 @@ import isPlainObject from 'lodash/isPlainObject' import { sidecarValueHasHed } from '../utils' -import { parseHedString } from '../../parser/main' +import { parseHedString } from '../../parser/parser' import ParsedHedString from '../../parser/parsedHedString' import { BidsFile } from './basic' import BidsHedSidecarValidator from '../validator/bidsHedSidecarValidator' diff --git a/bids/validator/bidsHedTsvValidator.js b/bids/validator/bidsHedTsvValidator.js index 6ecf52d2..231ea14b 100644 --- a/bids/validator/bidsHedTsvValidator.js +++ b/bids/validator/bidsHedTsvValidator.js @@ -1,7 +1,7 @@ import BidsHedSidecarValidator from './bidsHedSidecarValidator' import { BidsHedIssue, BidsIssue } from '../types/issues' import { BidsTsvEvent, BidsTsvRow } from '../types/tsv' -import { parseHedString } from '../../parser/main' +import { parseHedString } from '../../parser/parser' import ColumnSplicer from '../../parser/columnSplicer' import ParsedHedString from '../../parser/parsedHedString' import { generateIssue } from '../../common/issues/issues' diff --git a/converter/converter.js b/converter/converter.js index 6c7b1f59..6c21c56f 100644 --- a/converter/converter.js +++ b/converter/converter.js @@ -1,4 +1,4 @@ -import { parseHedString } from '../parser/main' +import { parseHedString } from '../parser/parser' /** * Convert a HED string. 
diff --git a/esbuild.mjs b/esbuild.mjs index ec1316eb..785a6a11 100644 --- a/esbuild.mjs +++ b/esbuild.mjs @@ -7,7 +7,7 @@ await esbuild.build({ entryPoints: [path.join(process.cwd(), 'index.js')], loader: { '.xml': 'text' }, outdir: path.join(process.cwd(), 'dist', 'commonjs'), - target: 'node12', + target: 'node18', bundle: true, sourcemap: true, platform: 'node', diff --git a/package-lock.json b/package-lock.json index 03dbdf4e..36be2288 100644 --- a/package-lock.json +++ b/package-lock.json @@ -19,6 +19,7 @@ "pluralize": "^8.0.0", "semver": "^7.6.0", "string_decoder": "^1.3.0", + "unicode-name": "^1.0.2", "xml2js": "^0.6.2" }, "devDependencies": { @@ -5191,6 +5192,14 @@ "node": ">=14.17" } }, + "node_modules/unicode-name": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/unicode-name/-/unicode-name-1.0.2.tgz", + "integrity": "sha512-PETEgU8TKsHoGZXP/3eWRU/4xnXJKwAIm+H7b0s/6CEP6o+YK4tWbwBXPLKe0U5+njWEAo2snT5+Mvoau6BI8A==", + "engines": { + "node": ">=18.20" + } + }, "node_modules/update-browserslist-db": { "version": "1.0.14", "resolved": "https://registry.npmjs.org/update-browserslist-db/-/update-browserslist-db-1.0.14.tgz", @@ -9108,6 +9117,11 @@ "integrity": "sha512-vcI4UpRgg81oIRUFwR0WSIHKt11nJ7SAVlYNIu+QpqeyXP+gpQJy/Z4+F0aGxSE4MqwjyXvW/TzgkLAx2AGHwQ==", "dev": true }, + "unicode-name": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/unicode-name/-/unicode-name-1.0.2.tgz", + "integrity": "sha512-PETEgU8TKsHoGZXP/3eWRU/4xnXJKwAIm+H7b0s/6CEP6o+YK4tWbwBXPLKe0U5+njWEAo2snT5+Mvoau6BI8A==" + }, "update-browserslist-db": { "version": "1.0.14", "resolved": "https://registry.npmjs.org/update-browserslist-db/-/update-browserslist-db-1.0.14.tgz", diff --git a/package.json b/package.json index 79b45ad6..801cf414 100644 --- a/package.json +++ b/package.json @@ -45,6 +45,7 @@ "pluralize": "^8.0.0", "semver": "^7.6.0", "string_decoder": "^1.3.0", + "unicode-name": "^1.0.2", "xml2js": "^0.6.2" }, "devDependencies": { @@ -74,7 +75,10 @@ 
"transform": { "\\.js$": "esbuild-runner/jest", "\\.xml$": "/fileTransformer.js" - } + }, + "transformIgnorePatterns": [ + "node_modules/(?!unicode-name)" + ] }, "browser": { "fs": false diff --git a/parser/columnSplicer.js b/parser/columnSplicer.js index bc27e02e..cacb7b64 100644 --- a/parser/columnSplicer.js +++ b/parser/columnSplicer.js @@ -2,7 +2,7 @@ import ParsedHedString from './parsedHedString' import ParsedHedColumnSplice from './parsedHedColumnSplice' import ParsedHedGroup from './parsedHedGroup' import { generateIssue } from '../common/issues/issues' -import { parseHedString } from './main' +import { parseHedString } from './parser' export class ColumnSplicer { /** diff --git a/parser/main.js b/parser/main.js deleted file mode 100644 index ff250d4b..00000000 --- a/parser/main.js +++ /dev/null @@ -1,186 +0,0 @@ -import { mergeParsingIssues } from '../utils/hedData' -import { generateIssue } from '../common/issues/issues' - -import ParsedHedString from './parsedHedString' -import splitHedString from './splitHedString' -import { getCharacterCount, stringIsEmpty } from '../utils/string' - -const openingGroupCharacter = '(' -const closingGroupCharacter = ')' -const delimiters = new Set([',']) - -/** - * Substitute certain illegal characters and report warnings when found. - */ -const substituteCharacters = function (hedString) { - const issues = [] - const illegalCharacterMap = { '\0': ['ASCII NUL', ' '], '\t': ['Tab', ' '] } - const replaceFunction = function (match, offset) { - if (match in illegalCharacterMap) { - const [name, replacement] = illegalCharacterMap[match] - issues.push( - generateIssue('invalidCharacter', { - character: name, - index: offset, - string: hedString, - }), - ) - return replacement - } else { - return match - } - } - const fixedString = hedString.replace(/./g, replaceFunction) - - return [fixedString, issues] -} - -/** - * Check if group parentheses match. Pushes an issue if they don't match. 
- */ -const countTagGroupParentheses = function (hedString) { - const issues = [] - const numberOfOpeningParentheses = getCharacterCount(hedString, openingGroupCharacter) - const numberOfClosingParentheses = getCharacterCount(hedString, closingGroupCharacter) - if (numberOfOpeningParentheses !== numberOfClosingParentheses) { - issues.push( - generateIssue('parentheses', { - opening: numberOfOpeningParentheses, - closing: numberOfClosingParentheses, - }), - ) - } - return issues -} - -/** - * Check if a comma is missing after an opening parenthesis. - */ -const isCommaMissingAfterClosingParenthesis = function (lastNonEmptyCharacter, currentCharacter) { - return ( - lastNonEmptyCharacter === closingGroupCharacter && - !(delimiters.has(currentCharacter) || currentCharacter === closingGroupCharacter) - ) -} - -/** - * Check for delimiter issues in a HED string (e.g. missing commas adjacent to groups, extra commas or tildes). - */ -const findDelimiterIssuesInHedString = function (hedString) { - const issues = [] - let lastNonEmptyValidCharacter = '' - let lastNonEmptyValidIndex = 0 - let currentTag = '' - for (let i = 0; i < hedString.length; i++) { - const currentCharacter = hedString.charAt(i) - currentTag += currentCharacter - if (stringIsEmpty(currentCharacter)) { - continue - } - if (delimiters.has(currentCharacter)) { - if (currentTag.trim() === currentCharacter) { - issues.push( - generateIssue('extraDelimiter', { - character: currentCharacter, - index: i, - string: hedString, - }), - ) - currentTag = '' - continue - } - currentTag = '' - } else if (currentCharacter === openingGroupCharacter) { - if (currentTag.trim() === openingGroupCharacter) { - currentTag = '' - } else { - issues.push(generateIssue('commaMissing', { tag: currentTag })) - } - } else if (isCommaMissingAfterClosingParenthesis(lastNonEmptyValidCharacter, currentCharacter)) { - issues.push( - generateIssue('commaMissing', { - tag: currentTag.slice(0, -1), - }), - ) - break - } - 
lastNonEmptyValidCharacter = currentCharacter - lastNonEmptyValidIndex = i - } - if (delimiters.has(lastNonEmptyValidCharacter)) { - issues.push( - generateIssue('extraDelimiter', { - character: lastNonEmptyValidCharacter, - index: lastNonEmptyValidIndex, - string: hedString, - }), - ) - } - return issues -} - -/** - * Validate the full unparsed HED string. - * - * @param {string} hedString The unparsed HED string. - * @returns {Object} String substitution issues and other issues. - */ -const validateFullUnparsedHedString = function (hedString) { - const [fixedHedString, substitutionIssues] = substituteCharacters(hedString) - const delimiterIssues = [].concat( - countTagGroupParentheses(fixedHedString), - findDelimiterIssuesInHedString(fixedHedString), - ) - - return { - substitution: substitutionIssues, - delimiter: delimiterIssues, - } -} - -/** - * Parse a full HED string into an object of tag types. - * - * @param {string|ParsedHedString} hedString The full HED string to parse. - * @param {Schemas} hedSchemas The collection of HED schemas. - * @returns {[ParsedHedString|null, Object]} The parsed HED tag data and an object containing lists of parsing issues. - */ -export const parseHedString = function (hedString, hedSchemas) { - if (hedString instanceof ParsedHedString) { - return [hedString, {}] - } - const fullStringIssues = validateFullUnparsedHedString(hedString) - if (fullStringIssues.delimiter.length > 0) { - fullStringIssues.syntax = [] - return [null, fullStringIssues] - } - const [parsedTags, splitIssues] = splitHedString(hedString, hedSchemas) - const parsingIssues = Object.assign(fullStringIssues, splitIssues) - if (parsedTags === null) { - return [null, parsingIssues] - } - const parsedString = new ParsedHedString(hedString, parsedTags) - return [parsedString, parsingIssues] -} - -/** - * Parse a set of HED strings. - * - * @param {string[]|ParsedHedString[]} hedStrings A set of HED strings. 
- * @param {Schemas} hedSchemas The collection of HED schemas. - * @returns {[ParsedHedString[], Object]} The parsed HED strings and any issues found. - */ -export const parseHedStrings = function (hedStrings, hedSchemas) { - return hedStrings - .map((hedString) => { - return parseHedString(hedString, hedSchemas) - }) - .reduce( - ([previousStrings, previousIssues], [currentString, currentIssues]) => { - previousStrings.push(currentString) - mergeParsingIssues(previousIssues, currentIssues) - return [previousStrings, previousIssues] - }, - [[], {}], - ) -} diff --git a/parser/parser.js b/parser/parser.js new file mode 100644 index 00000000..a86e05f3 --- /dev/null +++ b/parser/parser.js @@ -0,0 +1,214 @@ +import { mergeParsingIssues } from '../utils/hedData' +import { generateIssue } from '../common/issues/issues' +import ParsedHedString from './parsedHedString' +import HedStringSplitter from './splitter' +import { getCharacterCount, stringIsEmpty } from '../utils/string' + +const openingGroupCharacter = '(' +const closingGroupCharacter = ')' +const delimiters = new Set([',']) + +/** + * A parser for HED strings. + */ +class HedStringParser { + /** + * The HED string being parsed. + * @type {string|ParsedHedString} + */ + hedString + /** + * The collection of HED schemas. + * @type {Schemas} + */ + hedSchemas + + /** + * Constructor. + * + * @param {string|ParsedHedString} hedString The HED string to be parsed. + * @param {Schemas} hedSchemas The collection of HED schemas. + */ + constructor(hedString, hedSchemas) { + this.hedString = hedString + this.hedSchemas = hedSchemas + } + + /** + * Check if the parentheses in a tag group match. + * + * @returns {Issue[]} Any issues found related to unmatched parentheses. 
+ */ + _countTagGroupParentheses() { + const issues = [] + const numberOfOpeningParentheses = getCharacterCount(this.hedString, openingGroupCharacter) + const numberOfClosingParentheses = getCharacterCount(this.hedString, closingGroupCharacter) + + if (numberOfOpeningParentheses !== numberOfClosingParentheses) { + issues.push( + generateIssue('parentheses', { + opening: numberOfOpeningParentheses, + closing: numberOfClosingParentheses, + }), + ) + } + + return issues + } + + /** + * Check if a comma is missing after a closing parenthesis. + * + * @param {string} lastNonEmptyCharacter The last non-empty character. + * @param {string} currentCharacter The current character in the HED string. + * @returns {boolean} Whether a comma is missing after a closing parenthesis. + */ + _isCommaMissingAfterClosingParenthesis(lastNonEmptyCharacter, currentCharacter) { + return ( + lastNonEmptyCharacter === closingGroupCharacter && + !(delimiters.has(currentCharacter) || currentCharacter === closingGroupCharacter) + ) + } + + /** + * Find delimiter-related issues in a HED string. + * + * @returns {Issue[]} Any issues related to delimiters. 
+ */ + _findDelimiterIssues() { + const issues = [] + let lastNonEmptyValidCharacter = '' + let lastNonEmptyValidIndex = 0 + let currentTag = '' + + for (let i = 0; i < this.hedString.length; i++) { + const currentCharacter = this.hedString.charAt(i) + currentTag += currentCharacter + + if (stringIsEmpty(currentCharacter)) { + continue + } + + if (delimiters.has(currentCharacter)) { + if (currentTag.trim() === currentCharacter) { + issues.push( + generateIssue('extraDelimiter', { + character: currentCharacter, + index: i, + string: this.hedString, + }), + ) + currentTag = '' + continue + } + currentTag = '' + } else if (currentCharacter === openingGroupCharacter) { + if (currentTag.trim() !== openingGroupCharacter) { + issues.push(generateIssue('commaMissing', { tag: currentTag })) + } + currentTag = '' + } else if (this._isCommaMissingAfterClosingParenthesis(lastNonEmptyValidCharacter, currentCharacter)) { + issues.push( + generateIssue('commaMissing', { + tag: currentTag.slice(0, -1), + }), + ) + break + } + + lastNonEmptyValidCharacter = currentCharacter + lastNonEmptyValidIndex = i + } + + if (delimiters.has(lastNonEmptyValidCharacter)) { + issues.push( + generateIssue('extraDelimiter', { + character: lastNonEmptyValidCharacter, + index: lastNonEmptyValidIndex, + string: this.hedString, + }), + ) + } + + return issues + } + + /** + * Validate the full unparsed HED string. + * + * @returns {Object} Any issues found during validation. + */ + _validateFullUnparsedHedString() { + const delimiterIssues = [].concat(this._countTagGroupParentheses(), this._findDelimiterIssues()) + + return { delimiter: delimiterIssues } + } + + /** + * Parse a full HED string. + * + * @returns {[ParsedHedString|null, Object]} The parsed HED string and any parsing issues. 
+ */ + parseHedString() { + if (this.hedString instanceof ParsedHedString) { + return [this.hedString, {}] + } + + const fullStringIssues = this._validateFullUnparsedHedString() + if (fullStringIssues.delimiter.length > 0) { + fullStringIssues.syntax = [] + return [null, fullStringIssues] + } + + const [parsedTags, splitIssues] = new HedStringSplitter(this.hedString, this.hedSchemas).splitHedString() + const parsingIssues = Object.assign(fullStringIssues, splitIssues) + if (parsedTags === null) { + return [null, parsingIssues] + } + + const parsedString = new ParsedHedString(this.hedString, parsedTags) + return [parsedString, parsingIssues] + } + + /** + * Parse a list of HED strings. + * + * @param {string[]|ParsedHedString[]} hedStrings A list of HED strings. + * @param {Schemas} hedSchemas The collection of HED schemas. + * @returns {[ParsedHedString[], Object]} The parsed HED strings and any issues found. + */ + static parseHedStrings(hedStrings, hedSchemas) { + const parsedStrings = [] + const cumulativeIssues = {} + + for (const hedString of hedStrings) { + const [parsedString, currentIssues] = new HedStringParser(hedString, hedSchemas).parseHedString() + parsedStrings.push(parsedString) + mergeParsingIssues(cumulativeIssues, currentIssues) + } + + return [parsedStrings, cumulativeIssues] + } +} + +/** + * Parse a HED string. + * + * @param {string|ParsedHedString} hedString A (possibly already parsed) HED string. + * @param {Schemas} hedSchemas The collection of HED schemas. + * @returns {[ParsedHedString|null, Object]} The parsed HED string (null if parsing failed) and any issues found. + */ +export function parseHedString(hedString, hedSchemas) { + return new HedStringParser(hedString, hedSchemas).parseHedString() +} + +/** + * Parse a list of HED strings. + * + * @param {string[]|ParsedHedString[]} hedStrings A list of HED strings. + * @param {Schemas} hedSchemas The collection of HED schemas. + * @returns {[ParsedHedString[], Object]} The parsed HED strings and any issues found. 
+ */ +export function parseHedStrings(hedStrings, hedSchemas) { + return HedStringParser.parseHedStrings(hedStrings, hedSchemas) +} diff --git a/parser/splitHedString.js b/parser/splitHedString.js deleted file mode 100644 index 1887fdc4..00000000 --- a/parser/splitHedString.js +++ /dev/null @@ -1,102 +0,0 @@ -import { ParsedHed3Tag, ParsedHedTag } from './parsedHedTag' -import ParsedHedColumnSplice from './parsedHedColumnSplice' -import ParsedHedGroup from './parsedHedGroup' -import { Schemas } from '../common/schema/types' -import { recursiveMap } from '../utils/array' -import { mergeParsingIssues } from '../utils/hedData' -import { ParsedHed2Tag } from '../validator/hed2/parser/parsedHed2Tag' -import { HedStringTokenizer, ColumnSpliceSpec, TagSpec } from './tokenizer' -import { generateIssue, IssueError } from '../common/issues/issues' - -const generationToClass = [ - (originalTag, hedString, originalBounds, hedSchemas, schemaName, tagSpec) => - new ParsedHedTag(originalTag, originalBounds), - (originalTag, hedString, originalBounds, hedSchemas, schemaName, tagSpec) => - new ParsedHedTag(originalTag, originalBounds), // Generation 1 is not supported by this validator. - (originalTag, hedString, originalBounds, hedSchemas, schemaName, tagSpec) => - new ParsedHed2Tag(originalTag, hedString, originalBounds, hedSchemas, schemaName), - (originalTag, hedString, originalBounds, hedSchemas, schemaName, tagSpec) => - new ParsedHed3Tag(tagSpec, hedSchemas, hedString), -] - -/** - * Create the parsed HED tag and group objects. - * - * @param {string} hedString The HED string to be split. - * @param {Schemas} hedSchemas The collection of HED schemas. - * @param {TagSpec[]} tagSpecs The tag specifications. - * @param {GroupSpec} groupSpecs The bounds of the tag groups. - * @returns {[ParsedHedSubstring[], Object]} The parsed HED string data and any issues found. 
- */ -const createParsedTags = function (hedString, hedSchemas, tagSpecs, groupSpecs) { - const conversionIssues = [] - const syntaxIssues = [] - const ParsedHedTagConstructor = generationToClass[hedSchemas.generation] - - const createParsedTag = (tagSpec) => { - if (tagSpec instanceof TagSpec) { - try { - const parsedTag = ParsedHedTagConstructor( - tagSpec.tag, - hedString, - tagSpec.bounds, - hedSchemas, - tagSpec.library, - tagSpec, - ) - return parsedTag - } catch (issueError) { - if (issueError instanceof IssueError) { - conversionIssues.push(issueError.issue) - } else if (issueError instanceof Error) { - conversionIssues.push(generateIssue('internalError', { message: issueError.message })) - } - return null - } - } else if (tagSpec instanceof ColumnSpliceSpec) { - return new ParsedHedColumnSplice(tagSpec.columnName, tagSpec.bounds) - } - } - const createParsedGroups = (tags, groupSpecs) => { - const tagGroups = [] - let index = 0 - for (const tag of tags) { - if (Array.isArray(tag)) { - const groupSpec = groupSpecs[index] - tagGroups.push( - new ParsedHedGroup(createParsedGroups(tag, groupSpec.children), hedSchemas, hedString, groupSpec.bounds), - ) - index++ - } else if (tag !== null) { - tagGroups.push(tag) - } - } - return tagGroups - } - const parsedTags = recursiveMap(createParsedTag, tagSpecs) - const parsedTagsWithGroups = createParsedGroups(parsedTags, groupSpecs.children) - - const issues = { - syntax: syntaxIssues, - conversion: conversionIssues, - } - - return [parsedTagsWithGroups, issues] -} - -/** - * Split a HED string. - * - * @param {string} hedString The HED string to be split. - * @param {Schemas} hedSchemas The collection of HED schemas. - * @returns {[ParsedHedSubstring[], Object]} The parsed HED string data and any issues found. 
- */ -export default function splitHedString(hedString, hedSchemas) { - const [tagSpecs, groupBounds, tokenizingIssues] = new HedStringTokenizer(hedString).tokenize() - if (tokenizingIssues.syntax.length > 0) { - return [null, tokenizingIssues] - } - const [parsedTags, parsingIssues] = createParsedTags(hedString, hedSchemas, tagSpecs, groupBounds) - mergeParsingIssues(tokenizingIssues, parsingIssues) - return [parsedTags, tokenizingIssues] -} diff --git a/parser/splitter.js b/parser/splitter.js new file mode 100644 index 00000000..31038244 --- /dev/null +++ b/parser/splitter.js @@ -0,0 +1,167 @@ +import { ParsedHed3Tag, ParsedHedTag } from './parsedHedTag' +import ParsedHedColumnSplice from './parsedHedColumnSplice' +import ParsedHedGroup from './parsedHedGroup' +import { Schemas } from '../common/schema/types' +import { recursiveMap } from '../utils/array' +import { mergeParsingIssues } from '../utils/hedData' +import { ParsedHed2Tag } from '../validator/hed2/parser/parsedHed2Tag' +import { HedStringTokenizer, ColumnSpliceSpec, TagSpec } from './tokenizer' +import { generateIssue, IssueError } from '../common/issues/issues' + +const generationToClass = [ + (originalTag, hedString, originalBounds, hedSchemas, schemaName, tagSpec) => + new ParsedHedTag(originalTag, originalBounds), + (originalTag, hedString, originalBounds, hedSchemas, schemaName, tagSpec) => + new ParsedHedTag(originalTag, originalBounds), // Generation 1 is not supported by this validator. + (originalTag, hedString, originalBounds, hedSchemas, schemaName, tagSpec) => + new ParsedHed2Tag(originalTag, hedString, originalBounds, hedSchemas, schemaName), + (originalTag, hedString, originalBounds, hedSchemas, schemaName, tagSpec) => + new ParsedHed3Tag(tagSpec, hedSchemas, hedString), +] + +export default class HedStringSplitter { + /** + * The HED string being split. + * @type {string} + */ + hedString + /** + * The collection of HED schemas. 
+ * @type {Schemas} + */ + hedSchemas + /** + * Any issues found during tag conversion. + * @type {Issue[]} + */ + conversionIssues + /** + * Any syntax issues found. + * @type {Issue[]} + */ + syntaxIssues + /** + * The constructor to be used to build the parsed HED tags. + * @type {function (string, string, number[], Schemas, string, TagSpec): ParsedHedTag} + */ + ParsedHedTagConstructor + + /** + * Constructor. + * + * @param {string} hedString The HED string to be split and parsed. + * @param {Schemas} hedSchemas The collection of HED schemas. + */ + constructor(hedString, hedSchemas) { + this.hedString = hedString + this.hedSchemas = hedSchemas + this.conversionIssues = [] + this.syntaxIssues = [] + this.ParsedHedTagConstructor = generationToClass[hedSchemas.generation] + } + + /** + * Split and parse a HED string into tags and groups. + * + * @returns {[ParsedHedSubstring[], Object]} The parsed HED string data and any issues found. + */ + splitHedString() { + const [tagSpecs, groupBounds, tokenizingIssues] = new HedStringTokenizer(this.hedString).tokenize() + if (tokenizingIssues.syntax.length > 0) { + return [null, tokenizingIssues] + } + + const [parsedTags, parsingIssues] = this._createParsedTags(tagSpecs, groupBounds) + mergeParsingIssues(tokenizingIssues, parsingIssues) + + return [parsedTags, tokenizingIssues] + } + + /** + * Create parsed HED tags and groups from specifications. + * + * @param {TagSpec[]} tagSpecs The tag specifications. + * @param {GroupSpec} groupSpecs The group specifications. + * @returns {[ParsedHedSubstring[], Object]} The parsed HED tags and any issues. 
+ */ + _createParsedTags(tagSpecs, groupSpecs) { + // Create tags from specifications + const parsedTags = recursiveMap((tagSpec) => this._createParsedTag(tagSpec), tagSpecs) + + // Create groups from the parsed tags + const parsedTagsWithGroups = this._createParsedGroups(parsedTags, groupSpecs.children) + + const issues = { syntax: this.syntaxIssues, conversion: this.conversionIssues } + return [parsedTagsWithGroups, issues] + } + + /** + * Create a parsed tag object based on the tag specification. + * + * @param {TagSpec|ColumnSpliceSpec} tagSpec The tag or column splice specification. + * @returns {ParsedHedTag|ParsedHedColumnSplice|null} The parsed HED tag or column splice. + */ + _createParsedTag(tagSpec) { + if (tagSpec instanceof TagSpec) { + try { + return this.ParsedHedTagConstructor( + tagSpec.tag, + this.hedString, + tagSpec.bounds, + this.hedSchemas, + tagSpec.library, + tagSpec, + ) + } catch (issueError) { + this._handleIssueError(issueError) + return null + } + } else if (tagSpec instanceof ColumnSpliceSpec) { + return new ParsedHedColumnSplice(tagSpec.columnName, tagSpec.bounds) + } + } + + /** + * Handle any issue encountered during tag parsing. + * + * @param {Error|IssueError} issueError The error encountered. + */ + _handleIssueError(issueError) { + if (issueError instanceof IssueError) { + this.conversionIssues.push(issueError.issue) + } else if (issueError instanceof Error) { + this.conversionIssues.push(generateIssue('internalError', { message: issueError.message })) + } + } + + /** + * Create parsed HED groups from parsed tags and group specifications. + * + * @param {ParsedHedTag[]} tags The parsed HED tags. + * @param {GroupSpec[]} groupSpecs The group specifications. + * @returns {ParsedHedSubstring[]} The parsed HED tags and groups. 
+ */ + _createParsedGroups(tags, groupSpecs) { + const tagGroups = [] + let index = 0 + + for (const tag of tags) { + if (Array.isArray(tag)) { + const groupSpec = groupSpecs[index] + tagGroups.push( + new ParsedHedGroup( + this._createParsedGroups(tag, groupSpec.children), + this.hedSchemas, + this.hedString, + groupSpec.bounds, + ), + ) + index++ + } else if (tag !== null) { + tagGroups.push(tag) + } + } + + return tagGroups + } +} diff --git a/parser/tokenizer.js b/parser/tokenizer.js index 89ecaab3..b308a9d9 100644 --- a/parser/tokenizer.js +++ b/parser/tokenizer.js @@ -1,3 +1,5 @@ +import { unicodeName } from 'unicode-name' + import { generateIssue } from '../common/issues/issues' import { stringIsEmpty } from '../utils/string' import { replaceTagNameWithPound } from '../utils/hedStrings' @@ -12,6 +14,14 @@ const slashCharacter = '/' const invalidCharacters = new Set(['[', ']', '~', '"']) const invalidCharactersOutsideOfValues = new Set([':']) +// C0 control codes +for (let i = 0x00; i <= 0x1f; i++) { + invalidCharacters.add(String.fromCodePoint(i)) +} +// DEL and C1 control codes +for (let i = 0x7f; i <= 0x9f; i++) { + invalidCharacters.add(String.fromCodePoint(i)) +} /** * A specification for a tokenized substring. 
@@ -366,7 +376,7 @@ export class HedStringTokenizer { _pushInvalidCharacterIssue(character, index) { this.syntaxIssues.push( generateIssue('invalidCharacter', { - character: character, + character: unicodeName(character), index: index, string: this.hedString, }), diff --git a/tests/bids.spec.js b/tests/bids.spec.js index af427877..f62b3de9 100644 --- a/tests/bids.spec.js +++ b/tests/bids.spec.js @@ -8,7 +8,7 @@ import { SchemaSpec, SchemasSpec } from '../common/schema/types' import { buildBidsSchemas, parseSchemasSpec } from '../bids/schema' import { BidsDataset, BidsHedIssue, BidsIssue, validateBidsDataset } from '../bids' import { bidsDatasetDescriptions, bidsSidecars, bidsTsvFiles } from './bids.spec.data' -import { parseHedString } from '../parser/main' +import { parseHedString } from '../parser/parser' import { BidsHedTsvParser } from '../bids/validator/bidsHedTsvValidator' describe('BIDS datasets', () => { @@ -668,7 +668,7 @@ describe('BIDS datasets', () => { syntax: [ BidsHedIssue.fromHedIssue( generateIssue('invalidCharacter', { - character: '{', + character: 'LEFT CURLY BRACKET', index: 9, string: '(Def/Acc/{response_time})', }), diff --git a/tests/event.spec.js b/tests/event.spec.js index 87ef696b..dfc22f2e 100644 --- a/tests/event.spec.js +++ b/tests/event.spec.js @@ -4,7 +4,7 @@ import { beforeAll, describe, it } from '@jest/globals' import * as hed from '../validator/event' import { buildSchemas } from '../validator/schema/init' -import { parseHedString } from '../parser/main' +import { parseHedString } from '../parser/parser' import { ParsedHedTag } from '../parser/parsedHedTag' import { HedValidator, Hed2Validator, Hed3Validator } from '../validator/event' import { generateIssue } from '../common/issues/issues' @@ -184,11 +184,13 @@ describe('HED string and event validation', () => { closingBracket: '/Attribute/Object side/Left,/Participant/Effect]/Body part/Arm', tilde: '/Attribute/Object side/Left,/Participant/Effect~/Body part/Arm', doubleQuote: 
'/Attribute/Object side/Left,/Participant/Effect"/Body part/Arm', + null: '/Attribute/Object side/Left,/Participant/Effect/Body part/Arm\0', + tab: '/Attribute/Object side/Left,/Participant/Effect/Body part/Arm\t', } const expectedIssues = { openingBrace: [ generateIssue('invalidCharacter', { - character: '{', + character: 'LEFT CURLY BRACKET', index: 47, string: testStrings.openingBrace, }), @@ -201,54 +203,42 @@ describe('HED string and event validation', () => { ], openingBracket: [ generateIssue('invalidCharacter', { - character: '[', + character: 'LEFT SQUARE BRACKET', index: 47, string: testStrings.openingBracket, }), ], closingBracket: [ generateIssue('invalidCharacter', { - character: ']', + character: 'RIGHT SQUARE BRACKET', index: 47, string: testStrings.closingBracket, }), ], tilde: [ generateIssue('invalidCharacter', { - character: '~', + character: 'TILDE', index: 47, string: testStrings.tilde, }), ], doubleQuote: [ generateIssue('invalidCharacter', { - character: '"', + character: 'QUOTATION MARK', index: 47, string: testStrings.doubleQuote, }), ], - } - // No-op function as this check is done during the parsing stage. 
- // eslint-disable-next-line no-unused-vars - validatorSyntactic(testStrings, expectedIssues, (validator) => {}) - }) - - it('should substitute and warn for certain illegal characters', () => { - const testStrings = { - nul: '/Attribute/Object side/Left,/Participant/Effect/Body part/Arm\0', - tab: '/Attribute/Object side/Left,/Participant/Effect/Body part/Arm\t', - } - const expectedIssues = { - nul: [ + null: [ generateIssue('invalidCharacter', { - character: 'ASCII NUL', + character: 'NULL', index: 61, - string: testStrings.nul, + string: testStrings.null, }), ], tab: [ generateIssue('invalidCharacter', { - character: 'Tab', + character: 'CHARACTER TABULATION', index: 61, string: testStrings.tab, }), diff --git a/tests/stringParser.spec.js b/tests/stringParser.spec.js index 0e3aca2f..c9335456 100644 --- a/tests/stringParser.spec.js +++ b/tests/stringParser.spec.js @@ -5,9 +5,9 @@ import { beforeAll, describe, it } from '@jest/globals' import { generateIssue } from '../common/issues/issues' import { Schemas, SchemaSpec, SchemasSpec } from '../common/schema/types' import { recursiveMap } from '../utils/array' -import { parseHedString } from '../parser/main' +import { parseHedString } from '../parser/parser' import { ParsedHedTag } from '../parser/parsedHedTag' -import splitHedString from '../parser/splitHedString' +import HedStringSplitter from '../parser/splitter' import { buildSchemas } from '../validator/schema/init' import ColumnSplicer from '../parser/columnSplicer' import ParsedHedGroup from '../parser/parsedHedGroup' @@ -21,6 +21,8 @@ describe('HED string parsing', () => { */ const originalMap = (parsedTag) => parsedTag.originalTag + const splitHedString = (hedString, hedSchemas) => new HedStringSplitter(hedString, hedSchemas).splitHedString() + const hedSchemaFile = 'tests/data/HED8.0.0.xml' let hedSchemas @@ -82,7 +84,7 @@ describe('HED string parsing', () => { conversion: [], syntax: [ generateIssue('invalidCharacter', { - character: '[', + character: 
'LEFT SQUARE BRACKET', index: 56, string: testStrings.openingSquare, }), @@ -92,7 +94,7 @@ describe('HED string parsing', () => { conversion: [], syntax: [ generateIssue('invalidCharacter', { - character: ']', + character: 'RIGHT SQUARE BRACKET', index: 56, string: testStrings.closingSquare, }), @@ -102,7 +104,7 @@ describe('HED string parsing', () => { conversion: [], syntax: [ generateIssue('invalidCharacter', { - character: '~', + character: 'TILDE', index: 56, string: testStrings.tilde, }), diff --git a/validator/dataset.js b/validator/dataset.js index 29dcd90e..831b1209 100644 --- a/validator/dataset.js +++ b/validator/dataset.js @@ -2,7 +2,7 @@ import zip from 'lodash/zip' import { generateIssue, Issue } from '../common/issues/issues' import { validateHedEventWithDefinitions } from './event' -import { parseHedStrings } from '../parser/main' +import { parseHedStrings } from '../parser/parser' import { filterNonEqualDuplicates } from '../utils/map' /** diff --git a/validator/event/init.js b/validator/event/init.js index b8ed69eb..6c27f44d 100644 --- a/validator/event/init.js +++ b/validator/event/init.js @@ -1,4 +1,4 @@ -import { parseHedString } from '../../parser/main' +import { parseHedString } from '../../parser/parser' import ParsedHedString from '../../parser/parsedHedString' import { Schemas } from '../../common/schema/types'