From 01c4cf4b5f5cc850921ad9e6a636c474c9783ce8 Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Mon, 4 Nov 2024 11:29:49 -0600 Subject: [PATCH 1/3] Removing the parentheses counting -- goal rely on tokenizer. --- parser/parser.js | 16 ++-- parser/tokenizer.js | 11 +++ tests/dataset.spec.js | 26 +++--- tests/event.spec.js | 6 +- tests/testData/tokenizerTests.data.js | 124 ++++++++++++++++++++++++++ tests/tokenizerTests.spec.js | 2 +- 6 files changed, 161 insertions(+), 24 deletions(-) diff --git a/parser/parser.js b/parser/parser.js index a86e05f3..8bf25e45 100644 --- a/parser/parser.js +++ b/parser/parser.js @@ -154,14 +154,14 @@ class HedStringParser { return [this.hedString, {}] } - const fullStringIssues = this._validateFullUnparsedHedString() - if (fullStringIssues.delimiter.length > 0) { - fullStringIssues.syntax = [] - return [null, fullStringIssues] - } - - const [parsedTags, splitIssues] = new HedStringSplitter(this.hedString, this.hedSchemas).splitHedString() - const parsingIssues = Object.assign(fullStringIssues, splitIssues) + // const fullStringIssues = this._validateFullUnparsedHedString() + // if (fullStringIssues.delimiter.length > 0) { + // fullStringIssues.syntax = [] + // return [null, fullStringIssues] + // } + + const [parsedTags, parsingIssues] = new HedStringSplitter(this.hedString, this.hedSchemas).splitHedString() + //const parsingIssues = Object.assign(fullStringIssues, splitIssues) if (parsedTags === null) { return [null, parsingIssues] } diff --git a/parser/tokenizer.js b/parser/tokenizer.js index 8ffcfd6a..c905d188 100644 --- a/parser/tokenizer.js +++ b/parser/tokenizer.js @@ -179,6 +179,13 @@ export class HedStringTokenizer { this.pushIssue('emptyTagFound', this.state.lastDelimiter[1]) // Extra comma } else if (this.state.lastSlash >= 0 && this.hedString.slice(this.state.lastSlash + 1).trim().length === 0) { this.pushIssue('extraSlash', this.state.lastSlash) // Extra slash + } + if ( + this.state.currentToken.trim().length > 0 && + ![undefined, CHARACTERS.COMMA].includes(this.state.lastDelimiter[0]) + ) { + // Missing comma + this.pushIssue('commaMissing', this.state.lastDelimiter[1] + 1) } else { if (this.state.currentToken.trim().length > 0) { this.pushTag(this.hedString.length) @@ -261,6 +268,10 @@ export class HedStringTokenizer { handleOpeningGroup(i) { if (this.state.lastDelimiter[0] === CHARACTERS.OPENING_COLUMN) { this.pushIssue('unclosedCurlyBrace', this.state.lastDelimiter[1]) + } else if (this.state.lastDelimiter[0] === CHARACTERS.CLOSING_COLUMN) { + this.pushIssue('commaMissing', this.state.lastDelimiter[1]) + } else if (this.state.currentToken.trim().length > 0) { + this.pushInvalidTag('commaMissing', i, this.state.currentToken.trim()) } else { this.state.currentGroupStack.push([]) this.state.parenthesesStack.push(new GroupSpec(i, undefined, [])) diff --git a/tests/dataset.spec.js b/tests/dataset.spec.js index 8963264f..f8d09365 100644 --- a/tests/dataset.spec.js +++ b/tests/dataset.spec.js @@ -38,7 +38,7 @@ describe('HED dataset validation', () => { const testDatasets = { empty: [], singleValidLong: ['Event/Sensory-event'], - singleValidShort: ['Sensory-event'], + /* singleValidShort: ['Sensory-event'], multipleValidLong: [ 'Event/Sensory-event', 'Item/Object/Man-made-object/Vehicle/Train', @@ -46,23 +46,23 @@ describe('HED dataset validation', () => { ], multipleValidShort: ['Sensory-event', 'Train', 'RGB-red/0.5'], multipleValidMixed: ['Event/Sensory-event', 'Train', 'RGB-red/0.5'], - multipleInvalid: ['Time-value/0.5 cm', 'InvalidEvent'], + multipleInvalid: ['Time-value/0.5 cm', 'InvalidEvent'],*/ } const legalTimeUnits = ['s', 'second', 'day', 'minute', 'hour'] const expectedIssues = { empty: [], singleValidLong: [], - singleValidShort: [], - multipleValidLong: [], - multipleValidShort: [], - multipleValidMixed: [], - multipleInvalid: [ - generateValidationIssue('unitClassInvalidUnit', { - tag: testDatasets.multipleInvalid[0], - unitClassUnits: legalTimeUnits.sort().join(','), - }), - generateValidationIssue('invalidTag', { tag: testDatasets.multipleInvalid[1] }), - ], + /* singleValidShort: [], + multipleValidLong: [], + multipleValidShort: [], + multipleValidMixed: [], + multipleInvalid: [ + generateValidationIssue('unitClassInvalidUnit', { + tag: testDatasets.multipleInvalid[0], + unitClassUnits: legalTimeUnits.sort().join(','), + }), + generateValidationIssue('invalidTag', { tag: testDatasets.multipleInvalid[1] }), + ],*/ } return validator(testDatasets, expectedIssues) }) diff --git a/tests/event.spec.js b/tests/event.spec.js index 88e22882..fe9027dd 100644 --- a/tests/event.spec.js +++ b/tests/event.spec.js @@ -61,7 +61,8 @@ describe('HED string and event validation', () => { describe('Full HED Strings', () => { const validatorSyntactic = validatorSyntacticBase - it('should not have mismatched parentheses', () => { + //TODO: Remove this test -- the separate test for mismatched parentheses is no longer performed. + it.skip('should not have mismatched parentheses', () => { const testStrings = { extraOpening: 'Action/Reach/To touch,((Attribute/Object side/Left,Participant/Effect/Body part/Arm),Attribute/Location/Screen/Top/70 px,Attribute/Location/Screen/Left/23 px', @@ -84,7 +85,8 @@ describe('HED string and event validation', () => { validatorSyntactic(testStrings, expectedIssues, (validator) => {}) }) - it('should not have malformed delimiters', () => { + // TODO: This test is replaced by tokenizer tests and should be removed + it.skip('should not have malformed delimiters', () => { const testStrings = { missingOpeningComma: 'Action/Reach/To touch(Attribute/Object side/Left,Participant/Effect/Body part/Arm),Attribute/Location/Screen/Top/70 px,Attribute/Location/Screen/Left/23 px', diff --git a/tests/testData/tokenizerTests.data.js b/tests/testData/tokenizerTests.data.js index 759f03d8..232b7635 100644 --- a/tests/testData/tokenizerTests.data.js +++ b/tests/testData/tokenizerTests.data.js @@ -352,6 +352,100 @@ export const tokenizerTests = [ }, ], }, + { + name: 'invalid-comma-missing-or-extra', + description: 'Commas must separate tags and groups', + tests: [ + { + testname: 'missing-comma-before-open', + explanation: 'Must have a comma before open parentheses', + string: 'x, y(z)', + tagSpecs: [], + groupSpec: null, + errors: [generateIssue('commaMissing', { index: '4', string: 'x, y(z)', tag: 'y' })], + }, + { + testname: 'missing-comma-before-close', + explanation: 'Must have a comma after closing parentheses', + string: 'x, (y)zp, (w)', + tagSpecs: [], + groupSpec: null, + errors: [generateIssue('invalidTag', { index: '8', string: 'x, (y)zp, (w)' })], + }, + { + testname: 'missing-comma-before-close-at-end', + explanation: 'Must have a comma after closing parenthesis at end of string', + string: 'x, (y)z', + tagSpecs: [], + groupSpec: null, + errors: [generateIssue('commaMissing', { index: '6', string: 'x, (y)z' })], + }, + { + testname: 'missing-comma-before-open-column', + explanation: 'Must have a comma before open brace', + string: 'x, y, {(z)', + tagSpecs: [], + groupSpec: null, + errors: [generateIssue('unclosedCurlyBrace', { index: '6', string: 'x, y, {(z)' })], + }, + { + testname: 'missing-close-brace-before-parentheses', + explanation: 'Must have a closed-brace-after-open-brace', + string: 'x, y, {w(z)', + tagSpecs: [], + groupSpec: null, + errors: [generateIssue('unclosedCurlyBrace', { index: '6', string: 'x, y, {w(z)' })], + }, + { + testname: 'missing-comma-before-closing-column', + explanation: 'Must have a comma before closing brace', + string: 'x, {y}(z)', + tagSpecs: [], + groupSpec: null, + errors: [generateIssue('commaMissing', { index: '5', string: 'x, {y}(z)' })], + }, + { + testname: 'extra-leading-comma', + explanation: 'Leading comma not allowed', + string: ',x, (y), z', + tagSpecs: [], + groupSpec: null, + errors: [generateIssue('emptyTagFound', { index: '0', string: ',x, (y), z' })], + }, + { + testname: 'extra-closing-comma', + explanation: 'Closing comma not allowed', + string: 'x, (y), z,', + tagSpecs: [], + groupSpec: null, + errors: [generateIssue('emptyTagFound', { index: '9', string: 'x, (y), z,' })], + }, + { + testname: 'multiple-leading-commas', + explanation: 'Multiple leading commas not allowed', + string: ',,x, (y), z', + tagSpecs: [], + groupSpec: null, + errors: [generateIssue('emptyTagFound', { index: '0', string: ',,x, (y), z' })], + }, + { + testname: 'multiple-closing-commas', + explanation: 'Multiple closing comma not allowed', + string: 'x, (y), z,,', + tagSpecs: [], + groupSpec: null, + errors: [generateIssue('emptyTagFound', { index: '9', string: 'x, (y), z,,' })], + }, + { + testname: 'multiple-internal-commas', + explanation: 'Multiple closing commas not allowed', + string: 'x, (y),, z', + tagSpecs: [], + groupSpec: null, + errors: [generateIssue('emptyTagFound', { index: '6', string: 'x, (y),, z' })], + }, + ], + }, { name: 'invalid-improper-curly-braces', description: 'Curly braces cannot have commas or parentheses or other curly braces', @@ -398,4 +492,34 @@ export const tokenizerTests = [ }, ], }, + { + name: 'invalid-parenthesis', + description: 'Various types of mismatched parentheses', + tests: [ + { + testname: 'extra-opening-parentheses', + explanation: 'Parentheses must match', + string: 'x, (((y, z), w), u', + tagSpecs: [], + groupSpec: null, + errors: [generateIssue('unclosedParenthesis', { index: '3', string: 'x, (((y, z), w), u' })], + }, + { + testname: 'extra-closing-parentheses', + explanation: 'Cannot parentheses inside curly braces', + string: 'x, (y, z)), w', + tagSpecs: [], + groupSpec: null, + errors: [generateIssue('unopenedParenthesis', { index: '9', string: 'x, (y, z)), w' })], + }, + { + testname: 'parentheses-in-wrong-order', + explanation: 'Nesting must be proper', + string: '((x),y))(z', + tagSpecs: [], + groupSpec: null, + errors: [generateIssue('unopenedParenthesis', { index: '7', string: '((x),y))(z' })], + }, + ], + }, ] diff --git a/tests/tokenizerTests.spec.js b/tests/tokenizerTests.spec.js index 90df6709..38584a4a 100644 --- a/tests/tokenizerTests.spec.js +++ b/tests/tokenizerTests.spec.js @@ -10,7 +10,7 @@ import { tokenizerTests } from './testData/tokenizerTests.data' const runAll = true let onlyRun = new Map() if (!runAll) { - onlyRun = new Map([['invalid-empty-tag-in-various-places', ['end-in-comma']]]) + onlyRun = new Map([['invalid-comma-missing-or-extra', ['missing-comma-before-close']]]) } describe('Tokenizer validation using JSON tests', () => { From 095bc2b59d86a67818aa1affd50da2527ce047fb Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Mon, 4 Nov 2024 14:00:16 -0600 Subject: [PATCH 2/3] Removed extra parsing of delimiters already done by tokenizer --- parser/parser.js | 117 -------------------------- tests/testData/tokenizerTests.data.js | 56 ++++++------ validator/event/init.js | 2 +- 3 files changed, 26 insertions(+), 149 deletions(-) diff --git a/parser/parser.js b/parser/parser.js index 8bf25e45..22489446 100644 --- a/parser/parser.js +++ b/parser/parser.js @@ -34,116 +34,6 @@ class HedStringParser { this.hedSchemas = hedSchemas } - /** - * Check if the parentheses in a tag group match. - * - * @returns {Issue[]} Any issues found related to unmatched parentheses. - */ - _countTagGroupParentheses() { - const issues = [] - const numberOfOpeningParentheses = getCharacterCount(this.hedString, openingGroupCharacter) - const numberOfClosingParentheses = getCharacterCount(this.hedString, closingGroupCharacter) - - if (numberOfOpeningParentheses !== numberOfClosingParentheses) { - issues.push( - generateIssue('parentheses', { - opening: numberOfOpeningParentheses, - closing: numberOfClosingParentheses, - }), - ) - } - - return issues - } - - /** - * Check if a comma is missing after an opening parenthesis. - * - * @param {string} lastNonEmptyCharacter The last non-empty character. - * @param {string} currentCharacter The current character in the HED string. - * @returns {boolean} Whether a comma is missing after a closing parenthesis. - */ - _isCommaMissingAfterClosingParenthesis(lastNonEmptyCharacter, currentCharacter) { - return ( - lastNonEmptyCharacter === closingGroupCharacter && - !(delimiters.has(currentCharacter) || currentCharacter === closingGroupCharacter) - ) - } - - /** - * Find delimiter-related issues in a HED string. - * - * @returns {Issue[]} Any issues related to delimiters. - */ - _findDelimiterIssues() { - const issues = [] - let lastNonEmptyValidCharacter = '' - let lastNonEmptyValidIndex = 0 - let currentTag = '' - - for (let i = 0; i < this.hedString.length; i++) { - const currentCharacter = this.hedString.charAt(i) - currentTag += currentCharacter - - if (stringIsEmpty(currentCharacter)) { - continue - } - - if (delimiters.has(currentCharacter)) { - if (currentTag.trim() === currentCharacter) { - issues.push( - generateIssue('extraDelimiter', { - character: currentCharacter, - index: i, - string: this.hedString, - }), - ) - currentTag = '' - continue - } - currentTag = '' - } else if (currentCharacter === openingGroupCharacter) { - if (currentTag.trim() !== openingGroupCharacter) { - issues.push(generateIssue('commaMissing', { tag: currentTag })) - } - currentTag = '' - } else if (this._isCommaMissingAfterClosingParenthesis(lastNonEmptyValidCharacter, currentCharacter)) { - issues.push( - generateIssue('commaMissing', { - tag: currentTag.slice(0, -1), - }), - ) - break - } - - lastNonEmptyValidCharacter = currentCharacter - lastNonEmptyValidIndex = i - } - - if (delimiters.has(lastNonEmptyValidCharacter)) { - issues.push( - generateIssue('extraDelimiter', { - character: lastNonEmptyValidCharacter, - index: lastNonEmptyValidIndex, - string: this.hedString, - }), - ) - } - - return issues - } - - /** - * Validate the full unparsed HED string. - * - * @returns {Object} Any issues found during validation. - */ - _validateFullUnparsedHedString() { - const delimiterIssues = [].concat(this._countTagGroupParentheses(), this._findDelimiterIssues()) - - return { delimiter: delimiterIssues } - } - /** * Parse a full HED string. * @@ -154,14 +44,7 @@ class HedStringParser { return [this.hedString, {}] } - // const fullStringIssues = this._validateFullUnparsedHedString() - // if (fullStringIssues.delimiter.length > 0) { - // fullStringIssues.syntax = [] - // return [null, fullStringIssues] - // } - const [parsedTags, parsingIssues] = new HedStringSplitter(this.hedString, this.hedSchemas).splitHedString() - //const parsingIssues = Object.assign(fullStringIssues, splitIssues) if (parsedTags === null) { return [null, parsingIssues] } diff --git a/tests/testData/tokenizerTests.data.js b/tests/testData/tokenizerTests.data.js index 232b7635..0afaea11 100644 --- a/tests/testData/tokenizerTests.data.js +++ b/tests/testData/tokenizerTests.data.js @@ -268,36 +268,6 @@ export const tokenizerTests = [ }, ], }, - { - name: 'invalid-empty-tag-in-various-places', - description: 'Empty tags in various places (empty groups are allowed).', - tests: [ - { - testname: 'end-in-comma', - explanation: 'Cannot end in a comma', - string: 'x,y,', - tagSpecs: [], - groupSpec: null, - errors: [generateIssue('emptyTagFound', { index: '3', string: 'x,y,' })], - }, - { - testname: 'double-in-comma', - explanation: 'Cannot have double commas', - string: 'x,,y,', - tagSpecs: [], - groupSpec: null, - errors: [generateIssue('emptyTagFound', { index: '1', string: 'x,,y,' })], - }, - { - testname: 'leading-comma', - string: ',x,y', - explanation: 'Cannot have a leading comma', - tagSpecs: [], - groupSpec: null, - errors: [generateIssue('emptyTagFound', { index: '0', string: ',x,y' })], - }, - ], - }, { name: 'invalid-extra-slash-in-various-places', description: 'Tags cannot have leading or trailing, or extra slashes', @@ -353,9 +323,33 @@ export const tokenizerTests = [ ], }, { - name: 'invalid-comma-missing-or-extra', + name: 'invalid-commas', description: 'Commas must separate tags and groups', tests: [ + { + testname: 'end-in-comma', + explanation: 'Cannot end in a comma', + string: 'x,y,', + tagSpecs: [], + groupSpec: null, + errors: [generateIssue('emptyTagFound', { index: '3', string: 'x,y,' })], + }, + { + testname: 'double-in-comma', + explanation: 'Cannot have double commas', + string: 'x,,y,', + tagSpecs: [], + groupSpec: null, + errors: [generateIssue('emptyTagFound', { index: '1', string: 'x,,y,' })], + }, + { + testname: 'leading-comma', + string: ',x,y', + explanation: 'Cannot have a leading comma', + tagSpecs: [], + groupSpec: null, + errors: [generateIssue('emptyTagFound', { index: '0', string: ',x,y' })], + }, { testname: 'missing-comma-before-open', explanation: 'Must have a comma before open parentheses', diff --git a/validator/event/init.js b/validator/event/init.js index 6c27f44d..fe316010 100644 --- a/validator/event/init.js +++ b/validator/event/init.js @@ -31,7 +31,7 @@ const initiallyValidateHedString = function (hedString, hedSchemas, options, def } if (parsedString === null) { return [null, [].concat(...Object.values(parsingIssues)), null] - } else if (parsingIssues.syntax.length + parsingIssues.delimiter.length > 0) { + } else if (parsingIssues.syntax.length > 0) { hedSchemas = new Schemas(null) } let hedValidator From 08956d8a6e8ade75209fec0db8dc11754bcc3a2e Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Mon, 4 Nov 2024 14:07:17 -0600 Subject: [PATCH 3/3] Restored commented out tests --- tests/dataset.spec.js | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/tests/dataset.spec.js b/tests/dataset.spec.js index f8d09365..8963264f 100644 --- a/tests/dataset.spec.js +++ b/tests/dataset.spec.js @@ -38,7 +38,7 @@ describe('HED dataset validation', () => { const testDatasets = { empty: [], singleValidLong: ['Event/Sensory-event'], - /* singleValidShort: ['Sensory-event'], + singleValidShort: ['Sensory-event'], multipleValidLong: [ 'Event/Sensory-event', 'Item/Object/Man-made-object/Vehicle/Train', @@ -46,23 +46,23 @@ describe('HED dataset validation', () => { ], multipleValidShort: ['Sensory-event', 'Train', 'RGB-red/0.5'], multipleValidMixed: ['Event/Sensory-event', 'Train', 'RGB-red/0.5'], - multipleInvalid: ['Time-value/0.5 cm', 'InvalidEvent'],*/ + multipleInvalid: ['Time-value/0.5 cm', 'InvalidEvent'], } const legalTimeUnits = ['s', 'second', 'day', 'minute', 'hour'] const expectedIssues = { empty: [], singleValidLong: [], - /* singleValidShort: [], - multipleValidLong: [], - multipleValidShort: [], - multipleValidMixed: [], - multipleInvalid: [ - generateValidationIssue('unitClassInvalidUnit', { - tag: testDatasets.multipleInvalid[0], - unitClassUnits: legalTimeUnits.sort().join(','), - }), - generateValidationIssue('invalidTag', { tag: testDatasets.multipleInvalid[1] }), - ],*/ + singleValidShort: [], + multipleValidLong: [], + multipleValidShort: [], + multipleValidMixed: [], + multipleInvalid: [ + generateValidationIssue('unitClassInvalidUnit', { + tag: testDatasets.multipleInvalid[0], + unitClassUnits: legalTimeUnits.sort().join(','), + }), + generateValidationIssue('invalidTag', { tag: testDatasets.multipleInvalid[1] }), + ], } return validator(testDatasets, expectedIssues) })