Skip to content

Commit

Permalink
Merge pull request #212 from VisLab/update-tokenizer
Browse files Browse the repository at this point in the history
Update tokenizer -- removed extra delimiter tests in parser
  • Loading branch information
happy5214 authored Nov 4, 2024
2 parents 1f4764f + 08956d8 commit 9cc9bcc
Show file tree
Hide file tree
Showing 6 changed files with 166 additions and 152 deletions.
119 changes: 1 addition & 118 deletions parser/parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -34,116 +34,6 @@ class HedStringParser {
this.hedSchemas = hedSchemas
}

/**
* Check if the parentheses in a tag group match.
*
* @returns {Issue[]} Any issues found related to unmatched parentheses.
*/
_countTagGroupParentheses() {
const issues = []
const numberOfOpeningParentheses = getCharacterCount(this.hedString, openingGroupCharacter)
const numberOfClosingParentheses = getCharacterCount(this.hedString, closingGroupCharacter)

if (numberOfOpeningParentheses !== numberOfClosingParentheses) {
issues.push(
generateIssue('parentheses', {
opening: numberOfOpeningParentheses,
closing: numberOfClosingParentheses,
}),
)
}

return issues
}

/**
* Check if a comma is missing after an opening parenthesis.
*
* @param {string} lastNonEmptyCharacter The last non-empty character.
* @param {string} currentCharacter The current character in the HED string.
* @returns {boolean} Whether a comma is missing after a closing parenthesis.
*/
_isCommaMissingAfterClosingParenthesis(lastNonEmptyCharacter, currentCharacter) {
return (
lastNonEmptyCharacter === closingGroupCharacter &&
!(delimiters.has(currentCharacter) || currentCharacter === closingGroupCharacter)
)
}

/**
* Find delimiter-related issues in a HED string.
*
* @returns {Issue[]} Any issues related to delimiters.
*/
_findDelimiterIssues() {
const issues = []
let lastNonEmptyValidCharacter = ''
let lastNonEmptyValidIndex = 0
let currentTag = ''

for (let i = 0; i < this.hedString.length; i++) {
const currentCharacter = this.hedString.charAt(i)
currentTag += currentCharacter

if (stringIsEmpty(currentCharacter)) {
continue
}

if (delimiters.has(currentCharacter)) {
if (currentTag.trim() === currentCharacter) {
issues.push(
generateIssue('extraDelimiter', {
character: currentCharacter,
index: i,
string: this.hedString,
}),
)
currentTag = ''
continue
}
currentTag = ''
} else if (currentCharacter === openingGroupCharacter) {
if (currentTag.trim() !== openingGroupCharacter) {
issues.push(generateIssue('commaMissing', { tag: currentTag }))
}
currentTag = ''
} else if (this._isCommaMissingAfterClosingParenthesis(lastNonEmptyValidCharacter, currentCharacter)) {
issues.push(
generateIssue('commaMissing', {
tag: currentTag.slice(0, -1),
}),
)
break
}

lastNonEmptyValidCharacter = currentCharacter
lastNonEmptyValidIndex = i
}

if (delimiters.has(lastNonEmptyValidCharacter)) {
issues.push(
generateIssue('extraDelimiter', {
character: lastNonEmptyValidCharacter,
index: lastNonEmptyValidIndex,
string: this.hedString,
}),
)
}

return issues
}

/**
* Validate the full unparsed HED string.
*
* @returns {Object<string, Issue[]>} Any issues found during validation.
*/
_validateFullUnparsedHedString() {
const delimiterIssues = [].concat(this._countTagGroupParentheses(), this._findDelimiterIssues())

return { delimiter: delimiterIssues }
}

/**
* Parse a full HED string.
*
Expand All @@ -154,14 +44,7 @@ class HedStringParser {
return [this.hedString, {}]
}

const fullStringIssues = this._validateFullUnparsedHedString()
if (fullStringIssues.delimiter.length > 0) {
fullStringIssues.syntax = []
return [null, fullStringIssues]
}

const [parsedTags, splitIssues] = new HedStringSplitter(this.hedString, this.hedSchemas).splitHedString()
const parsingIssues = Object.assign(fullStringIssues, splitIssues)
const [parsedTags, parsingIssues] = new HedStringSplitter(this.hedString, this.hedSchemas).splitHedString()
if (parsedTags === null) {
return [null, parsingIssues]
}
Expand Down
11 changes: 11 additions & 0 deletions parser/tokenizer.js
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,13 @@ export class HedStringTokenizer {
this.pushIssue('emptyTagFound', this.state.lastDelimiter[1]) // Extra comma
} else if (this.state.lastSlash >= 0 && this.hedString.slice(this.state.lastSlash + 1).trim().length === 0) {
this.pushIssue('extraSlash', this.state.lastSlash) // Extra slash
}
if (
this.state.currentToken.trim().length > 0 &&
![undefined, CHARACTERS.COMMA].includes(this.state.lastDelimiter[0])
) {
// Missing comma
this.pushIssue('commaMissing', this.state.lastDelimiter[1] + 1)
} else {
if (this.state.currentToken.trim().length > 0) {
this.pushTag(this.hedString.length)
Expand Down Expand Up @@ -261,6 +268,10 @@ export class HedStringTokenizer {
handleOpeningGroup(i) {
if (this.state.lastDelimiter[0] === CHARACTERS.OPENING_COLUMN) {
this.pushIssue('unclosedCurlyBrace', this.state.lastDelimiter[1])
} else if (this.state.lastDelimiter[0] === CHARACTERS.CLOSING_COLUMN) {
this.pushIssue('commaMissing', this.state.lastDelimiter[1])
} else if (this.state.currentToken.trim().length > 0) {
this.pushInvalidTag('commaMissing', i, this.state.currentToken.trim())
} else {
this.state.currentGroupStack.push([])
this.state.parenthesesStack.push(new GroupSpec(i, undefined, []))
Expand Down
6 changes: 4 additions & 2 deletions tests/event.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,8 @@ describe('HED string and event validation', () => {
describe('Full HED Strings', () => {
const validatorSyntactic = validatorSyntacticBase

it('should not have mismatched parentheses', () => {
//TODO: Remove this test -- the separate test for mismatched parentheses is no longer performed.
it.skip('should not have mismatched parentheses', () => {
const testStrings = {
extraOpening:
'Action/Reach/To touch,((Attribute/Object side/Left,Participant/Effect/Body part/Arm),Attribute/Location/Screen/Top/70 px,Attribute/Location/Screen/Left/23 px',
Expand All @@ -84,7 +85,8 @@ describe('HED string and event validation', () => {
validatorSyntactic(testStrings, expectedIssues, (validator) => {})
})

it('should not have malformed delimiters', () => {
// TODO: This test is replaced by tokenizer tests and should be removed
it.skip('should not have malformed delimiters', () => {
const testStrings = {
missingOpeningComma:
'Action/Reach/To touch(Attribute/Object side/Left,Participant/Effect/Body part/Arm),Attribute/Location/Screen/Top/70 px,Attribute/Location/Screen/Left/23 px',
Expand Down
178 changes: 148 additions & 30 deletions tests/testData/tokenizerTests.data.js
Original file line number Diff line number Diff line change
Expand Up @@ -268,36 +268,6 @@ export const tokenizerTests = [
},
],
},
{
name: 'invalid-empty-tag-in-various-places',
description: 'Empty tags in various places (empty groups are allowed).',
tests: [
{
testname: 'end-in-comma',
explanation: 'Cannot end in a comma',
string: 'x,y,',
tagSpecs: [],
groupSpec: null,
errors: [generateIssue('emptyTagFound', { index: '3', string: 'x,y,' })],
},
{
testname: 'double-in-comma',
explanation: 'Cannot have double commas',
string: 'x,,y,',
tagSpecs: [],
groupSpec: null,
errors: [generateIssue('emptyTagFound', { index: '1', string: 'x,,y,' })],
},
{
testname: 'leading-comma',
string: ',x,y',
explanation: 'Cannot have a leading comma',
tagSpecs: [],
groupSpec: null,
errors: [generateIssue('emptyTagFound', { index: '0', string: ',x,y' })],
},
],
},
{
name: 'invalid-extra-slash-in-various-places',
description: 'Tags cannot have leading or trailing, or extra slashes',
Expand Down Expand Up @@ -352,6 +322,124 @@ export const tokenizerTests = [
},
],
},
{
name: 'invalid-commas',
description: 'Commas must separate tags and groups',
tests: [
{
testname: 'end-in-comma',
explanation: 'Cannot end in a comma',
string: 'x,y,',
tagSpecs: [],
groupSpec: null,
errors: [generateIssue('emptyTagFound', { index: '3', string: 'x,y,' })],
},
{
testname: 'double-in-comma',
explanation: 'Cannot have double commas',
string: 'x,,y,',
tagSpecs: [],
groupSpec: null,
errors: [generateIssue('emptyTagFound', { index: '1', string: 'x,,y,' })],
},
{
testname: 'leading-comma',
string: ',x,y',
explanation: 'Cannot have a leading comma',
tagSpecs: [],
groupSpec: null,
errors: [generateIssue('emptyTagFound', { index: '0', string: ',x,y' })],
},
{
testname: 'missing-comma-before-open',
explanation: 'Must have a comma before open parentheses',
string: 'x, y(z)',
tagSpecs: [],
groupSpec: null,
errors: [generateIssue('commaMissing', { index: '4', string: 'x, y(z)', tag: 'y' })],
},
{
testname: 'missing-comma-before-close',
explanation: 'Must have a comma after closing parentheses',
string: 'x, (y)zp, (w)',
tagSpecs: [],
groupSpec: null,
errors: [generateIssue('invalidTag', { index: '8', string: 'x, (y)zp, (w)' })],
},
{
testname: 'missing-comma-before-close-at-end',
explanation: 'Must have a comma after closing parenthesis at end of string',
string: 'x, (y)z',
tagSpecs: [],
groupSpec: null,
errors: [generateIssue('commaMissing', { index: '6', string: 'x, (y)z' })],
},
{
testname: 'missing-comma-before-open-column',
explanation: 'Must have a comma before open brace',
string: 'x, y, {(z)',
tagSpecs: [],
groupSpec: null,
errors: [generateIssue('unclosedCurlyBrace', { index: '6', string: 'x, y, {(z)' })],
},
{
testname: 'missing-close-brace-before-parentheses',
explanation: 'Must have a closed-brace-after-open-brace',
string: 'x, y, {w(z)',
tagSpecs: [],
groupSpec: null,
errors: [generateIssue('unclosedCurlyBrace', { index: '6', string: 'x, y, {w(z)' })],
},
{
testname: 'missing-comma-before-closing-column',
explanation: 'Must have a comma before closing brace',
string: 'x, {y}(z)',
tagSpecs: [],
groupSpec: null,
errors: [generateIssue('commaMissing', { index: '5', string: 'x, {y}(z)' })],
},
{
testname: 'extra-leading-comma',
explanation: 'Leading comma not allowed',
string: ',x, (y), z',
tagSpecs: [],
groupSpec: null,
errors: [generateIssue('emptyTagFound', { index: '0', string: ',x, (y), z' })],
},
{
testname: 'extra-closing-comma',
explanation: 'Closing comma not allowed',
string: 'x, (y), z,',
tagSpecs: [],
groupSpec: null,
errors: [generateIssue('emptyTagFound', { index: '9', string: 'x, (y), z,' })],
},
{
testname: 'multiple-leading-commas',
explanation: 'Multiple leading commas not allowed',
string: ',,x, (y), z',
tagSpecs: [],
groupSpec: null,
errors: [generateIssue('emptyTagFound', { index: '0', string: ',,x, (y), z' })],
},
{
testname: 'multiple-closing-commas',
explanation: 'Multiple closing comma not allowed',
string: 'x, (y), z,,',
tagSpecs: [],
groupSpec: null,
errors: [generateIssue('emptyTagFound', { index: '9', string: 'x, (y), z,,' })],
},
{
testname: 'multiple-internal-commas',
explanation: 'Multiple closing commas not allowed',
string: 'x, (y),, z',
tagSpecs: [],
groupSpec: null,
errors: [generateIssue('emptyTagFound', { index: '6', string: 'x, (y),, z' })],
},
],
},
{
name: 'invalid-improper-curly-braces',
description: 'Curly braces cannot have commas or parentheses or other curly braces',
Expand Down Expand Up @@ -398,4 +486,34 @@ export const tokenizerTests = [
},
],
},
{
name: 'invalid-parenthesis',
description: 'Various types of mismatched parentheses',
tests: [
{
testname: 'extra-opening-parentheses',
explanation: 'Parentheses must match',
string: 'x, (((y, z), w), u',
tagSpecs: [],
groupSpec: null,
errors: [generateIssue('unclosedParenthesis', { index: '3', string: 'x, (((y, z), w), u' })],
},
{
testname: 'extra-closing-parentheses',
explanation: 'Cannot parentheses inside curly braces',
string: 'x, (y, z)), w',
tagSpecs: [],
groupSpec: null,
errors: [generateIssue('unopenedParenthesis', { index: '9', string: 'x, (y, z)), w' })],
},
{
testname: 'parentheses-in-wrong-order',
explanation: 'Nesting must be proper',
string: '((x),y))(z',
tagSpecs: [],
groupSpec: null,
errors: [generateIssue('unopenedParenthesis', { index: '7', string: '((x),y))(z' })],
},
],
},
]
2 changes: 1 addition & 1 deletion tests/tokenizerTests.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import { tokenizerTests } from './testData/tokenizerTests.data'
const runAll = true
let onlyRun = new Map()
if (!runAll) {
onlyRun = new Map([['invalid-empty-tag-in-various-places', ['end-in-comma']]])
onlyRun = new Map([['invalid-comma-missing-or-extra', ['missing-comma-before-close']]])
}

describe('Tokenizer validation using JSON tests', () => {
Expand Down
2 changes: 1 addition & 1 deletion validator/event/init.js
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ const initiallyValidateHedString = function (hedString, hedSchemas, options, def
}
if (parsedString === null) {
return [null, [].concat(...Object.values(parsingIssues)), null]
} else if (parsingIssues.syntax.length + parsingIssues.delimiter.length > 0) {
} else if (parsingIssues.syntax.length > 0) {
hedSchemas = new Schemas(null)
}
let hedValidator
Expand Down

0 comments on commit 9cc9bcc

Please sign in to comment.