Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update tokenizer -- removed extra delimiter tests in parser #212

Merged
merged 4 commits into from
Nov 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
119 changes: 1 addition & 118 deletions parser/parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -34,116 +34,6 @@ class HedStringParser {
this.hedSchemas = hedSchemas
}

/**
* Check if the parentheses in a tag group match.
*
* @returns {Issue[]} Any issues found related to unmatched parentheses.
*/
_countTagGroupParentheses() {
const issues = []
const numberOfOpeningParentheses = getCharacterCount(this.hedString, openingGroupCharacter)
const numberOfClosingParentheses = getCharacterCount(this.hedString, closingGroupCharacter)

if (numberOfOpeningParentheses !== numberOfClosingParentheses) {
issues.push(
generateIssue('parentheses', {
opening: numberOfOpeningParentheses,
closing: numberOfClosingParentheses,
}),
)
}

return issues
}

/**
* Check if a comma is missing after an opening parenthesis.
*
* @param {string} lastNonEmptyCharacter The last non-empty character.
* @param {string} currentCharacter The current character in the HED string.
* @returns {boolean} Whether a comma is missing after a closing parenthesis.
*/
_isCommaMissingAfterClosingParenthesis(lastNonEmptyCharacter, currentCharacter) {
return (
lastNonEmptyCharacter === closingGroupCharacter &&
!(delimiters.has(currentCharacter) || currentCharacter === closingGroupCharacter)
)
}

/**
* Find delimiter-related issues in a HED string.
*
* @returns {Issue[]} Any issues related to delimiters.
*/
_findDelimiterIssues() {
const issues = []
let lastNonEmptyValidCharacter = ''
let lastNonEmptyValidIndex = 0
let currentTag = ''

for (let i = 0; i < this.hedString.length; i++) {
const currentCharacter = this.hedString.charAt(i)
currentTag += currentCharacter

if (stringIsEmpty(currentCharacter)) {
continue
}

if (delimiters.has(currentCharacter)) {
if (currentTag.trim() === currentCharacter) {
issues.push(
generateIssue('extraDelimiter', {
character: currentCharacter,
index: i,
string: this.hedString,
}),
)
currentTag = ''
continue
}
currentTag = ''
} else if (currentCharacter === openingGroupCharacter) {
if (currentTag.trim() !== openingGroupCharacter) {
issues.push(generateIssue('commaMissing', { tag: currentTag }))
}
currentTag = ''
} else if (this._isCommaMissingAfterClosingParenthesis(lastNonEmptyValidCharacter, currentCharacter)) {
issues.push(
generateIssue('commaMissing', {
tag: currentTag.slice(0, -1),
}),
)
break
}

lastNonEmptyValidCharacter = currentCharacter
lastNonEmptyValidIndex = i
}

if (delimiters.has(lastNonEmptyValidCharacter)) {
issues.push(
generateIssue('extraDelimiter', {
character: lastNonEmptyValidCharacter,
index: lastNonEmptyValidIndex,
string: this.hedString,
}),
)
}

return issues
}

/**
* Validate the full unparsed HED string.
*
* @returns {Object<string, Issue[]>} Any issues found during validation.
*/
_validateFullUnparsedHedString() {
const delimiterIssues = [].concat(this._countTagGroupParentheses(), this._findDelimiterIssues())

return { delimiter: delimiterIssues }
}

/**
* Parse a full HED string.
*
Expand All @@ -154,14 +44,7 @@ class HedStringParser {
return [this.hedString, {}]
}

const fullStringIssues = this._validateFullUnparsedHedString()
if (fullStringIssues.delimiter.length > 0) {
fullStringIssues.syntax = []
return [null, fullStringIssues]
}

const [parsedTags, splitIssues] = new HedStringSplitter(this.hedString, this.hedSchemas).splitHedString()
const parsingIssues = Object.assign(fullStringIssues, splitIssues)
const [parsedTags, parsingIssues] = new HedStringSplitter(this.hedString, this.hedSchemas).splitHedString()
if (parsedTags === null) {
return [null, parsingIssues]
}
Expand Down
11 changes: 11 additions & 0 deletions parser/tokenizer.js
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,13 @@ export class HedStringTokenizer {
this.pushIssue('emptyTagFound', this.state.lastDelimiter[1]) // Extra comma
} else if (this.state.lastSlash >= 0 && this.hedString.slice(this.state.lastSlash + 1).trim().length === 0) {
this.pushIssue('extraSlash', this.state.lastSlash) // Extra slash
}
if (
this.state.currentToken.trim().length > 0 &&
![undefined, CHARACTERS.COMMA].includes(this.state.lastDelimiter[0])
) {
// Missing comma
this.pushIssue('commaMissing', this.state.lastDelimiter[1] + 1)
} else {
if (this.state.currentToken.trim().length > 0) {
this.pushTag(this.hedString.length)
Expand Down Expand Up @@ -261,6 +268,10 @@ export class HedStringTokenizer {
handleOpeningGroup(i) {
if (this.state.lastDelimiter[0] === CHARACTERS.OPENING_COLUMN) {
this.pushIssue('unclosedCurlyBrace', this.state.lastDelimiter[1])
} else if (this.state.lastDelimiter[0] === CHARACTERS.CLOSING_COLUMN) {
this.pushIssue('commaMissing', this.state.lastDelimiter[1])
} else if (this.state.currentToken.trim().length > 0) {
this.pushInvalidTag('commaMissing', i, this.state.currentToken.trim())
} else {
this.state.currentGroupStack.push([])
this.state.parenthesesStack.push(new GroupSpec(i, undefined, []))
Expand Down
6 changes: 4 additions & 2 deletions tests/event.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,8 @@ describe('HED string and event validation', () => {
describe('Full HED Strings', () => {
const validatorSyntactic = validatorSyntacticBase

it('should not have mismatched parentheses', () => {
//TODO: Remove this test -- the separate test for mismatched parentheses is no longer performed.
it.skip('should not have mismatched parentheses', () => {
const testStrings = {
extraOpening:
'Action/Reach/To touch,((Attribute/Object side/Left,Participant/Effect/Body part/Arm),Attribute/Location/Screen/Top/70 px,Attribute/Location/Screen/Left/23 px',
Expand All @@ -84,7 +85,8 @@ describe('HED string and event validation', () => {
validatorSyntactic(testStrings, expectedIssues, (validator) => {})
})

it('should not have malformed delimiters', () => {
// TODO: This test is replaced by tokenizer tests and should be removed
it.skip('should not have malformed delimiters', () => {
const testStrings = {
missingOpeningComma:
'Action/Reach/To touch(Attribute/Object side/Left,Participant/Effect/Body part/Arm),Attribute/Location/Screen/Top/70 px,Attribute/Location/Screen/Left/23 px',
Expand Down
178 changes: 148 additions & 30 deletions tests/testData/tokenizerTests.data.js
Original file line number Diff line number Diff line change
Expand Up @@ -268,36 +268,6 @@ export const tokenizerTests = [
},
],
},
{
name: 'invalid-empty-tag-in-various-places',
description: 'Empty tags in various places (empty groups are allowed).',
tests: [
{
testname: 'end-in-comma',
explanation: 'Cannot end in a comma',
string: 'x,y,',
tagSpecs: [],
groupSpec: null,
errors: [generateIssue('emptyTagFound', { index: '3', string: 'x,y,' })],
},
{
testname: 'double-in-comma',
explanation: 'Cannot have double commas',
string: 'x,,y,',
tagSpecs: [],
groupSpec: null,
errors: [generateIssue('emptyTagFound', { index: '1', string: 'x,,y,' })],
},
{
testname: 'leading-comma',
string: ',x,y',
explanation: 'Cannot have a leading comma',
tagSpecs: [],
groupSpec: null,
errors: [generateIssue('emptyTagFound', { index: '0', string: ',x,y' })],
},
],
},
{
name: 'invalid-extra-slash-in-various-places',
description: 'Tags cannot have leading or trailing, or extra slashes',
Expand Down Expand Up @@ -352,6 +322,124 @@ export const tokenizerTests = [
},
],
},
{
name: 'invalid-commas',
description: 'Commas must separate tags and groups',
tests: [
{
testname: 'end-in-comma',
explanation: 'Cannot end in a comma',
string: 'x,y,',
tagSpecs: [],
groupSpec: null,
errors: [generateIssue('emptyTagFound', { index: '3', string: 'x,y,' })],
},
{
testname: 'double-in-comma',
explanation: 'Cannot have double commas',
string: 'x,,y,',
tagSpecs: [],
groupSpec: null,
errors: [generateIssue('emptyTagFound', { index: '1', string: 'x,,y,' })],
},
{
testname: 'leading-comma',
string: ',x,y',
explanation: 'Cannot have a leading comma',
tagSpecs: [],
groupSpec: null,
errors: [generateIssue('emptyTagFound', { index: '0', string: ',x,y' })],
},
{
testname: 'missing-comma-before-open',
explanation: 'Must have a comma before open parentheses',
string: 'x, y(z)',
tagSpecs: [],
groupSpec: null,
errors: [generateIssue('commaMissing', { index: '4', string: 'x, y(z)', tag: 'y' })],
},
{
testname: 'missing-comma-before-close',
explanation: 'Must have a comma after closing parentheses',
string: 'x, (y)zp, (w)',
tagSpecs: [],
groupSpec: null,
errors: [generateIssue('invalidTag', { index: '8', string: 'x, (y)zp, (w)' })],
},
{
testname: 'missing-comma-before-close-at-end',
explanation: 'Must have a comma after closing parenthesis at end of string',
string: 'x, (y)z',
tagSpecs: [],
groupSpec: null,
errors: [generateIssue('commaMissing', { index: '6', string: 'x, (y)z' })],
},
{
testname: 'missing-comma-before-open-column',
explanation: 'Must have a comma before open brace',
string: 'x, y, {(z)',
tagSpecs: [],
groupSpec: null,
errors: [generateIssue('unclosedCurlyBrace', { index: '6', string: 'x, y, {(z)' })],
},
{
testname: 'missing-close-brace-before-parentheses',
explanation: 'Must have a closed-brace-after-open-brace',
string: 'x, y, {w(z)',
tagSpecs: [],
groupSpec: null,
errors: [generateIssue('unclosedCurlyBrace', { index: '6', string: 'x, y, {w(z)' })],
},
{
testname: 'missing-comma-before-closing-column',
explanation: 'Must have a comma before closing brace',
string: 'x, {y}(z)',
tagSpecs: [],
groupSpec: null,
errors: [generateIssue('commaMissing', { index: '5', string: 'x, {y}(z)' })],
},
{
testname: 'extra-leading-comma',
explanation: 'Leading comma not allowed',
string: ',x, (y), z',
tagSpecs: [],
groupSpec: null,
errors: [generateIssue('emptyTagFound', { index: '0', string: ',x, (y), z' })],
},
{
testname: 'extra-closing-comma',
explanation: 'Closing comma not allowed',
string: 'x, (y), z,',
tagSpecs: [],
groupSpec: null,
errors: [generateIssue('emptyTagFound', { index: '9', string: 'x, (y), z,' })],
},
{
testname: 'multiple-leading-commas',
explanation: 'Multiple leading commas not allowed',
string: ',,x, (y), z',
tagSpecs: [],
groupSpec: null,
errors: [generateIssue('emptyTagFound', { index: '0', string: ',,x, (y), z' })],
},
{
testname: 'multiple-closing-commas',
explanation: 'Multiple closing comma not allowed',
string: 'x, (y), z,,',
tagSpecs: [],
groupSpec: null,
errors: [generateIssue('emptyTagFound', { index: '9', string: 'x, (y), z,,' })],
},
{
testname: 'multiple-internal-commas',
explanation: 'Multiple closing commas not allowed',
string: 'x, (y),, z',
tagSpecs: [],
groupSpec: null,
errors: [generateIssue('emptyTagFound', { index: '6', string: 'x, (y),, z' })],
},
],
},
{
name: 'invalid-improper-curly-braces',
description: 'Curly braces cannot have commas or parentheses or other curly braces',
Expand Down Expand Up @@ -398,4 +486,34 @@ export const tokenizerTests = [
},
],
},
{
name: 'invalid-parenthesis',
description: 'Various types of mismatched parentheses',
tests: [
{
testname: 'extra-opening-parentheses',
explanation: 'Parentheses must match',
string: 'x, (((y, z), w), u',
tagSpecs: [],
groupSpec: null,
errors: [generateIssue('unclosedParenthesis', { index: '3', string: 'x, (((y, z), w), u' })],
},
{
testname: 'extra-closing-parentheses',
explanation: 'Cannot parentheses inside curly braces',
string: 'x, (y, z)), w',
tagSpecs: [],
groupSpec: null,
errors: [generateIssue('unopenedParenthesis', { index: '9', string: 'x, (y, z)), w' })],
},
{
testname: 'parentheses-in-wrong-order',
explanation: 'Nesting must be proper',
string: '((x),y))(z',
tagSpecs: [],
groupSpec: null,
errors: [generateIssue('unopenedParenthesis', { index: '7', string: '((x),y))(z' })],
},
],
},
]
2 changes: 1 addition & 1 deletion tests/tokenizerTests.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import { tokenizerTests } from './testData/tokenizerTests.data'
const runAll = true
let onlyRun = new Map()
if (!runAll) {
onlyRun = new Map([['invalid-empty-tag-in-various-places', ['end-in-comma']]])
onlyRun = new Map([['invalid-comma-missing-or-extra', ['missing-comma-before-close']]])
}

describe('Tokenizer validation using JSON tests', () => {
Expand Down
2 changes: 1 addition & 1 deletion validator/event/init.js
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ const initiallyValidateHedString = function (hedString, hedSchemas, options, def
}
if (parsedString === null) {
return [null, [].concat(...Object.values(parsingIssues)), null]
} else if (parsingIssues.syntax.length + parsingIssues.delimiter.length > 0) {
} else if (parsingIssues.syntax.length > 0) {
hedSchemas = new Schemas(null)
}
let hedValidator
Expand Down
Loading