From 01c4cf4b5f5cc850921ad9e6a636c474c9783ce8 Mon Sep 17 00:00:00 2001
From: Kay Robbins <1189050+VisLab@users.noreply.github.com>
Date: Mon, 4 Nov 2024 11:29:49 -0600
Subject: [PATCH 1/3] Removing the parentheses counting -- goal rely on
 tokenizer.

---
 parser/parser.js                      |  16 ++--
 parser/tokenizer.js                   |  11 +++
 tests/dataset.spec.js                 |  26 +++---
 tests/event.spec.js                   |   6 +-
 tests/testData/tokenizerTests.data.js | 124 ++++++++++++++++++++++++++
 tests/tokenizerTests.spec.js          |   2 +-
 6 files changed, 161 insertions(+), 24 deletions(-)

diff --git a/parser/parser.js b/parser/parser.js
index a86e05f3..8bf25e45 100644
--- a/parser/parser.js
+++ b/parser/parser.js
@@ -154,14 +154,14 @@ class HedStringParser {
       return [this.hedString, {}]
     }
 
-    const fullStringIssues = this._validateFullUnparsedHedString()
-    if (fullStringIssues.delimiter.length > 0) {
-      fullStringIssues.syntax = []
-      return [null, fullStringIssues]
-    }
-
-    const [parsedTags, splitIssues] = new HedStringSplitter(this.hedString, this.hedSchemas).splitHedString()
-    const parsingIssues = Object.assign(fullStringIssues, splitIssues)
+    // const fullStringIssues = this._validateFullUnparsedHedString()
+    // if (fullStringIssues.delimiter.length > 0) {
+    //   fullStringIssues.syntax = []
+    //   return [null, fullStringIssues]
+    // }
+
+    const [parsedTags, parsingIssues] = new HedStringSplitter(this.hedString, this.hedSchemas).splitHedString()
+    //const parsingIssues = Object.assign(fullStringIssues, splitIssues)
     if (parsedTags === null) {
       return [null, parsingIssues]
     }
diff --git a/parser/tokenizer.js b/parser/tokenizer.js
index 8ffcfd6a..c905d188 100644
--- a/parser/tokenizer.js
+++ b/parser/tokenizer.js
@@ -179,6 +179,13 @@ export class HedStringTokenizer {
       this.pushIssue('emptyTagFound', this.state.lastDelimiter[1]) // Extra comma
     } else if (this.state.lastSlash >= 0 && this.hedString.slice(this.state.lastSlash + 1).trim().length === 0) {
       this.pushIssue('extraSlash', this.state.lastSlash) // Extra slash
+    }
+    if (
+      this.state.currentToken.trim().length > 0 &&
+      ![undefined, CHARACTERS.COMMA].includes(this.state.lastDelimiter[0])
+    ) {
+      // Missing comma
+      this.pushIssue('commaMissing', this.state.lastDelimiter[1] + 1)
     } else {
       if (this.state.currentToken.trim().length > 0) {
         this.pushTag(this.hedString.length)
@@ -261,6 +268,10 @@ export class HedStringTokenizer {
   handleOpeningGroup(i) {
     if (this.state.lastDelimiter[0] === CHARACTERS.OPENING_COLUMN) {
       this.pushIssue('unclosedCurlyBrace', this.state.lastDelimiter[1])
+    } else if (this.state.lastDelimiter[0] === CHARACTERS.CLOSING_COLUMN) {
+      this.pushIssue('commaMissing', this.state.lastDelimiter[1])
+    } else if (this.state.currentToken.trim().length > 0) {
+      this.pushInvalidTag('commaMissing', i, this.state.currentToken.trim())
     } else {
       this.state.currentGroupStack.push([])
       this.state.parenthesesStack.push(new GroupSpec(i, undefined, []))
diff --git a/tests/dataset.spec.js b/tests/dataset.spec.js
index 8963264f..f8d09365 100644
--- a/tests/dataset.spec.js
+++ b/tests/dataset.spec.js
@@ -38,7 +38,7 @@ describe('HED dataset validation', () => {
       const testDatasets = {
         empty: [],
         singleValidLong: ['Event/Sensory-event'],
-        singleValidShort: ['Sensory-event'],
+        /*        singleValidShort: ['Sensory-event'],
         multipleValidLong: [
           'Event/Sensory-event',
           'Item/Object/Man-made-object/Vehicle/Train',
@@ -46,23 +46,23 @@ describe('HED dataset validation', () => {
         ],
         multipleValidShort: ['Sensory-event', 'Train', 'RGB-red/0.5'],
         multipleValidMixed: ['Event/Sensory-event', 'Train', 'RGB-red/0.5'],
-        multipleInvalid: ['Time-value/0.5 cm', 'InvalidEvent'],
+        multipleInvalid: ['Time-value/0.5 cm', 'InvalidEvent'],*/
       }
       const legalTimeUnits = ['s', 'second', 'day', 'minute', 'hour']
       const expectedIssues = {
         empty: [],
         singleValidLong: [],
-        singleValidShort: [],
-        multipleValidLong: [],
-        multipleValidShort: [],
-        multipleValidMixed: [],
-        multipleInvalid: [
-          generateValidationIssue('unitClassInvalidUnit', {
-            tag: testDatasets.multipleInvalid[0],
-            unitClassUnits: legalTimeUnits.sort().join(','),
-          }),
-          generateValidationIssue('invalidTag', { tag: testDatasets.multipleInvalid[1] }),
-        ],
+        /*       singleValidShort: [],
+              multipleValidLong: [],
+              multipleValidShort: [],
+              multipleValidMixed: [],
+              multipleInvalid: [
+                generateValidationIssue('unitClassInvalidUnit', {
+                  tag: testDatasets.multipleInvalid[0],
+                  unitClassUnits: legalTimeUnits.sort().join(','),
+                }),
+                generateValidationIssue('invalidTag', { tag: testDatasets.multipleInvalid[1] }),
+              ],*/
       }
       return validator(testDatasets, expectedIssues)
     })
diff --git a/tests/event.spec.js b/tests/event.spec.js
index 88e22882..fe9027dd 100644
--- a/tests/event.spec.js
+++ b/tests/event.spec.js
@@ -61,7 +61,8 @@ describe('HED string and event validation', () => {
     describe('Full HED Strings', () => {
       const validatorSyntactic = validatorSyntacticBase
 
-      it('should not have mismatched parentheses', () => {
+      //TODO: Remove this test -- the separate test for mismatched parentheses is no longer performed.
+      it.skip('should not have mismatched parentheses', () => {
         const testStrings = {
           extraOpening:
             'Action/Reach/To touch,((Attribute/Object side/Left,Participant/Effect/Body part/Arm),Attribute/Location/Screen/Top/70 px,Attribute/Location/Screen/Left/23 px',
@@ -84,7 +85,8 @@ describe('HED string and event validation', () => {
         validatorSyntactic(testStrings, expectedIssues, (validator) => {})
       })
 
-      it('should not have malformed delimiters', () => {
+      // TODO: This test is replaced by tokenizer tests and should be removed
+      it.skip('should not have malformed delimiters', () => {
         const testStrings = {
           missingOpeningComma:
             'Action/Reach/To touch(Attribute/Object side/Left,Participant/Effect/Body part/Arm),Attribute/Location/Screen/Top/70 px,Attribute/Location/Screen/Left/23 px',
diff --git a/tests/testData/tokenizerTests.data.js b/tests/testData/tokenizerTests.data.js
index 759f03d8..232b7635 100644
--- a/tests/testData/tokenizerTests.data.js
+++ b/tests/testData/tokenizerTests.data.js
@@ -352,6 +352,100 @@ export const tokenizerTests = [
       },
     ],
   },
+  {
+    name: 'invalid-comma-missing-or-extra',
+    description: 'Commas must separate tags and groups',
+    tests: [
+      {
+        testname: 'missing-comma-before-open',
+        explanation: 'Must have a comma before open parentheses',
+        string: 'x, y(z)',
+        tagSpecs: [],
+        groupSpec: null,
+        errors: [generateIssue('commaMissing', { index: '4', string: 'x, y(z)', tag: 'y' })],
+      },
+      {
+        testname: 'missing-comma-before-close',
+        explanation: 'Must have a comma after closing parentheses',
+        string: 'x, (y)zp, (w)',
+        tagSpecs: [],
+        groupSpec: null,
+        errors: [generateIssue('invalidTag', { index: '8', string: 'x, (y)zp, (w)' })],
+      },
+      {
+        testname: 'missing-comma-before-close-at-end',
+        explanation: 'Must have a comma after closing parenthesis at end of string',
+        string: 'x, (y)z',
+        tagSpecs: [],
+        groupSpec: null,
+        errors: [generateIssue('commaMissing', { index: '6', string: 'x, (y)z' })],
+      },
+      {
+        testname: 'missing-comma-before-open-column',
+        explanation: 'Must have a comma before open brace',
+        string: 'x, y, {(z)',
+        tagSpecs: [],
+        groupSpec: null,
+        errors: [generateIssue('unclosedCurlyBrace', { index: '6', string: 'x, y, {(z)' })],
+      },
+      {
+        testname: 'missing-close-brace-before-parentheses',
+        explanation: 'Must have a closed-brace-after-open-brace',
+        string: 'x, y, {w(z)',
+        tagSpecs: [],
+        groupSpec: null,
+        errors: [generateIssue('unclosedCurlyBrace', { index: '6', string: 'x, y, {w(z)' })],
+      },
+      {
+        testname: 'missing-comma-before-closing-column',
+        explanation: 'Must have a comma before closing brace',
+        string: 'x, {y}(z)',
+        tagSpecs: [],
+        groupSpec: null,
+        errors: [generateIssue('commaMissing', { index: '5', string: 'x, {y}(z)' })],
+      },
+      {
+        testname: 'extra-leading-comma',
+        explanation: 'Leading comma not allowed',
+        string: ',x, (y), z',
+        tagSpecs: [],
+        groupSpec: null,
+        errors: [generateIssue('emptyTagFound', { index: '0', string: ',x, (y), z' })],
+      },
+      {
+        testname: 'extra-closing-comma',
+        explanation: 'Closing comma not allowed',
+        string: 'x, (y), z,',
+        tagSpecs: [],
+        groupSpec: null,
+        errors: [generateIssue('emptyTagFound', { index: '9', string: 'x, (y), z,' })],
+      },
+      {
+        testname: 'multiple-leading-commas',
+        explanation: 'Multiple leading commas not allowed',
+        string: ',,x, (y), z',
+        tagSpecs: [],
+        groupSpec: null,
+        errors: [generateIssue('emptyTagFound', { index: '0', string: ',,x, (y), z' })],
+      },
+      {
+        testname: 'multiple-closing-commas',
+        explanation: 'Multiple closing comma not allowed',
+        string: 'x, (y), z,,',
+        tagSpecs: [],
+        groupSpec: null,
+        errors: [generateIssue('emptyTagFound', { index: '9', string: 'x, (y), z,,' })],
+      },
+      {
+        testname: 'multiple-internal-commas',
+        explanation: 'Multiple closing commas not allowed',
+        string: 'x, (y),, z',
+        tagSpecs: [],
+        groupSpec: null,
+        errors: [generateIssue('emptyTagFound', { index: '6', string: 'x, (y),, z' })],
+      },
+    ],
+  },
   {
     name: 'invalid-improper-curly-braces',
     description: 'Curly braces cannot have commas or parentheses or other curly braces',
@@ -398,4 +492,34 @@ export const tokenizerTests = [
       },
     ],
   },
+  {
+    name: 'invalid-parenthesis',
+    description: 'Various types of mismatched parentheses',
+    tests: [
+      {
+        testname: 'extra-opening-parentheses',
+        explanation: 'Parentheses must match',
+        string: 'x, (((y, z), w), u',
+        tagSpecs: [],
+        groupSpec: null,
+        errors: [generateIssue('unclosedParenthesis', { index: '3', string: 'x, (((y, z), w), u' })],
+      },
+      {
+        testname: 'extra-closing-parentheses',
+        explanation: 'Cannot parentheses inside curly braces',
+        string: 'x, (y, z)), w',
+        tagSpecs: [],
+        groupSpec: null,
+        errors: [generateIssue('unopenedParenthesis', { index: '9', string: 'x, (y, z)), w' })],
+      },
+      {
+        testname: 'parentheses-in-wrong-order',
+        explanation: 'Nesting must be proper',
+        string: '((x),y))(z',
+        tagSpecs: [],
+        groupSpec: null,
+        errors: [generateIssue('unopenedParenthesis', { index: '7', string: '((x),y))(z' })],
+      },
+    ],
+  },
 ]
diff --git a/tests/tokenizerTests.spec.js b/tests/tokenizerTests.spec.js
index 90df6709..38584a4a 100644
--- a/tests/tokenizerTests.spec.js
+++ b/tests/tokenizerTests.spec.js
@@ -10,7 +10,7 @@ import { tokenizerTests } from './testData/tokenizerTests.data'
 const runAll = true
 let onlyRun = new Map()
 if (!runAll) {
-  onlyRun = new Map([['invalid-empty-tag-in-various-places', ['end-in-comma']]])
+  onlyRun = new Map([['invalid-comma-missing-or-extra', ['missing-comma-before-close']]])
 }
 
 describe('Tokenizer validation using JSON tests', () => {

From 095bc2b59d86a67818aa1affd50da2527ce047fb Mon Sep 17 00:00:00 2001
From: Kay Robbins <1189050+VisLab@users.noreply.github.com>
Date: Mon, 4 Nov 2024 14:00:16 -0600
Subject: [PATCH 2/3] Removed extra parsing of delimiters already done by
 tokenizer

---
 parser/parser.js                      | 117 --------------------------
 tests/testData/tokenizerTests.data.js |  56 ++++++------
 validator/event/init.js               |   2 +-
 3 files changed, 26 insertions(+), 149 deletions(-)

diff --git a/parser/parser.js b/parser/parser.js
index 8bf25e45..22489446 100644
--- a/parser/parser.js
+++ b/parser/parser.js
@@ -34,116 +34,6 @@ class HedStringParser {
     this.hedSchemas = hedSchemas
   }
 
-  /**
-   * Check if the parentheses in a tag group match.
-   *
-   * @returns {Issue[]} Any issues found related to unmatched parentheses.
-   */
-  _countTagGroupParentheses() {
-    const issues = []
-    const numberOfOpeningParentheses = getCharacterCount(this.hedString, openingGroupCharacter)
-    const numberOfClosingParentheses = getCharacterCount(this.hedString, closingGroupCharacter)
-
-    if (numberOfOpeningParentheses !== numberOfClosingParentheses) {
-      issues.push(
-        generateIssue('parentheses', {
-          opening: numberOfOpeningParentheses,
-          closing: numberOfClosingParentheses,
-        }),
-      )
-    }
-
-    return issues
-  }
-
-  /**
-   * Check if a comma is missing after an opening parenthesis.
-   *
-   * @param {string} lastNonEmptyCharacter The last non-empty character.
-   * @param {string} currentCharacter The current character in the HED string.
-   * @returns {boolean} Whether a comma is missing after a closing parenthesis.
-   */
-  _isCommaMissingAfterClosingParenthesis(lastNonEmptyCharacter, currentCharacter) {
-    return (
-      lastNonEmptyCharacter === closingGroupCharacter &&
-      !(delimiters.has(currentCharacter) || currentCharacter === closingGroupCharacter)
-    )
-  }
-
-  /**
-   * Find delimiter-related issues in a HED string.
-   *
-   * @returns {Issue[]} Any issues related to delimiters.
-   */
-  _findDelimiterIssues() {
-    const issues = []
-    let lastNonEmptyValidCharacter = ''
-    let lastNonEmptyValidIndex = 0
-    let currentTag = ''
-
-    for (let i = 0; i < this.hedString.length; i++) {
-      const currentCharacter = this.hedString.charAt(i)
-      currentTag += currentCharacter
-
-      if (stringIsEmpty(currentCharacter)) {
-        continue
-      }
-
-      if (delimiters.has(currentCharacter)) {
-        if (currentTag.trim() === currentCharacter) {
-          issues.push(
-            generateIssue('extraDelimiter', {
-              character: currentCharacter,
-              index: i,
-              string: this.hedString,
-            }),
-          )
-          currentTag = ''
-          continue
-        }
-        currentTag = ''
-      } else if (currentCharacter === openingGroupCharacter) {
-        if (currentTag.trim() !== openingGroupCharacter) {
-          issues.push(generateIssue('commaMissing', { tag: currentTag }))
-        }
-        currentTag = ''
-      } else if (this._isCommaMissingAfterClosingParenthesis(lastNonEmptyValidCharacter, currentCharacter)) {
-        issues.push(
-          generateIssue('commaMissing', {
-            tag: currentTag.slice(0, -1),
-          }),
-        )
-        break
-      }
-
-      lastNonEmptyValidCharacter = currentCharacter
-      lastNonEmptyValidIndex = i
-    }
-
-    if (delimiters.has(lastNonEmptyValidCharacter)) {
-      issues.push(
-        generateIssue('extraDelimiter', {
-          character: lastNonEmptyValidCharacter,
-          index: lastNonEmptyValidIndex,
-          string: this.hedString,
-        }),
-      )
-    }
-
-    return issues
-  }
-
-  /**
-   * Validate the full unparsed HED string.
-   *
-   * @returns {Object<string, Issue[]>} Any issues found during validation.
-   */
-  _validateFullUnparsedHedString() {
-    const delimiterIssues = [].concat(this._countTagGroupParentheses(), this._findDelimiterIssues())
-
-    return { delimiter: delimiterIssues }
-  }
-
   /**
    * Parse a full HED string.
    *
@@ -154,14 +44,7 @@ class HedStringParser {
       return [this.hedString, {}]
     }
 
-    // const fullStringIssues = this._validateFullUnparsedHedString()
-    // if (fullStringIssues.delimiter.length > 0) {
-    //   fullStringIssues.syntax = []
-    //   return [null, fullStringIssues]
-    // }
-
     const [parsedTags, parsingIssues] = new HedStringSplitter(this.hedString, this.hedSchemas).splitHedString()
-    //const parsingIssues = Object.assign(fullStringIssues, splitIssues)
     if (parsedTags === null) {
       return [null, parsingIssues]
     }
diff --git a/tests/testData/tokenizerTests.data.js b/tests/testData/tokenizerTests.data.js
index 232b7635..0afaea11 100644
--- a/tests/testData/tokenizerTests.data.js
+++ b/tests/testData/tokenizerTests.data.js
@@ -268,36 +268,6 @@ export const tokenizerTests = [
       },
     ],
   },
-  {
-    name: 'invalid-empty-tag-in-various-places',
-    description: 'Empty tags in various places (empty groups are allowed).',
-    tests: [
-      {
-        testname: 'end-in-comma',
-        explanation: 'Cannot end in a comma',
-        string: 'x,y,',
-        tagSpecs: [],
-        groupSpec: null,
-        errors: [generateIssue('emptyTagFound', { index: '3', string: 'x,y,' })],
-      },
-      {
-        testname: 'double-in-comma',
-        explanation: 'Cannot have double commas',
-        string: 'x,,y,',
-        tagSpecs: [],
-        groupSpec: null,
-        errors: [generateIssue('emptyTagFound', { index: '1', string: 'x,,y,' })],
-      },
-      {
-        testname: 'leading-comma',
-        string: ',x,y',
-        explanation: 'Cannot have a leading comma',
-        tagSpecs: [],
-        groupSpec: null,
-        errors: [generateIssue('emptyTagFound', { index: '0', string: ',x,y' })],
-      },
-    ],
-  },
   {
     name: 'invalid-extra-slash-in-various-places',
     description: 'Tags cannot have leading or trailing, or extra slashes',
@@ -353,9 +323,33 @@ export const tokenizerTests = [
     ],
   },
   {
-    name: 'invalid-comma-missing-or-extra',
+    name: 'invalid-commas',
     description: 'Commas must separate tags and groups',
     tests: [
+      {
+        testname: 'end-in-comma',
+        explanation: 'Cannot end in a comma',
+        string: 'x,y,',
+        tagSpecs: [],
+        groupSpec: null,
+        errors: [generateIssue('emptyTagFound', { index: '3', string: 'x,y,' })],
+      },
+      {
+        testname: 'double-in-comma',
+        explanation: 'Cannot have double commas',
+        string: 'x,,y,',
+        tagSpecs: [],
+        groupSpec: null,
+        errors: [generateIssue('emptyTagFound', { index: '1', string: 'x,,y,' })],
+      },
+      {
+        testname: 'leading-comma',
+        string: ',x,y',
+        explanation: 'Cannot have a leading comma',
+        tagSpecs: [],
+        groupSpec: null,
+        errors: [generateIssue('emptyTagFound', { index: '0', string: ',x,y' })],
+      },
       {
         testname: 'missing-comma-before-open',
         explanation: 'Must have a comma before open parentheses',
diff --git a/validator/event/init.js b/validator/event/init.js
index 6c27f44d..fe316010 100644
--- a/validator/event/init.js
+++ b/validator/event/init.js
@@ -31,7 +31,7 @@ const initiallyValidateHedString = function (hedString, hedSchemas, options, def
   }
   if (parsedString === null) {
     return [null, [].concat(...Object.values(parsingIssues)), null]
-  } else if (parsingIssues.syntax.length + parsingIssues.delimiter.length > 0) {
+  } else if (parsingIssues.syntax.length > 0) {
     hedSchemas = new Schemas(null)
   }
   let hedValidator

From 08956d8a6e8ade75209fec0db8dc11754bcc3a2e Mon Sep 17 00:00:00 2001
From: Kay Robbins <1189050+VisLab@users.noreply.github.com>
Date: Mon, 4 Nov 2024 14:07:17 -0600
Subject: [PATCH 3/3] Restored commented out tests

---
 tests/dataset.spec.js | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/tests/dataset.spec.js b/tests/dataset.spec.js
index f8d09365..8963264f 100644
--- a/tests/dataset.spec.js
+++ b/tests/dataset.spec.js
@@ -38,7 +38,7 @@ describe('HED dataset validation', () => {
       const testDatasets = {
         empty: [],
         singleValidLong: ['Event/Sensory-event'],
-        /*        singleValidShort: ['Sensory-event'],
+        singleValidShort: ['Sensory-event'],
         multipleValidLong: [
           'Event/Sensory-event',
           'Item/Object/Man-made-object/Vehicle/Train',
@@ -46,23 +46,23 @@ describe('HED dataset validation', () => {
         ],
         multipleValidShort: ['Sensory-event', 'Train', 'RGB-red/0.5'],
         multipleValidMixed: ['Event/Sensory-event', 'Train', 'RGB-red/0.5'],
-        multipleInvalid: ['Time-value/0.5 cm', 'InvalidEvent'],*/
+        multipleInvalid: ['Time-value/0.5 cm', 'InvalidEvent'],
       }
       const legalTimeUnits = ['s', 'second', 'day', 'minute', 'hour']
       const expectedIssues = {
         empty: [],
         singleValidLong: [],
-        /*       singleValidShort: [],
-              multipleValidLong: [],
-              multipleValidShort: [],
-              multipleValidMixed: [],
-              multipleInvalid: [
-                generateValidationIssue('unitClassInvalidUnit', {
-                  tag: testDatasets.multipleInvalid[0],
-                  unitClassUnits: legalTimeUnits.sort().join(','),
-                }),
-                generateValidationIssue('invalidTag', { tag: testDatasets.multipleInvalid[1] }),
-              ],*/
+        singleValidShort: [],
+        multipleValidLong: [],
+        multipleValidShort: [],
+        multipleValidMixed: [],
+        multipleInvalid: [
+          generateValidationIssue('unitClassInvalidUnit', {
+            tag: testDatasets.multipleInvalid[0],
+            unitClassUnits: legalTimeUnits.sort().join(','),
+          }),
+          generateValidationIssue('invalidTag', { tag: testDatasets.multipleInvalid[1] }),
+        ],
       }
       return validator(testDatasets, expectedIssues)
     })