Skip to content

Commit

Permalink
Write new, smaller tag converter for HED 3 tags
Browse files Browse the repository at this point in the history
This is functionally complete, though I may do some fine-tuning.
  • Loading branch information
happy5214 committed Jul 19, 2024
1 parent 3ead3e1 commit 5431fdd
Show file tree
Hide file tree
Showing 10 changed files with 173 additions and 125 deletions.
42 changes: 42 additions & 0 deletions parser/converter.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import { generateIssue, IssueError } from '../common/issues/issues'
import { getTagSlashIndices } from '../utils/hedStrings'

/**
* Retrieve the {@link SchemaTag} object for a tag specification.
*
* @param {TagSpec} tagSpec The tag specification to convert.
* @param {Schemas} hedSchemas The HED schema collection.
* @returns {[SchemaTag, string]} The schema's corresponding tag object and the remainder of the tag string.
*/
export default function convertTagSpecToSchemaTag(tagSpec, hedSchemas) {
const tagString = tagSpec.tag
const tagLevels = tagString.split('/')
const tagSlashes = getTagSlashIndices(tagString)
const tagMapping = hedSchemas.getSchema(tagSpec.library).entries.tags
let schemaTag1 = tagMapping.getEntry(tagLevels[0].toLowerCase())
if (!schemaTag1) {
throw new IssueError(generateIssue('invalidTag', { tag: tagString, bounds: tagSpec.bounds }))
} else if (tagLevels.length === 1) {
return [schemaTag1, '']
}
for (let i = 1; i < tagLevels.length; i++) {
if (schemaTag1.valueTag) {
return [schemaTag1.valueTag, tagLevels.slice(i).join('/')]
}
const schemaTag2 = tagMapping.getEntry(tagLevels[i].toLowerCase())
if (!schemaTag2) {
return [schemaTag1, tagLevels.slice(i).join('/')]
}
if (schemaTag2.parent !== schemaTag1) {
throw new IssueError(
generateIssue('invalidParentNode', {
tag: tagLevels[i],
parentTag: schemaTag2.longName,
bounds: [tagSpec.bounds[0] + tagSlashes[i - 1] + 1, tagSpec.bounds[0] + (tagSlashes[i] ?? tagString.length)],
}),
)
}
schemaTag1 = schemaTag2
}
return [schemaTag1, '']
}
91 changes: 49 additions & 42 deletions parser/parsedHedTag.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ import { Schema } from '../common/schema/types'
import { convertPartialHedStringToLong } from '../converter/converter'
import { getTagLevels, replaceTagNameWithPound } from '../utils/hedStrings'
import ParsedHedSubstring from './parsedHedSubstring'
import convertTagSpecToSchemaTag from './converter'
import { SchemaValueTag } from '../validator/schema/types'

/**
* A parsed HED tag.
Expand Down Expand Up @@ -33,15 +35,13 @@ export class ParsedHedTag extends ParsedHedSubstring {
* Constructor.
*
* @param {string} originalTag The original HED tag.
* @param {string} hedString The original HED string.
* @param {number[]} originalBounds The bounds of the HED tag in the original HED string.
* @param {Schemas} hedSchemas The collection of HED schemas.
* @param {string} schemaName The label of this tag's schema in the dataset's schema spec.
*/
constructor(originalTag, hedString, originalBounds, hedSchemas, schemaName = '') {
constructor(originalTag, originalBounds) {
super(originalTag, originalBounds)

this._convertTag(hedString, hedSchemas, schemaName)
this.canonicalTag = this.originalTag
this.conversionIssues = []

this.formattedTag = this._formatTag()
}
Expand All @@ -68,19 +68,6 @@ export class ParsedHedTag extends ParsedHedSubstring {
return this.toString()
}

/**
* Convert this tag to long form.
*
* @param {string} hedString The original HED string.
* @param {Schemas} hedSchemas The collection of HED schemas.
* @param {string} schemaName The label of this tag's schema in the dataset's schema spec.
*/
// eslint-disable-next-line no-unused-vars
_convertTag(hedString, hedSchemas, schemaName) {
this.canonicalTag = this.originalTag
this.conversionIssues = []
}

/**
* Format this HED tag by removing newlines, double quotes, and slashes.
*
Expand Down Expand Up @@ -293,24 +280,42 @@ export class ParsedHedTag extends ParsedHedSubstring {
*/
export class ParsedHed3Tag extends ParsedHedTag {
/**
* Convert this tag to long form.
* Constructor.
*
* @param {TagSpec} tagSpec The token for this tag.
* @param {Schemas} hedSchemas The collection of HED schemas.
* @param {string} hedString The original HED string.
*/
constructor(tagSpec, hedSchemas, hedString) {
super(tagSpec.tag, tagSpec.bounds)

this._convertTag(hedSchemas, hedString, tagSpec)

this.formattedTag = this._formatTag()
}

/**
* The schema's representation of this tag.
*
* @type {SchemaTag}
* @private
*/
_schemaTag

/**
* Convert this tag to long form.
*
* @param {Schemas} hedSchemas The collection of HED schemas.
* @param {string} schemaName The label of this tag's schema in the dataset's schema spec.
* @param {string} hedString The original HED string.
* @param {TagSpec} tagSpec The token for this tag.
*/
_convertTag(hedString, hedSchemas, schemaName) {
_convertTag(hedSchemas, hedString, tagSpec) {
const hed3ValidCharacters = /^[^{}[\]()~,\0\t]+$/
if (!hed3ValidCharacters.test(this.originalTag)) {
throw new Error('The parser failed to properly remove an illegal or special character.')
}

if (hedSchemas.isSyntaxOnly) {
this.canonicalTag = this.originalTag
this.conversionIssues = []
return
}

const schemaName = tagSpec.library
this.schema = hedSchemas.getSchema(schemaName)
if (this.schema === undefined) {
if (schemaName !== '') {
Expand All @@ -331,14 +336,20 @@ export class ParsedHed3Tag extends ParsedHedTag {
return
}

const [canonicalTag, conversionIssues] = convertPartialHedStringToLong(
this.schema,
this.originalTag,
hedString,
this.originalBounds[0],
)
this.canonicalTag = canonicalTag
this.conversionIssues = conversionIssues
try {
const [schemaTag, remainder] = convertTagSpecToSchemaTag(tagSpec, hedSchemas)
this._schemaTag = schemaTag
if (this._schemaTag instanceof SchemaValueTag) {
this.canonicalTag = this._schemaTag.parent.longName + '/' + remainder
} else if (remainder) {
this.canonicalTag = this._schemaTag.longName + '/' + remainder
} else {
this.canonicalTag = this._schemaTag.longName
}
this.conversionIssues = []
} catch (error) {
this.conversionIssues = [error.issue]
}
}

/**
Expand All @@ -347,7 +358,7 @@ export class ParsedHed3Tag extends ParsedHedTag {
* @returns {string} The nicely formatted version of this tag.
*/
format() {
let tagName = this.schema?.entries?.tags?.getLongNameEntry(this.formattedTag)?.longName
let tagName = this.canonicalTag
if (tagName === undefined) {
tagName = this.originalTag
}
Expand Down Expand Up @@ -394,11 +405,7 @@ export class ParsedHed3Tag extends ParsedHedTag {
*/
get takesValueTag() {
return this._memoize('takesValueTag', () => {
if (this.takesValueFormattedTag !== null) {
return this.schema?.entries?.tags?.getLongNameEntry(this.takesValueFormattedTag)
} else {
return null
}
return this._schemaTag
})
}

Expand All @@ -420,7 +427,7 @@ export class ParsedHed3Tag extends ParsedHedTag {
*/
get hasUnitClass() {
return this._memoize('hasUnitClass', () => {
if (this.takesValueTag === null) {
if (!this.takesValueTag) {
return false
}
return this.takesValueTag.hasUnitClasses
Expand Down
23 changes: 17 additions & 6 deletions parser/splitHedString.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,14 @@ import { ParsedHed2Tag } from '../validator/hed2/parser/parsedHed2Tag'
import { HedStringTokenizer, ColumnSpliceSpec, TagSpec } from './tokenizer'

const generationToClass = [
ParsedHedTag,
ParsedHedTag, // Generation 1 is not supported by this validator.
ParsedHed2Tag,
ParsedHed3Tag,
(originalTag, hedString, originalBounds, hedSchemas, schemaName, tagSpec) =>
new ParsedHedTag(originalTag, originalBounds),
(originalTag, hedString, originalBounds, hedSchemas, schemaName, tagSpec) =>
new ParsedHedTag(originalTag, originalBounds), // Generation 1 is not supported by this validator.
(originalTag, hedString, originalBounds, hedSchemas, schemaName, tagSpec) =>
new ParsedHed2Tag(originalTag, hedString, originalBounds, hedSchemas, schemaName),
(originalTag, hedString, originalBounds, hedSchemas, schemaName, tagSpec) =>
new ParsedHed3Tag(tagSpec, hedSchemas, hedString),
]

/**
Expand All @@ -26,11 +30,18 @@ const generationToClass = [
const createParsedTags = function (hedString, hedSchemas, tagSpecs, groupSpecs) {
const conversionIssues = []
const syntaxIssues = []
const ParsedHedTagClass = generationToClass[hedSchemas.generation]
const ParsedHedTagConstructor = generationToClass[hedSchemas.generation]

const createParsedTag = (tagSpec) => {
if (tagSpec instanceof TagSpec) {
const parsedTag = new ParsedHedTagClass(tagSpec.tag, hedString, tagSpec.bounds, hedSchemas, tagSpec.library)
const parsedTag = ParsedHedTagConstructor(
tagSpec.tag,
hedString,
tagSpec.bounds,
hedSchemas,
tagSpec.library,
tagSpec,
)
conversionIssues.push(...parsedTag.conversionIssues)
return parsedTag
} else if (tagSpec instanceof ColumnSpliceSpec) {
Expand Down
10 changes: 6 additions & 4 deletions tests/bids.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ describe('BIDS datasets', () => {
],
error_and_good: [
BidsHedIssue.fromHedIssue(
converterGenerateIssue('invalidTag', 'Confused', {}, [0, 8]),
generateIssue('invalidTag', { tag: 'Confused', bounds: [0, 8] }),
bidsSidecars[1][1].file,
),
],
Expand Down Expand Up @@ -160,7 +160,7 @@ describe('BIDS datasets', () => {
tag: 'Speed/300 miles',
unitClassUnits: legalSpeedUnits.sort().join(','),
})
const converterMaglevError = converterGenerateIssue('invalidTag', 'Maglev', {}, [0, 6])
const converterMaglevError = generateIssue('invalidTag', { tag: 'Maglev', bounds: [0, 6] })
const maglevError = generateIssue('invalidTag', { tag: 'Maglev' })
const maglevWarning = generateIssue('extension', { tag: 'Train/Maglev' })
const expectedIssues = {
Expand Down Expand Up @@ -192,8 +192,10 @@ describe('BIDS datasets', () => {
all_good: [],
all_bad: [
// BidsHedIssue.fromHedIssue(generateIssue('invalidTag', { tag: 'Confused' }), badDatasets[0].file),
BidsHedIssue.fromHedIssue(converterGenerateIssue('invalidTag', 'Confused', {}, [0, 8]), badDatasets[0].file),
// BidsHedIssue.fromHedIssue(converterGenerateIssue('invalidTag', 'Confused,Gray', {}, [0, 8]), badDatasets[0].file),
BidsHedIssue.fromHedIssue(
generateIssue('invalidTag', { tag: 'Confused', bounds: [0, 8] }),
badDatasets[0].file,
),
// TODO: Catch warning in sidecar validation
/* BidsHedIssue.fromHedIssue(
generateIssue('extension', { tag: 'Train/Maglev' }),
Expand Down
4 changes: 2 additions & 2 deletions tests/dataset.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ describe('HED dataset validation', () => {
generateValidationIssue('invalidTag', {
tag: testDatasets.multipleInvalid[1],
}),
generateConverterIssue('invalidTag', testDatasets.multipleInvalid[1], {}, [0, 12]),
generateValidationIssue('invalidTag', { tag: testDatasets.multipleInvalid[1], bounds: [0, 12] }),
],
}
return validator(testDatasets, expectedIssues)
Expand Down Expand Up @@ -119,7 +119,7 @@ describe('HED dataset validation', () => {
generateValidationIssue('invalidTag', {
tag: testDatasets.multipleInvalid[1],
}),
generateConverterIssue('invalidTag', testDatasets.multipleInvalid[1], {}, [0, 12]),
generateValidationIssue('invalidTag', { tag: testDatasets.multipleInvalid[1], bounds: [0, 12] }),
],
}
return validator(testDatasets, expectedIssues)
Expand Down
52 changes: 18 additions & 34 deletions tests/event.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -963,36 +963,27 @@ describe('HED string and event validation', () => {
const testStrings = {
// Duration/20 cm is an obviously invalid tag that should not be caught due to the first error.
red: 'Property/RGB-red, Duration/20 cm',
redAndBlue: 'Property/RGB-red, Property/RGB-blue, Duration/20 cm',
redAndBlue: 'Property/RGB-red, Property/RGB-blue/Blah, Duration/20 cm',
}
const expectedIssues = {
red: [
converterGenerateIssue(
'invalidParentNode',
testStrings.red,
{
parentTag: 'Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/RGB-color/RGB-red',
},
[9, 16],
),
generateIssue('invalidParentNode', {
tag: 'RGB-red',
parentTag: 'Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/RGB-color/RGB-red',
bounds: [9, 16],
}),
],
redAndBlue: [
converterGenerateIssue(
'invalidParentNode',
testStrings.redAndBlue,
{
parentTag: 'Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/RGB-color/RGB-red',
},
[9, 16],
),
converterGenerateIssue(
'invalidParentNode',
testStrings.redAndBlue,
{
parentTag: 'Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/RGB-color/RGB-blue',
},
[27, 35],
),
generateIssue('invalidParentNode', {
tag: 'RGB-red',
parentTag: 'Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/RGB-color/RGB-red',
bounds: [9, 16],
}),
generateIssue('invalidParentNode', {
tag: 'RGB-blue',
parentTag: 'Property/Sensory-property/Sensory-attribute/Visual-attribute/Color/RGB-color/RGB-blue',
bounds: [27, 35],
}),
],
}
// This is a no-op since short-to-long conversion errors are handled in the string parsing phase.
Expand Down Expand Up @@ -1108,7 +1099,7 @@ describe('HED string and event validation', () => {
}),
],
illegalComma: [
converterGenerateIssue('invalidTag', testStrings.illegalComma, { tag: 'This' }, [22, 26]),
generateIssue('invalidTag', { tag: 'This/Is/A/Tag', bounds: [22, 35] }),
/* Intentionally not thrown (validation ends at parsing stage)
generateIssue('extraCommaOrInvalid', {
previousTag: 'Label/This_is_a_label',
Expand Down Expand Up @@ -1588,16 +1579,9 @@ describe('HED string and event validation', () => {
],
missingRequiredUnit: [],
wrongLocation: [
converterGenerateIssue(
'invalidParentNode',
testStrings.wrongLocation,
{ parentTag: 'Item/Biological-item/Organism' },
[7, 15],
),
/* Intentionally not thrown (validation ends at parsing stage)
generateIssue('invalidPlaceholder', {
tag: testStrings.wrongLocation,
}), */
}),
],
}
return validatorSemantic(testStrings, expectedIssues, true)
Expand Down
Loading

0 comments on commit 5431fdd

Please sign in to comment.