Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

test: Add unit tests for toolbox regex patterns #14

Merged
merged 8 commits into from
Jan 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 25 additions & 19 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
"@types/node": "^18.11.16",
"@typescript-eslint/eslint-plugin": "^5.46.1",
"@typescript-eslint/parser": "^5.46.1",
"ava": "^5.1.0",
"ava": "^5.1.1",
"eslint": "^8.30.0",
"nodemon": "^2.0.20",
"pkg": "^5.8.0",
Expand Down
79 changes: 61 additions & 18 deletions src/toolbox.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,33 @@ export type markerType =
"\\ref" |
"\\t" ;

/**
 * First and last verse numbers covered by a verse bridge
 */
export interface bridgeType {
/** Verse number the bridge starts at */
start: number;
/** Verse number the bridge ends at */
end: number;
}

/**
 * Regex to parse all the variations of the \vs marker (along with all the optional punctuation marks)
 *   \vs (section title)
 *   \vs (section heading)
 *   \vs (13-14) b
 *   \vs [13-14] b
 *   \vs 13-14 (b)
 *
 * Capture group 1 is the verse reference: a verse bridge, a single verse number, or a
 * section title/heading label. Capture group 2 is the optional lowercase letter after it.
 *
 * The verse-bridge alternatives must be listed before the bare `\d+` alternative.
 * Alternation is tried left to right and the trailing `.*` accepts any remainder, so with
 * `\d+` first an unbracketed bridge such as `13-14` is captured as just `13` and is never
 * recognised as a bridge.
 */
export const VS_PATTERN = /\\vs\s+\*?(\(\d+-\d+\)|\[\d+-\d+\]|\d+-\d+|\d+|\(?section title\)?|\(?section heading\)?)\s?\(?([a-z])?\)?\??.*/;

/**
 * Regex to parse all the variations of verse bridges to extract verse ranges
 * (13-14)
 * [13-14]
 * 13-14
 * 13a-14b
 *
 * Capture groups:
 * 1 - optional opening "(" or "["
 * 2 - digits of the starting verse
 * 3 - digits of the ending verse
 * 4 - optional closing ")" or "]"
 * A single lowercase letter after either verse number is accepted but not captured.
 * The pattern is not anchored, so it can match anywhere within a longer string.
 */
export const VS_BRIDGE_PATTERN = /(\(|\[)?(\d+)[a-z]?-(\d+)[a-z]?(\)|\])?/;

/**
* Information about the Toolbox text file based on the filename
*/
Expand All @@ -53,6 +80,30 @@ export interface fileInfoType {
chapterNumber: number;
}

/**
 * Work out the first and last verse numbers of a verse bridge.
 * When the text contains no verse bridge, both ends fall back to the current verse number.
 * @param {string} line - text containing a verse bridge, e.g. "(13-14)"
 * @param {number} verseNum - current verse number, used as the fallback for both ends
 * @returns {bridgeType} start and end verse numbers of the bridge
 */
export function getVerseBridge(line: string, verseNum: number) : bridgeType {
  const found = line.match(VS_BRIDGE_PATTERN);
  if (!found) {
    // No bridge present: the "bridge" is just the current verse
    return { start: verseNum, end: verseNum };
  }

  // Groups 2 and 3 hold the digits of the starting and ending verse
  const [, , firstVerse, lastVerse] = found;
  return {
    start: firstVerse ? parseInt(firstVerse, 10) : verseNum,
    end: lastVerse ? parseInt(lastVerse, 10) : verseNum
  };
}

/**
* Extract a book name and chapter number from the filename
* @param {string} file - Path to the Toolbox text file
Expand Down Expand Up @@ -218,27 +269,19 @@ export function updateObj(bookObj: books.objType, file: string, currentChapter:
}
// Determine if any other \\vs special processing needed
let vs_section_header = false, vs_verse_bridge = false, vs_other = false;
let bridgeStart = verseNum, bridgeEnd = verseNum; // Start and end of a verse bridge
// Start and end of a verse bridge
let bridge : bridgeType = {
start: verseNum,
end: verseNum
}
if (marker == '\\vs') {
const vsPattern = /\\vs\s+\*?(\d+|\(?section title\)?|\(?section heading\)?|\(\d+-\d+\)|\[\d+-\d+\])\s?([a-z])?\??.*/;
const vsPatternMatch = line.trim().match(vsPattern);
const vsPatternMatch = line.trim().match(VS_PATTERN);
if(vsPatternMatch){
if(vsPatternMatch[1].includes('section')) {
vs_section_header = true;
} else if (vsPatternMatch[1].includes('-')) {
vs_verse_bridge = true;
// Verse bridge could be marked with (x-y) or [x-y]
const vsBridgePattern = /(\(|\[)(\d+)-(\d+)(\)|\])/;
const vsBridgeMatch = vsPatternMatch[1].match(vsBridgePattern);
if (vsBridgeMatch) {
// Determine the start and end of the verse bridge
if (vsBridgeMatch[2]) {
bridgeStart = parseInt(vsBridgeMatch[2]);
}
if (vsBridgeMatch[3]) {
bridgeEnd = parseInt(vsBridgeMatch[3]);
}
}
bridge = getVerseBridge(vsPatternMatch[1], verseNum);
}
if (vsPatternMatch[2] && vsPatternMatch[2] != 'a') {
vs_other = true; // verse #-other letter besides "a"
Expand Down Expand Up @@ -326,17 +369,17 @@ export function updateObj(bookObj: books.objType, file: string, currentChapter:
case 'INCREMENT_VERSE_NUM' :
// Update verseNum to either after the end of a verse span, or increment
//verseNum++
verseNum = (vs_verse_bridge) ? bridgeEnd + 1 : verseNum + 1;
verseNum = (vs_verse_bridge) ? bridge.end + 1 : verseNum + 1;
break;
case 'MERGE_VERSES' : {
// Complicated task of merging the previous two verses, and assigning number
const lastVerse = bookObj.content[currentChapter].content.pop();
contentLength--;
bookObj.content[currentChapter].content[contentLength - 1].text += lastVerse.text;
bookObj.content[currentChapter].content[contentLength - 1].number = (vs_verse_bridge) ? bridgeStart : verseNum-1;
bookObj.content[currentChapter].content[contentLength - 1].number = (vs_verse_bridge) ? bridge.start : verseNum-1;

if (vs_verse_bridge) {
bookObj.content[currentChapter].content[contentLength - 1].bridgeEnd = bridgeEnd;
bookObj.content[currentChapter].content[contentLength - 1].bridgeEnd = bridge.end;
}
break;
}
Expand Down
141 changes: 141 additions & 0 deletions test/toolbox.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
// Copyright 2023 SIL International
// Trivial unit test for testing toolbox.ts regex
import test from 'ava';

import { VS_PATTERN, VS_BRIDGE_PATTERN, bridgeType, getVerseBridge } from '../dist/toolbox'


/*
* VS_PATTERN tests
* \vs (section title)
* \vs (section heading)
* \vs (13-14) b
* \vs [13-14] b
* \vs 13-14 (b)
*/

// Section title/heading variations of \vs should all be recognised
test('VS_PATTERN for section title/heading', t => {
  const matching: Array<[string, string]> = [
    ["\\vs (section title)", "(section title) matches"],
    ["\\vs (section title?)", "(section title?) matches"],
    ["\\vs (section title) [?]", "(section title) [?] matches"],
    ["\\vs (section heading)", "(section heading) matches"],
  ];

  for (const [text, message] of matching) {
    t.regex(text.trim(), VS_PATTERN, message);
  }
});

// A \vs marker followed only by "?" carries no verse reference, so it must not match
test('VS_PATTERN for ?', t => {
  const rejected: Array<[string, string]> = [
    ["\\vs ?", "? fails to match"],
    ["\\vs ? (none)", "? (none) fails to match"],
  ];

  for (const [text, message] of rejected) {
    t.notRegex(text.trim(), VS_PATTERN, message);
  }
});

// Single verse numbers, with optional letter and trailing punctuation or notes
test('VS_PATTERN for verse', t => {
  const matching: Array<[string, string]> = [
    ["\\vs 9", "vs 9 matches"],
    ["\\vs 9?", "9? matches"],
    ["\\vs 8b [not in draft produced at workshop. do we want to keep this?]", "long line matches"],
    ["\\vs 9a", "9a matches"],
    ["\\vs 9a?", "9a? matches"],
    ["\\vs 14", "14 matches"],
    ["\\vs 14b", "14b matches"],
    ["\\vs 22b(?)", "22b(?) matches"],
  ];

  for (const [text, message] of matching) {
    t.regex(text.trim(), VS_PATTERN, message);
  }
});

// Verse bridge variations of \vs, plus one line that must be rejected
test('VS_PATTERN for verse bridge', t => {
  const matching: Array<[string, string]> = [
    ["\\vs (13-14) b", "(13-14) b matches"],
    ["\\vs [13-14] b", "[13-14] b matches"],
    ["\\vs 13c-14a", "13c-14a matches"],
    ["\\vs 8-9 (b)", "8-9 (b) matches"],
  ];

  for (const [text, message] of matching) {
    t.regex(text.trim(), VS_PATTERN, message);
  }

  const unrelated = "\\vs this should not match";
  t.notRegex(unrelated.trim(), VS_PATTERN, "this should not match");
})


/**
 * Tests VS_BRIDGE_PATTERN matches and determining verse bridges.
 * Every matching case checks both that the pattern matches and that
 * getVerseBridge() extracts the expected start/end verse numbers.
 */
test('VS_BRIDGE_PATTERN for verse ranges', t => {
  const matching: Array<{
    line: string;
    verseNum: number;
    expected: bridgeType;
    matchMessage: string;
    bridgeMessage: string;
  }> = [
    {
      line: "(13-14)", verseNum: 13, expected: { start: 13, end: 14 },
      matchMessage: "(13-14) matches", bridgeMessage: "bridge (13, 14)"
    },
    {
      line: "[13-14]", verseNum: 13, expected: { start: 13, end: 14 },
      matchMessage: "[13-14] matches", bridgeMessage: "bridge [13, 14]"
    },
    {
      line: "13-14", verseNum: 13, expected: { start: 13, end: 14 },
      matchMessage: "13-14 matches", bridgeMessage: "bridge {13, 14}"
    },
    {
      line: "8-9 (b)", verseNum: 8, expected: { start: 8, end: 9 },
      matchMessage: "8-9 (b) matches", bridgeMessage: "bridge 8-9 (b)"
    },
    {
      // Previously only the regex match was checked for this case
      line: "13-14a", verseNum: 13, expected: { start: 13, end: 14 },
      matchMessage: "13-14a matches", bridgeMessage: "bridge 13-14a"
    },
    {
      line: "13a-14b", verseNum: 13, expected: { start: 13, end: 14 },
      matchMessage: "13a-14b matches", bridgeMessage: "bridge 13a-14b"
    },
  ];

  for (const { line, verseNum, expected, matchMessage, bridgeMessage } of matching) {
    t.regex(line.trim(), VS_BRIDGE_PATTERN, matchMessage);
    t.deepEqual(getVerseBridge(line, verseNum), expected, bridgeMessage);
  }

  // These do not match: getVerseBridge falls back to the current verse number
  const notABridge = "x15a-y21b";
  t.notRegex(notABridge.trim(), VS_BRIDGE_PATTERN, "x15a-y21b does not match");
  t.deepEqual(getVerseBridge(notABridge, 15),
    {
      start: 15,
      end: 15
    }, "bridge x15a-y21b");
})