From 7e23b48b688fe720f2897e1b131dc72dfd39ef97 Mon Sep 17 00:00:00 2001 From: davidmoore1 Date: Fri, 19 Jan 2024 14:57:52 -0500 Subject: [PATCH] Moved reference parsing to build --- scripts/convertBooks.ts | 2 +- scripts/convertMarkdown.ts | 190 ++++++++++++++++-- {src/lib/scripts => scripts}/numeralUtils.ts | 0 scripts/stringUtils.ts | 177 ++++++++++++++++ src/lib/components/ScriptureViewSofria.svelte | 39 ++-- src/lib/components/StackView.svelte | 36 ++-- src/lib/scripts/scripture-reference-utils.ts | 156 -------------- src/lib/scripts/stringUtils.ts | 119 +---------- 8 files changed, 378 insertions(+), 341 deletions(-) rename {src/lib/scripts => scripts}/numeralUtils.ts (100%) create mode 100644 scripts/stringUtils.ts diff --git a/scripts/convertBooks.ts b/scripts/convertBooks.ts index c2d39c524..6e15ba0b3 100644 --- a/scripts/convertBooks.ts +++ b/scripts/convertBooks.ts @@ -69,7 +69,7 @@ export async function convertBooks( //custom zvideo milestone. //ignore strong number references content = replaceVideoTags(removeStrongNumberReferences(content)); - content = convertMarkdownsToMilestones(content); + content = convertMarkdownsToMilestones(content, bcid, book.id); //query Proskomma with a mutation to add a document //more efficient than original pk.addDocument call diff --git a/scripts/convertMarkdown.ts b/scripts/convertMarkdown.ts index 900b5e830..3c4697d39 100644 --- a/scripts/convertMarkdown.ts +++ b/scripts/convertMarkdown.ts @@ -1,4 +1,7 @@ -export function convertMarkdownsToMilestones(content: string): string { +import { convertDigitsInStringToDefaultNumeralSystem, getIntFromNumberString } from "./numeralUtils"; +import { containsRomanScriptLetter, getFilenameExt, getFirstDigitsAsInt, getIntFromString, isBlank, isNotBlank, isPositiveInteger, splitString, stripAllExceptDigitsAndHyphens } from "./stringUtils"; + +export function convertMarkdownsToMilestones(content: string, docSet: string, bookid: string): string { let result: string = ''; result = content; const sb = []; @@ -34,7 +37,7 @@ export function convertMarkdownsToMilestones(content: string): string { const telLink = getTelHtmlFromMarkdownLink(link, text); sb.push(telLink); } else { - const refLink = getReferenceHtmlFromMarkdownLink(link, text); + const refLink = getReferenceHtmlFromMarkdownLink(link, text, docSet, bookid); sb.push(refLink); } inputString = inputString.substring(match.index + match[0].length); @@ -114,32 +117,177 @@ function getTelHtmlFromMarkdownLink(link: string, text: string): string { ' \\ztellink-s | link="' + encodeURIComponent(link) + '"\\*' + text + ' \\ztellink-e\\* '; return result; } -function getReferenceHtmlFromMarkdownLink(link: string, text: string): string { +function getReferenceHtmlFromMarkdownLink(link: string, text: string, docSet: string, bookid: string): string { // \zreflink-s |link="ENGWEB.MAT.5.1"\*Beatitudes\zreflink-e\* \ - const result = - ' \\zreflink-s | link="' + encodeURIComponent(link) + '"\\*' + text + ' \\zreflink-e\\* '; + let result: string = ''; + const [collection, book, fromChapter, toChapter, verseRanges] = getReferenceFromString(link); + const [fromVerse, toVerse, separator] = verseRanges[0]; + if ((book === '') && (fromChapter === -1)) { + // Invalid link + result = text; + } else { + let refCollection = collection; + if (isBlank(refCollection)) { + refCollection = docSet; + } + let refBook = book; + if (isBlank(refBook)) { + refBook = bookid; + } + let refChapter = fromChapter; + if (refChapter < 1) { + refChapter = 1; + } + let refVerse = fromVerse; + if (refVerse < 1) { + refVerse = 1; + } + const reference = refCollection + '.' + refBook + '.' + refChapter.toString() + '.' + refVerse.toString(); + result = + ' \\zreflink-s | link="' + encodeURIComponent(reference) + '"\\*' + text + ' \\zreflink-e\\* '; + } return result; } -function isNotBlank(str: string): boolean { - let result: boolean; - if (str === null || str === undefined) { - result = false; + +function getReferenceFromString( + reference: string +): [string, string, number, number, [number, number, string][]] { + let bookCollectionId: string; + let bookId: string; + let fromChapter: number; + let toChapter: number; + let verseRanges: [number, number, string][]; + + bookId = ''; + fromChapter = -1; + toChapter = -1; + verseRanges = [[-1, -1, '']]; + bookCollectionId = ''; + + if (isNotBlank(reference)) { + // Look for book collection code + let refToParse: string; + + if (reference.includes('|')) { + const chPos: number = reference.indexOf('|'); + bookCollectionId = reference.substring(0, chPos); + refToParse = reference.length > chPos + 1 ? reference.substring(chPos + 1) : ''; + } else if (reference.includes('/')) { + const chPos: number = reference.indexOf('/'); + bookCollectionId = reference.substring(0, chPos); + refToParse = reference.length > chPos + 1 ? reference.substring(chPos + 1) : ''; + } else { + bookCollectionId = ''; + refToParse = reference; + + // Check if a period has been used as the book collection separator + // e.g., C01.REV.7.9 + const components: string[] = splitString(reference, '.'); + if (components.length > 2) { + if ( + containsRomanScriptLetter(components[0]) && + containsRomanScriptLetter(components[1]) + ) { + const chPos: number = reference.indexOf('.'); + bookCollectionId = reference.substring(0, chPos); + refToParse = reference.substring(chPos + 1); + } + } + } + // Replace any %20 by periods + let ref: string = refToParse.replace('%20', '.'); + + // Replace any colons or spaces by periods + ref = ref.replace(':', '.'); + ref = ref.replace(' ', '.'); + + // Replace any en-dashes by hyphens + ref = ref.replace('\u2013', '-'); + + // Replace non-breaking hyphens by ordinary hyphens + ref = ref.replace('\u2011', '-'); + + const pattern: RegExp = /(\w+)(?:.([0-9-]+))?(?:.([0-9-]+))?/; + const m: RegExpMatchArray | null = ref.match(pattern); + if (m) { + // Book collection and book + bookId = m[1]; + // Chapter number or range + let chapter: string = m[2]; + // Verse or verse range + let verses: string = m[3]; + + // For case of only verse chapter in reference + if (!containsRomanScriptLetter(m[1])) { + bookId = ''; + chapter = m[1]; + verses = m[2]; + } + if (isPositiveInteger(chapter)) { + fromChapter = getIntFromString(chapter); + toChapter = fromChapter; + } else { + [fromChapter, toChapter] = parseChapterRange(chapter); + } + if (isNotBlank(verses)) { + verseRanges = parseVerseRange(verses); + } + } + } + return [bookCollectionId, bookId, fromChapter, toChapter, verseRanges]; +} + +function parseChapterRange(chapterRange: string): [number, number] { + let fromChapter: number; + let toChapter: number; + + if (isNotBlank(chapterRange)) { + let range: string = chapterRange.replace('\u2013', '-'); + range = stripAllExceptDigitsAndHyphens(range); + const hyphenPos: number = range.indexOf('-'); + if (hyphenPos > 0) { + fromChapter = getIntFromNumberString(range.substring(0, hyphenPos)); + toChapter = getIntFromNumberString(range.substring(hyphenPos + 1)); + } else { + fromChapter = getIntFromNumberString(range); + toChapter = fromChapter; + } } else { - result = str.length > 0 && str.trim().length > 0; + fromChapter = -1; + toChapter = -1; } - return result; + return [fromChapter, toChapter]; } -function isBlank(str: string): boolean { - return !isNotBlank(str); +function parseVerseRange(verseRange: string): [number, number, string][] { + const verseRanges: [number, number, string][] = []; + + if (isNotBlank(verseRange)) { + const ranges: string[] = splitString(verseRange, ','); + for (const range of ranges) { + const vRange: [number, number, string] = parseVerseRangeString(range); + verseRanges.push(vRange); + } + } + return verseRanges; } -function getFilenameExt(filename: string): string { - let extension: string = ''; - if (isNotBlank(filename)) { - const i = filename.lastIndexOf('.'); - const p = Math.max(filename.lastIndexOf('/'), filename.lastIndexOf('\\')); - if (i > p) { - extension = filename.substring(i + 1); +function parseVerseRangeString(input: string): [number, number, string] { + let fromVerse: number = -1; + let toVerse: number = -1; + let separator: string = ''; + let inputToUse: string = isNotBlank(input) ? input.trim() : ''; + if (isNotBlank(input)) { + // Replace en-dash by hyphen + inputToUse = input.replace('\u2013', '-'); + // Replace any non-default numeral system digits + inputToUse = convertDigitsInStringToDefaultNumeralSystem(inputToUse); + const VERSE_RANGE_PATTERN: RegExp = /(\d+(\w?))(?:\u200F?([-,])(\d+(\w?)))?/; + const match: RegExpMatchArray | null = inputToUse.match(VERSE_RANGE_PATTERN); + + if (match) { + fromVerse = isNotBlank(match[1]) ? getFirstDigitsAsInt(match[1]) : -1; + separator = isNotBlank(match[3]) ? match[3] : ''; + toVerse = isNotBlank(match[4]) ? getFirstDigitsAsInt(match[4]) : -1; } } - return extension; + return [fromVerse, toVerse, separator]; } diff --git a/src/lib/scripts/numeralUtils.ts b/scripts/numeralUtils.ts similarity index 100% rename from src/lib/scripts/numeralUtils.ts rename to scripts/numeralUtils.ts diff --git a/scripts/stringUtils.ts b/scripts/stringUtils.ts new file mode 100644 index 000000000..dd954705d --- /dev/null +++ b/scripts/stringUtils.ts @@ -0,0 +1,177 @@ +export function isNotBlank(str: string | null): boolean { + let result: boolean; + if (str === null || str === undefined) { + result = false; + } else { + result = str.length > 0 && str.trim().length > 0; + } + return result; +} +export function isBlank(str: string | null): boolean { + return !isNotBlank(str); +} +export function getFilenameExt(filename: string): string { + let extension: string = ''; + if (isNotBlank(filename)) { + const i = filename.lastIndexOf('.'); + const p = Math.max(filename.lastIndexOf('/'), filename.lastIndexOf('\\')); + if (i > p) { + extension = filename.substring(i + 1); + } + } + return extension; +} +export function filenameWithoutPath(filename: string): string { + let result: string = ''; + if (isNotBlank(filename)) { + const fname: string = filename.replace('\\', '/'); + result = fname.includes('/') ? fname.substring(fname.lastIndexOf('/') + 1) : fname; + } + return result; +} +export function isPositiveInteger(str: string): boolean { + let result: boolean; + + if (isBlank(str)) { + result = false; + } else { + result = true; + + for (let i = 0; i < str.length; i++) { + const c = str.charAt(i); + if (!isDigit(c)) { + result = false; + break; + } + } + } + + return result; +} +export function containsRomanScriptLetter(input: string): boolean { + return /.*[a-zA-Z].*/.test(input); +} +export function stripNonDigits(input: string | null): string | null { + if (input === null) { + return null; + } + + const sb: string[] = []; + + for (let i = 0; i < input.length; i++) { + const c = input.charAt(i); + if (c.charCodeAt(0) > 47 && c.charCodeAt(0) < 58) { + sb.push(c); + } + } + + return sb.join(''); +} +export function stripAllExceptDigitsAndHyphens(input: string): string { + const sb: string[] = []; + + for (let i = 0; i < input.length; i++) { + const c: string = input.charAt(i); + if (/[0-9-]/.test(c)) { + sb.push(c); + } + } + + return sb.join(''); +} +export function nextDigits(input: string | null, start: number): string { + // Returns next digit string starting from the given start pos + let result: string = ''; + + if (input !== null) { + let numDigits: number = 0; + let firstDigitPos: number = -1; + + for (let i = start; i < input.length; i++) { + const c = input.charAt(i); + if (c.charCodeAt(0) > 47 && c.charCodeAt(0) < 58) { + if (firstDigitPos < 0) { + firstDigitPos = i; + } + numDigits++; + } else if (numDigits > 0) { + break; + } + } + + if (numDigits > 0) { + result = input.substring(firstDigitPos, firstDigitPos + numDigits); + } + } + + return result; +} +export function parseToInt(input: string | null, defaultValue: number): number { + let result: number; + + if (isBlank(input)) { + result = defaultValue; + } else { + try { + result = parseInt(input!, 10); + } catch (ex) { + result = defaultValue; + } + } + + return result; +} +export function getFirstDigits(input: string | null): string { + return nextDigits(input, 0); +} + +export function getFirstDigitsAsInt(input: string | null): number { + let result: number = 0; + const digits: string = getFirstDigits(input); + + if (isNotBlank(digits)) { + result = parseToInt(digits, 0); + } + + return result; +} +export function getIntFromString(input: string | null): number { + const strDigits: string | null = stripNonDigits(input); + return getFirstDigitsAsInt(strDigits); +} + +export function isDigit(c: string): boolean { + return /\d/.test(c); +} + +export function splitString(text: string, separator: string): string[] { + // More efficient than String.split, since it does not use regex + const result: string[] = []; + + if (text && text.length > 0) { + let index1: number = 0; + let index2: number = text.indexOf(separator); + + while (index2 >= 0) { + const token: string = text.substring(index1, index2); + result.push(token); + index1 = index2 + 1; + index2 = text.indexOf(separator, index1); + } + + if (index1 < text.length) { + result.push(text.substring(index1)); + } + } + + return result; +} +export function padWithInitialZeros(input: string, length: number): string { + let result: string = input; + + while (result.length < length) { + result = '0' + result; + } + + return result; +} diff --git a/src/lib/components/ScriptureViewSofria.svelte b/src/lib/components/ScriptureViewSofria.svelte index 11cf3d745..b2c89e50c 100644 --- a/src/lib/components/ScriptureViewSofria.svelte +++ b/src/lib/components/ScriptureViewSofria.svelte @@ -13,7 +13,7 @@ TODO: import config from '$lib/data/config'; import { base } from '$app/paths'; import { footnotes, isBibleBook, refs } from '$lib/data/stores'; - import { generateHTML, getReferenceFromString } from '$lib/scripts/scripture-reference-utils'; + import { generateHTML } from '$lib/scripts/scripture-reference-utils'; import { onClickText, deselectAllElements, @@ -25,7 +25,7 @@ TODO: import { seekToVerse, hasAudioPlayed } from '$lib/data/audio'; import { audioPlayer } from '$lib/data/stores'; import { getAudioLinkHtml, getEmailLinkHtml, getReferenceLinkHtml, getTelephoneLinkHtml, getWebLinkHtml} from '$lib/scripts/milestoneLinks'; - import { isBlank, isNotBlank } from '$lib/scripts/stringUtils'; + import { splitString } from '$lib/scripts/stringUtils'; export let audioPhraseEndChars: string; export let bodyFontSize: any; @@ -297,30 +297,21 @@ TODO: // handles clicks on in text markdown reference links function referenceLinkClickHandler(event: any) { const linkRef = event.target.getAttribute('ref'); - let [collection, book, fromChapter, toChapter, verseRanges] = getReferenceFromString(linkRef); - const [fromVerse, toVerse, separator] = verseRanges[0]; - if ((book === '') && (fromChapter === -1)) { - // Invalid link - return; - } + const splitRef = splitString(linkRef, '.'); + const splitSet = splitRef[0]; + const refBook = splitRef[1]; + const splitChapter = splitRef[2]; + const splitVerse = splitRef[3]; + let refDocSet = currentDocSet; - if (isNotBlank(collection)) { - const refBc = config.bookCollections.find((x) => x.id === collection); - if (refBc) { - refDocSet = refBc.languageCode + '_' + refBc.id; - } else { - // Invalid collection - return; - } - } - if (book === '') { - book = currentBook; - } - let refVerse = fromVerse; - if (refVerse < 1) { - refVerse = 1; + const refBc = config.bookCollections.find((x) => x.id === splitSet); + if (refBc) { + refDocSet = refBc.languageCode + '_' + refBc.id; + } else { + // Invalid collection + return; } - refs.set({ docSet: refDocSet, book: book, chapter: fromChapter.toString(), verse: refVerse.toString() }); + refs.set({ docSet: refDocSet, book: refBook, chapter: splitChapter, verse:splitVerse }); return; } function addNotesDiv(workspace) { diff --git a/src/lib/components/StackView.svelte b/src/lib/components/StackView.svelte index a8988852c..60873a080 100644 --- a/src/lib/components/StackView.svelte +++ b/src/lib/components/StackView.svelte @@ -5,10 +5,9 @@