Skip to content

Commit

Permalink
separate out the Greek / Hebrew accent and vowel filter
Browse files Browse the repository at this point in the history
  • Loading branch information
patricksptang committed Oct 9, 2023
1 parent 493067d commit 3fa1c0e
Show file tree
Hide file tree
Showing 4 changed files with 50 additions and 20 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1370,29 +1370,31 @@ protected void setOptions(final TransformingSAXEventProvider tsep, final List<Lo
break;
case GREEK_ACCENTS:
if (isGreek) {
tsep.setParameter("RemovePointing", "false");
tsep.setParameter("RemoveVowels", "false");
tsep.setParameter("RemoveGreekAccent", "false");
}
break;
case HEBREW_VOWELS:
if (isHebrew) {
tsep.setParameter("RemoveVowels", "false");
tsep.setParameter("RemoveHebrewVowels", "false");
}
break;
case HEBREW_ACCENTS:
if (isHebrew) {
tsep.setParameter("RemovePointing", "false");
tsep.setParameter("RemoveVowels", "false");
tsep.setParameter("RemoveHebrewPointing", "false");
tsep.setParameter("RemoveHebrewVowels", "false");
}
break;
}
}
}

//if no greek or hebrew, then override to false
if (!isGreek && !isHebrew) {
tsep.setParameter("RemovePointing", false);
tsep.setParameter("RemoveVowels", false);
if (!isGreek) {
tsep.setParameter("RemoveGreekAccent", "false");
}
if (!isHebrew) {
tsep.setParameter("RemoveHebrewPointing", "false");
tsep.setParameter("RemoveHebrewVowels", "false");
}
// SM Verse Per Line Issue for RTL
// if (!books[0].getBookMetaData().isLeftToRight()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import static com.tyndalehouse.step.core.utils.StringUtils.isEmpty;
import static com.tyndalehouse.step.core.utils.language.GreekUtils.removeGreekTranslitMarkUpForIndexing;
Expand Down Expand Up @@ -237,6 +239,33 @@ public static String unAccent(final String word) {
return unAccent(unAccent(word, true), false);
}

/**
 * Matches any character from the Greek (U+0370-03FF) or Greek Extended (U+1F00-1FFF) blocks.
 * Greek Extended is included so that words made up solely of precomposed polytonic letters
 * are still recognised as Greek. Compiled once because the pattern is constant.
 */
private static final Pattern GREEK_BLOCKS_PATTERN = Pattern.compile("[\\p{InGreek}\\p{InGreekExtended}]");

/** Matches any character from the Hebrew block (U+0590-05FF). Compiled once because the pattern is constant. */
private static final Pattern HEBREW_BLOCK_PATTERN = Pattern.compile("\\p{InHebrew}");

/**
 * Conditionally strips Greek accents, Hebrew vowels and/or Hebrew accents from a word,
 * depending on which characters the word contains and which flags are switched on.
 * <p>
 * The flags are strings rather than booleans (the stylesheets in this change pass their
 * {@code Remove*} parameters straight through); only the exact value {@code "true"} enables
 * a filter, anything else (including {@code null}) leaves it off.
 * <ul>
 * <li>Greek filter on and the word contains a Greek character: returns
 * {@code unAccent(word, true)} and skips the Hebrew handling.</li>
 * <li>Otherwise, a Hebrew filter on and the word contains a Hebrew character: applies
 * {@code unAccent(word, false)} when vowels are to be removed, then
 * {@code unAccentHebrewLeavingVowels} when accents are to be removed.</li>
 * <li>Otherwise the word is returned untouched.</li>
 * </ul>
 *
 * @param word               the word to filter, may be {@code null}
 * @param removeGreekAccent  {@code "true"} to remove accents from Greek words
 * @param removeHebrewVowel  {@code "true"} to remove vowels from Hebrew words
 * @param removeHebrewAccent {@code "true"} to remove accents from Hebrew words
 * @return the filtered word, or {@code word} itself when no filter applies
 */
public static String unAccentOrVowels(final String word, final String removeGreekAccent,
        final String removeHebrewVowel, final String removeHebrewAccent) {
    if (word == null) {
        return null;
    }

    // Constant-first comparison so that an absent (null) flag simply means "off".
    final boolean stripGreekAccents = "true".equals(removeGreekAccent);
    final boolean stripHebrewVowels = "true".equals(removeHebrewVowel);
    final boolean stripHebrewAccents = "true".equals(removeHebrewAccent);

    if (stripGreekAccents && GREEK_BLOCKS_PATTERN.matcher(word).find()) {
        return unAccent(word, true);
    }

    if ((stripHebrewVowels || stripHebrewAccents) && HEBREW_BLOCK_PATTERN.matcher(word).find()) {
        String updatedWord = word;
        // Order matters: vowels are handled first, then the accent-only pass runs on the result.
        if (stripHebrewVowels) {
            updatedWord = unAccent(updatedWord, false);
        }
        if (stripHebrewAccents) {
            updatedWord = unAccentHebrewLeavingVowels(updatedWord);
        }
        return updatedWord;
    }

    return word;
}

/**
* takes accents and other punctuation off the word
*
Expand All @@ -248,7 +277,6 @@ public static String unAccent(final String word, final boolean isGreek) {
return unAccent(word, isGreek, true);
}


/**
* takes accents and other punctuation off the word
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,9 @@

<!-- true to display color coding information -->
<xsl:param name="ColorCoding" select="'false'"/>
<xsl:param name="RemovePointing" select="'true'"/>
<xsl:param name="RemoveVowels" select="'true'"/>
<xsl:param name="RemoveGreekAccent" select="'true'"/>
<xsl:param name="RemoveHebrewPointing" select="'true'"/>
<xsl:param name="RemoveHebrewVowels" select="'true'"/>
<xsl:param name="HideCompareHeaders" select="'false'"/>

<xsl:param name="HideXGen" select="'false'"/>
Expand Down Expand Up @@ -2394,11 +2395,8 @@
<xsl:when test="./ancestor::cell/@xml:lang != 'grc' and ./ancestor::cell/@xml:lang != 'he' and ./ancestor::cell/@xml:lang != 'hbo'">
<xsl:value-of select="."/>
</xsl:when>
<xsl:when test="$RemoveVowels = 'true'">
<xsl:value-of select="conversion:unAccent(string(.))"/>
</xsl:when>
<xsl:when test="$RemovePointing = 'true'">
<xsl:value-of select="conversion:unAccentLeavingVowels(string(.))"/>
<xsl:when test="$RemoveGreekAccent = 'true' or $RemoveHebrewVowels = 'true' or $RemoveHebrewPointing = 'true'">
<xsl:value-of select="conversion:unAccentOrVowels(string(.), $RemoveGreekAccent, $RemoveHebrewVowels, $RemoveHebrewPointing)"/>
</xsl:when>
<xsl:otherwise>
<xsl:value-of select="."/>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,8 +105,10 @@
<!-- true to display color coding information -->
<xsl:param name="ColorCoding" select="'false'" />
<xsl:param name="DivideHebrew" select="'false'" />
<xsl:param name="RemovePointing" select="'true'" />
<xsl:param name="RemoveVowels" select="'true'" />
<xsl:param name="RemoveGreekPointing" select="'true'" />
<xsl:param name="RemoveGreekVowels" select="'true'" />
<xsl:param name="RemoveHebrewPointing" select="'true'" />
<xsl:param name="RemoveHebrewVowels" select="'true'" />


<xsl:param name="baseVersion" select="''" />
Expand Down Expand Up @@ -1956,8 +1958,8 @@
<xsl:template name="outputPunctuatedText">
<xsl:param name="text" />
<xsl:choose>
<xsl:when test="$RemoveVowels = 'true'"><xsl:value-of select="conversion:unAccent(string($text))" /></xsl:when>
<xsl:when test="$RemovePointing = 'true'"><xsl:value-of select="conversion:unAccentLeavingVowels(string($text))" /></xsl:when>
<!-- When any of the three filters is enabled, delegate to conversion:unAccentOrVowels, passing all three flags through. -->
<xsl:when test="$RemoveGreekAccent = 'true' or $RemoveHebrewVowels = 'true' or $RemoveHebrewPointing = 'true'">
<xsl:value-of select="conversion:unAccentOrVowels(string($text), $RemoveGreekAccent, $RemoveHebrewVowels, $RemoveHebrewPointing)" />
</xsl:when>
<xsl:otherwise><xsl:value-of select="." /></xsl:otherwise>
</xsl:choose>
</xsl:template>
Expand Down

0 comments on commit 3fa1c0e

Please sign in to comment.