Skip to content

Commit

Permalink
update perl script to update lexicon
Browse files Browse the repository at this point in the history
  • Loading branch information
patricksptang committed Jan 4, 2024
1 parent f56c45e commit b473528
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -127,16 +127,16 @@ sub collectWordsUsedByAStrong {
print "$totalForWord,$numOfVerseFoundByServer,";
}

# Command-line arguments.
# Note: Perl comments start with '#'; '//' is the defined-or operator and
# must not be used as a comment marker.
my $inputFile = $ARGV[0];       # a file with a list of all Strong numbers
my $testament = $ARGV[1];       # optional
my $specificStrong = $ARGV[2];  # optional
open (FH, '<', $inputFile) or die "Could not open input file: $!";
my %passagesForBasicStrong;
my %numOfPassagesForBasicStrong;

# Bible version identifiers, grouped by the kind of text they contain.
my @allVersions = ("ESV", "KJV", "NASB2020", "BSB", "HCSB", "RV_th", "WEB_th",
"ASV-TH", "ChiUn", "ChiUns", "NASB1995", "RWebster", "spaBES2018eb", "AraSVD");
my @hebrewVersions = ("THOT", "OSMHB", "SP", "SPMT");
my @greekVersions = ("SBLG", "THGNT", "TR", "Byz", "WHNU", "Elzevir", "Antoniades", "KhmKCB", "LXX_th");
my @greekVersionsOTNT = ("ABEn", "ABGk");

# First column of the CSV header written to STDOUT.
print "Strong,";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,20 +11,60 @@ sub trim {
# Copy the lexicon held in the global @lexiconLines to the OUT filehandle,
# replacing each entry's "@StrFreqList=" line with the frequency list stored
# in the global %freqList for that entry's Strong number.
#
# Reads:   @lexiconLines, %freqList (Strong number => frequency string) and
#          %freqCount (Strong number => total count).
# Writes:  every lexicon line except existing "@StrFreqList=" lines to OUT,
#          plus a fresh "@StrFreqList=" line directly after each "@StrNo="
#          line whose code has an entry in %freqList.
# Reports: on STDOUT, every Strong number that has no entry in %freqList and
#          every Strong number whose previous total (summed from the old
#          "@StrFreqList=" line) is more than 1.3 times the new total. Each
#          report is followed by the neighbouring Strong numbers so the
#          entry is easy to locate in the lexicon.
# Takes no arguments and returns nothing.
sub addFreqToStepLexicon {
    # Captures replace $' so the matched remainder is explicit.
    my $strNoPattern    = qr/^\@StrNo=\t(.*)/s;
    my $freqListPattern = qr/^\@StrFreqList=\t(.*)/s;

    my $verifyCount  = 0;   # new total for the current entry (0 = nothing to verify)
    my $curCode      = "";
    my $previousCode = "";
    my $printNext    = 0;   # set after a report so the next code is printed too

    foreach my $currentLine (@lexiconLines) {
        if (my ($code) = $currentLine =~ $strNoPattern) {
            # Start of a new lexicon entry. $curCode must be the variable
            # declared outside the loop: re-declaring it here would make
            # $previousCode equal to the current code and leave the outer
            # $curCode empty in the report below.
            $previousCode = $curCode;
            $curCode      = $code;
            # Never carry the previous entry's count into this entry.
            $verifyCount  = 0;
            if ($printNext) {
                print " next strong number: $curCode\n";
                $printNext = 0;
            }
            print OUT $currentLine . "\n";
            next if $curCode eq "";
            if (exists($freqList{$curCode})) {
                print OUT "\@StrFreqList=\t" . $freqList{$curCode} . "\n";
                $verifyCount = $freqCount{$curCode} || 0;
            }
            else {
                print "Frequency count of $curCode is zero for all Bibles\n";
                print " previous strong number: $previousCode\n";
                $printNext = 1;
            }
            next;
        }

        my ($oldList) = $currentLine =~ $freqListPattern;
        if (!defined $oldList) {
            # Any other lexicon line is copied through unchanged.
            print OUT $currentLine . "\n";
            next;
        }

        # An old "@StrFreqList=" line: it is not copied to OUT (the fresh one
        # was already written after "@StrNo="); it is only used to sanity
        # check the new total against the previous one.
        next unless $verifyCount > 0;

        my $countOfPrevious = 0;
        foreach my $field (split(';', $oldList)) {
            # A field is either "count" or "count@otherCount"; only the
            # first number contributes to the total.
            my ($first) = split('@', trim($field));
            next unless defined $first;
            my $curNum = trim($first);
            next if $curNum eq "" || $curNum eq "0";
            $countOfPrevious += $curNum;
        }

        # Only a drop in the count is reported.
        next unless $verifyCount < $countOfPrevious;
        my $percentage = $countOfPrevious / $verifyCount;   # ratio old/new
        if ($percentage > 1.3) {
            print "$curCode has over 30% diff. current count: $verifyCount, previous count: $countOfPrevious, percent difference: $percentage\n";
            print " previous strong number: $previousCode\n";
            $printNext = 1;
        }
    }
    return;
}

Expand All @@ -47,7 +87,7 @@ sub addFreqToStepLexicon {
# Read the whole lexicon into the global @lexiconLines, stripping the line
# terminator from every line, then release the input handle.
chomp(@lexiconLines = <STEP_LEXICON_IN>);
close STEP_LEXICON_IN;
# %freqList and %freqCount are package (global) hashes keyed by Strong number;
# addFreqToStepLexicon reads both. A bare "%name;" statement only mentions the
# hash in void context and assigns nothing; the hashes are filled further down.
%freqList; # initialize as global variables because they will be used in subroutine

%freqCount;
# Strong number of the data row currently being processed.
my $curStrong = "";

# Non-zero until the first (header) row of the input has been consumed.
my $header = 1;
Expand All @@ -56,31 +96,33 @@ sub addFreqToStepLexicon {
# print $line . "\n";
my @spl = split(',', $line);
if ($header > 0) {
for (my $i = 1; $i < $#spl; $i = $i + 2) {
my @spl2 = split('-', $spl[$i]);
print trim($spl2[0]) . ";";
}
print "\n";
# for (my $i = 1; $i < $#spl; $i = $i + 2) {
# my @spl2 = split('-', $spl[$i]);
# print trim($spl2[0]) . ";";
# }
# print "\n";
$header = 0;
}
else {
$curStrong = trim($spl[0]);
my $outputString = "";
my $checkNotZero = 0;
for (my $i = 1; $i < $#spl; $i = $i + 2) {
for (my $i = 1; $i < $#spl; $i = $i + 2) {
my $curNum = trim($spl[$i]);
my $nextNum = trim($spl[$i+1]);
# print "x: $curNum $nextNum\n";
if (($curNum eq "") || ($curNum eq "0")) { $outputString .= ";";}
else {
if ($curNum eq $nextNum) { $outputString .= "$curNum;";}
else { $outputString .= "$curNum\@$nextNum;";}
$checkNotZero += $curNum;
}
}
if ($checkNotZero > 0) {
if ($checkNotZero > 0) {
$outputString =~ s/\;$//;
$freqList{$curStrong} = $outputString;
# print "$outputString\n";
$freqCount{$curStrong} = $checkNotZero;
# print "checkNotZero $checkNotZero\n";
}
}
}
Expand Down

0 comments on commit b473528

Please sign in to comment.