Skip to content

Commit

Permalink
code refactoring
Browse files Browse the repository at this point in the history
  • Loading branch information
arysin committed Jan 17, 2024
1 parent 2aae819 commit f0e18fe
Show file tree
Hide file tree
Showing 4 changed files with 127 additions and 96 deletions.
112 changes: 16 additions & 96 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -142,113 +142,33 @@ task checkDups(type: JavaExec) {
workingDir = projectDir
}

// Prints suspicious entries from the "*anim*.lst" files in data/dict.
// An entry is reported when it starts with an uppercase Ukrainian letter, an optional
// apostrophe and a lowercase letter, has no "/adj" later on the line, and does not
// contain ".<", "/<", "-фонд" or "-фест". The output is sorted with a Ukrainian collator.
task checkAnim {
    doLast {
        def dictDir = file("data/dict")

        def files = dictDir.listFiles().findAll { it.name =~ /.*anim.*\.lst/ }

        def lines = files.collect { File f ->
                f.readLines()
                    .findAll { it =~ /^[А-ЯІЇЄҐ]'?[а-яіїєґ](?!.*\/adj)/ && ! (it =~ /[.\/]<|-(фонд|фест)/) }
            }
            .flatten()
            .grep { it }

        java.text.Collator coll = java.text.Collator.getInstance(new Locale("uk", "UA"));
        coll.setStrength(java.text.Collator.IDENTICAL)
        coll.setDecomposition(java.text.Collator.NO_DECOMPOSITION)

        println lines.toSorted(coll).join("\n")
        // size() has to be called as a method: `lines.size` is a property access, which
        // Groovy applies to every element of the list instead of returning the element count
        println "Found ${lines.size()} suspicious anim"
    }
}

task checkAnim(type: JavaExec) {
classpath = sourceSets.test.runtimeClasspath
mainClass = "org.dict_uk.check.CheckAnim"

// Checks the ":xp" tags of the entries in the data/dict .lst files.
// For every base word (first token of the line) it collects the ":xp?" tags and prints
//  - "out of order" when the smallest tag is not ":xp1",
//  - "single" when the word has only one tag,
//  - "dups" when the same tag occurs more than once.
task xps {
    doLast {
        def tagsByBase = [:].withDefault { [] }

        def lstFiles = file("data/dict").listFiles().findAll { it.name.endsWith('.lst') }

        lstFiles.each { lstFile ->
            lstFile.readLines().each { rawLine ->
                // drop the trailing comment before looking at the entry
                def entry = rawLine.replaceFirst(/#.*/, '')

                def relevant = entry.contains(":xp") && ! entry.startsWith("+cs") && ! entry.startsWith('#')
                if( ! relevant )
                    return

                // reduce the entry to "<base> <:xpN>"
                def (base, xp) = entry.replaceAll(/^([^ ]+)\h.*?(:xp.).*/, '$1 $2').split(/ /)
                tagsByBase[base] << xp
            }
        }

        // sort every tag list in place before any report is printed
        tagsByBase.values().each { it.sort() }

        tagsByBase.each { base, tags ->
            if( tags[0] != ':xp1' ) {
                println "out of order: $base: $tags"
            }
            else if( tags.size() == 1 ) {
                println "single: $base: $tags"
            }
            else {
                def dups = tags.countBy { it }.findAll { it.value > 1 }.collect { it.key }
                if( dups ) {
                    println "dups: $base: $dups"
                }
            }
        }

        // java.text.Collator coll = java.text.Collator.getInstance(new Locale("uk", "UA"));
        // coll.setStrength(java.text.Collator.IDENTICAL)
        // coll.setDecomposition(java.text.Collator.NO_DECOMPOSITION)

    }
}
workingDir = projectDir
}

task checkReplacements {
doLast {
def dictDir = file("data/dict")

def files = dictDir.listFiles().findAll { it.name.endsWith('.lst') || it.name.endsWith('replace.txt') }

def replWords = files.collect { it.text.split("\n") } \
.flatten() \
.findAll { it.contains(" #> ") }
.collect {
def repls = it.split(" #> ")[1].trim().split(/[;, \|]+/)
def word = it.split(' ', 2)[0]
if( word in repls ) {
println "Replacement is same as word: $word"
}
repls
}
.flatten()
.collect { it.replaceAll(/[()]/, '').replaceFirst(/-таки$/, '') }
.findAll { it =~ /[а-яіїєґ]/ }
.unique().sort()

def spellWords = new File("out/words_spell.txt").text.split('\n')
spellWords += new File("data/dict/slang.lst").text.split('\n').collect{ it.replaceFirst(/ .*/, '') }
spellWords += new File("data/dict/arch.lst").text.split('\n').collect{ it.replaceFirst(/ .*/, '') }

println "Unique replacement words: ${replWords.size}"

replWords.removeAll(spellWords)
// Runs the org.dict_uk.check.CheckXps main class on the test runtime classpath,
// with the project directory as the working directory.
task checkXps(type: JavaExec) {
    workingDir = projectDir

    mainClass = 'org.dict_uk.check.CheckXps'
    classpath = sourceSets.test.runtimeClasspath
}

task checkReplacements(type: JavaExec) {
classpath = sourceSets.test.runtimeClasspath
mainClass = "org.dict_uk.check.CheckReplacements"

println "Unknown:\n" + replWords.join("\n")
println "Total uknown: ${replWords.size}"
}
workingDir = projectDir
}

// Runs the org.dict_uk.check.CheckSemtags main class on the test runtime classpath,
// with the project directory as the working directory.
task checkSemtags(type: JavaExec) {
    workingDir = projectDir

    mainClass = 'org.dict_uk.check.CheckSemtags'
    classpath = sourceSets.test.runtimeClasspath

    // currently disabled command-line arguments:
    // def opts = " --indent --mfl --stats --wordlist"
    // args "--aff", "../data/affix"
    // args "--dict", "dict"
    // args opts.split()
}
}


task showExpandCommand {
Expand Down
23 changes: 23 additions & 0 deletions src/test/groovy/org/dict_uk/check/CheckAnim.groovy
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#!/bin/env groovy

package org.dict_uk.check;

// Prints suspicious entries from the "*anim*.lst" files in data/dict.
// An entry is reported when it starts with an uppercase Ukrainian letter, an optional
// apostrophe and a lowercase letter, has no "/adj" later on the line, and does not
// contain ".<", "/<", "-фонд" or "-фест".
def files = new File("data/dict").listFiles().findAll { it.name =~ /.*anim.*\.lst/ }

// stop right away if no matching file was found
assert files

// every kept line matches the first pattern, so it is never empty and needs no extra filtering
def suspicious = files.collectMany { File lstFile ->
    lstFile.readLines().findAll { entry ->
        (entry =~ /^[А-ЯІЇЄҐ]'?[а-яіїєґ](?!.*\/adj)/) && ! (entry =~ /[.\/]<|-(фонд|фест)/)
    }
}

// Ukrainian collation, so the report is sorted in Ukrainian alphabetical order
def ukCollator = java.text.Collator.getInstance(new Locale("uk", "UA"))
ukCollator.strength = java.text.Collator.IDENTICAL
ukCollator.decomposition = java.text.Collator.NO_DECOMPOSITION

println suspicious.toSorted(ukCollator).join("\n")
println "Found ${suspicious.size()} suspicious anim"
48 changes: 48 additions & 0 deletions src/test/groovy/org/dict_uk/check/CheckReplacements.groovy
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#!/bin/env groovy

package org.dict_uk.check;

// Validates the replacement hints (the text after " #> ") found in the data/dict
// .lst and *replace.txt files:
//  - warns when a replacement is identical to the word it replaces,
//  - warns when the same replacement is listed twice for one entry,
//  - prints every Cyrillic replacement word that is found neither in out/words_spell.txt
//    nor as a first token in data/dict/slang.lst or data/dict/arch.lst.
def dictDir = new File("data/dict")

def files = dictDir.listFiles().findAll { it.name.endsWith('.lst') || it.name.endsWith('replace.txt') }

assert files

// the "too many replacements" warning is switched off for now
def warnTooManyRepls = false

def replWords = files.collect { it.text.split("\n") } \
    .flatten() \
    .findAll { it.contains(" #> ") }
    .collect { line ->
        def replStr = line.split(" #> ")[1].trim()
        // alternatives are separated by "|"
        def replItems = replStr.split(/\|/)
        if( warnTooManyRepls && replItems.size() > 5 )
            println "WARNING: Too many replacements ${replItems.size()} > 5 for\n\t$line"

        // individual words: alternatives further split on ';', ',' and spaces
        def repls = replStr.split(/[;, \|]+/)
        def word = line.split(' ', 2)[0]
        if( (word in repls) && ! (line =~ /(?iu) - [а-яіїєґ].* #> /) ) {
            println "WARNING: Replacement is same as word: $word:\n\t$line"
        }

        def dups = replItems.countBy{it}.grep{it.value > 1}.collect{it.key}
        if( dups ) {
            println "WARNING: Duplicate replacements: $dups:\n\t$line"
        }

        repls
    }
    .flatten()
    .collect { it.replaceAll(/[()]/, '').replaceFirst(/-таки$/, '') }
    .findAll { it =~ /[а-яіїєґ]/ }
    .unique().sort()

// known words are kept in a hash set so that removeAll() below does a constant-time
// lookup per replacement word instead of scanning the whole word list every time
def spellWords = new HashSet<String>()
spellWords.addAll(new File("out/words_spell.txt").text.split('\n') as List)
spellWords.addAll(new File("data/dict/slang.lst").text.split('\n').collect{ it.replaceFirst(/ .*/, '') })
spellWords.addAll(new File("data/dict/arch.lst").text.split('\n').collect{ it.replaceFirst(/ .*/, '') })

println "Unique replacement words: ${replWords.size()}"

replWords.removeAll(spellWords)


println "Unknown:\n" + replWords.join("\n")
println "Total unknown: ${replWords.size()}"
40 changes: 40 additions & 0 deletions src/test/groovy/org/dict_uk/check/CheckXps.groovy
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#!/bin/env groovy

package org.dict_uk.check;

// Checks the ":xp" tags of the entries in the data/dict .lst files.
// Lines matching "verb" without a later "inf", or "noun" without a later
// ":m:v_naz" / ":n:v_naz" / ":f:v_naz" / ":p:v_naz:ns", are ignored.
// For every remaining base word (first token of the line) the ":xp?" tags are collected
// and the script prints
//  - "out of order" when the smallest tag is not ":xp1",
//  - "single" when the word has only one tag,
//  - "dups" when the same tag occurs more than once.
def files = new File("data/dict").listFiles().findAll { it.name.endsWith('.lst') }

// stop right away if no .lst file was found
assert files

def tagsByBase = [:].withDefault { [] }

files.each { lstFile ->
    lstFile.readLines().each { rawLine ->
        // drop the trailing comment before looking at the entry
        def entry = rawLine.replaceFirst(/#.*/, '')

        def relevant = entry.contains(":xp") && ! entry.startsWith("+cs") && ! entry.startsWith('#')
        if( ! relevant )
            return

        if( entry =~ /verb(?!.*inf)|noun(?!.*(:[mnf]:v_naz|:p:v_naz:ns))/ )
            return

        // reduce the entry to "<base> <:xpN>"
        def (base, xp) = entry.replaceAll(/^([^ ]+)\h.*?(:xp.).*/, '$1 $2').split(/ /)
        tagsByBase[base] << xp
    }
}

// sort every tag list in place before any report is printed
tagsByBase.values().each { it.sort() }

tagsByBase.each { base, tags ->
    if( tags[0] != ':xp1' ) {
        println "out of order: $base: $tags"
    }
    else if( tags.size() == 1 ) {
        println "single: $base: $tags"
    }
    else {
        def dups = tags.countBy { it }.findAll { it.value > 1 }.collect { it.key }
        if( dups ) {
            println "dups: $base: $dups"
        }
    }
}

// java.text.Collator coll = java.text.Collator.getInstance(new Locale("uk", "UA"));
// coll.setStrength(java.text.Collator.IDENTICAL)
// coll.setDecomposition(java.text.Collator.NO_DECOMPOSITION)

0 comments on commit f0e18fe

Please sign in to comment.