diff --git a/Lushu/src/main/kotlin/lushu/ContextGrammar/GeneratorApp.kt b/Lushu/src/main/kotlin/lushu/ContextGrammar/GeneratorApp.kt index 9714e91..3f21c61 100644 --- a/Lushu/src/main/kotlin/lushu/ContextGrammar/GeneratorApp.kt +++ b/Lushu/src/main/kotlin/lushu/ContextGrammar/GeneratorApp.kt @@ -24,6 +24,7 @@ fun main(args: Array) { println( "options:\nHTML = 0\nC = 1\n" ) + return } } diff --git a/Lushu/src/main/kotlin/lushu/ContextGrammar/Grammar/Grammar.kt b/Lushu/src/main/kotlin/lushu/ContextGrammar/Grammar/Grammar.kt index 30844e3..c366281 100644 --- a/Lushu/src/main/kotlin/lushu/ContextGrammar/Grammar/Grammar.kt +++ b/Lushu/src/main/kotlin/lushu/ContextGrammar/Grammar/Grammar.kt @@ -1,12 +1,12 @@ package lushu.ContextGrammar.Grammar -import lushu.ContextGrammar.MapGrammar.MapGrammar +import lushu.ContextGrammar.MapGrammar.Grammar import java.io.File import java.io.FileReader class Grammar( private val contextAnalyzer: ContextAnalyzer = ContextAnalyzer(), - private val mapGrammar: MapGrammar = MapGrammar() + private val mapGrammar: Grammar = Grammar() ) { fun consume(words: MutableList): String { val consumedWords = contextAnalyzer.parsing(words) diff --git a/Lushu/src/main/kotlin/lushu/ContextGrammar/Grammar/Rules.kt b/Lushu/src/main/kotlin/lushu/ContextGrammar/Grammar/Rules.kt index f74d66a..0b4d06f 100644 --- a/Lushu/src/main/kotlin/lushu/ContextGrammar/Grammar/Rules.kt +++ b/Lushu/src/main/kotlin/lushu/ContextGrammar/Grammar/Rules.kt @@ -46,7 +46,7 @@ class Rules(private val root: GrammarNode = GrammarNode()) { inputTokens: MutableList, mutableTokens: MutableList, index: Int, - current: GrammarNode? + current: GrammarNode?, ): Pair { when { // don't match @@ -147,7 +147,7 @@ class Rules(private val root: GrammarNode = GrammarNode()) { inputTokens: MutableList, mutableTokens: MutableList, index: Int, - current: GrammarNode? + current: GrammarNode?, ): Int { when { current == null -> return noMatchFound @@ -172,7 +172,7 @@ class Rules(private val root: GrammarNode = GrammarNode()) { inputTokens, mutableTokens, index, - current + current, ) if ((nextNode != null) && (nextNode.isSensitive())) { @@ -274,7 +274,7 @@ class Rules(private val root: GrammarNode = GrammarNode()) { dsl.isSensitive(), dsl.isStar(), dsl.isNonMergeable(), - endOfContext + endOfContext, ) dsl.setIsCase(nextCases) diff --git a/Lushu/src/main/kotlin/lushu/ContextGrammar/MapGrammar/MapGrammar.kt b/Lushu/src/main/kotlin/lushu/ContextGrammar/MapGrammar/ContextMap.kt similarity index 62% rename from Lushu/src/main/kotlin/lushu/ContextGrammar/MapGrammar/MapGrammar.kt rename to Lushu/src/main/kotlin/lushu/ContextGrammar/MapGrammar/ContextMap.kt index 208e7e6..403533b 100644 --- a/Lushu/src/main/kotlin/lushu/ContextGrammar/MapGrammar/MapGrammar.kt +++ b/Lushu/src/main/kotlin/lushu/ContextGrammar/MapGrammar/ContextMap.kt @@ -1,25 +1,18 @@ package lushu.ContextGrammar.MapGrammar -import lushu.Merger.Lattice.Node.MergeableToken +import lushu.Merger.Lattice.Node.* import lushu.Merger.Lattice.Node.MergerS -import lushu.Merger.Lattice.Node.Node -import lushu.Merger.Lattice.Node.NonMergeableToken -import lushu.Merger.Lattice.Node.Token -import org.slf4j.LoggerFactory -import java.io.File -import java.io.FileReader - -class MapGrammar() { - private val logger = LoggerFactory.getLogger(this::class.java) +import lushu.Merger.Merger.Merger +class ContextMap( + val dsl: DSL = DSL(), + val merger: Merger, +) { data class PivotData( val openingRef: Token, - val func: String + val func: String, ) - private val dsl: DSL = DSL() - private val merger = MergerS.merger() - // openingMap contains the first tags of contexts. For instance, in ' // BEGIN <*>bla END ', BEGIN is an opening token. private val openingMap: MutableMap = mutableMapOf() @@ -30,48 +23,10 @@ class MapGrammar() { // pivotMap stores the string we are searching for. For instance, in ' BEGIN // <*>bla END ', 'bla' is a pivot token. - private val pivotMap: MutableMap = mutableMapOf() - - // TODO: remove once we support customizable actions from the user side - // -aholmquist 2023-11-03 - private var maxBlocks: Int = 0 - - // TODO: remove maxBlocks once we support customizable actions from the user - // side -aholmquist 2023-11-02 - fun getMaxBlocks(): Int = maxBlocks - - fun consume(file: File) = FileReader(file).use { reader -> - reader.forEachLine { - consume(it) - } - } - - fun consume(input: String) = streamString(input) - - fun addContext(contextInput: String?) { - if (contextInput.isNullOrBlank()) { - return - } - val contexts = extractContext(contextInput) - contexts.forEach { insertContext(it) } - logger.debug("Maps after adding context:\n$openingMap\n$closingMap\n$pivotMap") - } - - private fun streamString(input: String) = - blankRegex.findAll(input).forEach { matchResult -> match(matchResult.value) } - - private fun string2list(string: String): List { - val woNewline = string.split(newlineDelim).joinToString(spaceDelim) - return woNewline.split(spaceDelim) - } + private val pivotMap: MutableMap = mutableMapOf() - private fun toToken(word: String): Token { - val isMergeable = dsl.isMergeable(word) - val woTags = dsl.removeAllTags(word) - if (isMergeable) { - return MergeableToken(woTags) - } - return NonMergeableToken(woTags) + private inline fun insert(map: MutableMap, key: Token, value: V) { + map[key] = value } private inline fun inMap(map: MutableMap, token: Token): Token? { @@ -102,7 +57,7 @@ class MapGrammar() { this.openingMap[token] = oldValue + value } - private fun match(word: String) { + fun match(word: String) { val wordTokens = merger.tokensFromString(word) val openingFound = inMap(openingMap, wordTokens) @@ -122,58 +77,32 @@ class MapGrammar() { if (pivot.component1().match(wordTokens)) { val openingReference = pivot.component2().openingRef val value = openingMap[openingReference] - if (value != null && value > 1) { - maxBlocks++ + if (value != null && value >= 1) { } } } } - private fun extractContext(input: String): List { - val matcher = contextRegex.findAll(input) - val substrings = matcher.map { it.groupValues[1] }.toList() - return substrings - } - - private fun getLambdaFunction(input: String): String { - val regex = Regex(".*.+.*") - val matchResult = regex.find(input) - - val res = matchResult?.groups?.get(1)?.value - if (res.isNullOrBlank()) { - return "println" - } - return res - } - - private inline fun insert(map: MutableMap, key: Token, value: V) { - map[key] = value - } - private inline fun insert(map: MutableMap, key: String, value: V) { insert(map, toToken(key), value) } private fun insertPivot(pivots: List, openingToken: Token) { pivots.forEach { pivot -> - val func = getLambdaFunction(pivot) + val func = dsl.getLambdaFunction(pivot) insert(pivotMap, pivot, PivotData(openingToken, func)) } } - private inline fun findEquivalent(map: MutableMap, token: Token): Pair { - map.forEach { element -> - if (element.key == token) { - return Pair(element.key, true) - } - } - return Pair(token, false) + private fun string2list(string: String): List { + val woNewline = string.split(Grammar.newlineDelim).joinToString(Grammar.spaceDelim) + return woNewline.split(Grammar.spaceDelim) } - private fun insertContext(context: String) { + private fun insert(context: String) { val contextTokens = string2list(context) - if (contextTokens.size < minElems) { + if (contextTokens.size < Grammar.minElems) { println("Warning: Insufficient number of elements provided.") return } @@ -182,19 +111,41 @@ class MapGrammar() { val res = findEquivalent(openingMap, token) val openingToken = res.first if (!res.second) { - insert(openingMap, openingToken, noOccurrences) + insert(openingMap, openingToken, Grammar.noOccurrences) } insert(closingMap, contextTokens.last(), openingToken) insertPivot(contextTokens.subList(1, contextTokens.size - 1), openingToken) } + fun insertContext(context: String) { + val contexts = dsl.extractContext(context) + contexts.forEach { insert(it) } + } + + private inline fun findEquivalent(map: MutableMap, token: Token): Pair { + map.forEach { element -> + if (element.key == token) { + return Pair(element.key, true) + } + } + return Pair(token, false) + } + + private fun toToken(word: String): Token { + val isMergeable = dsl.isMergeable(word) + val woTags = dsl.removeAllTags(word) + if (isMergeable) { + return MergeableToken(woTags) + } + return NonMergeableToken(woTags) + } + companion object { val minElems = 3 val noOccurrences = 0 val spaceDelim = " " val newlineDelim = "\n" - val contextRegex = Regex("""(.*?)""") val blankRegex = Regex("""\S+""") } } diff --git a/Lushu/src/main/kotlin/lushu/ContextGrammar/MapGrammar/ContextTree.kt b/Lushu/src/main/kotlin/lushu/ContextGrammar/MapGrammar/ContextTree.kt new file mode 100644 index 0000000..cf6663c --- /dev/null +++ b/Lushu/src/main/kotlin/lushu/ContextGrammar/MapGrammar/ContextTree.kt @@ -0,0 +1,57 @@ +package lushu.ContextGrammar.MapGrammar + +import lushu.ContextGrammar.Grammar.Rules +import lushu.Merger.Lattice.Node.GrammarNode +import lushu.Merger.Merger.Merger + +class ContextTree( + val dsl: DSL = DSL(), + val merger: Merger, +) { + private val root: GrammarNode = GrammarNode() + + private fun string2list(string: String): List { + val woNewline = string.split(Grammar.newlineDelim).joinToString(Grammar.spaceDelim) + return woNewline.split(Grammar.spaceDelim) + } + + private fun insert(context: MutableList, current: GrammarNode? = root) { + if (context.isNullOrEmpty()) { + return + } + + val firstWord = 0 + val word = context[firstWord] + + val func = dsl.getLambdaFunction(word) + val mergeable = dsl.isMergeable(word) + val kleene = dsl.isStarCase(word) + + val wordWoTags = dsl.removeAllTags(word) + + // true if it's the last word in the context + val endOfContext: Boolean = (context.size == 1) + + val updatedCurrent = current?.findOrAddChild( + wordWoTags, + kleene, + mergeable, + func, + endOfContext, + ) + + context.removeAt(firstWord) + + if (nextCases[Rules.starCase]) { + addContextRule(context, current) + } else { + addContextRule(context, updatedCurrent) + } + } + + fun insertContext(context: String) { + val contexts = dsl.extractContext(context) + val contextTokens = string2list(context) + insert(contextTokens.toMutableList()) + } +} diff --git a/Lushu/src/main/kotlin/lushu/ContextGrammar/MapGrammar/DSL.kt b/Lushu/src/main/kotlin/lushu/ContextGrammar/MapGrammar/DSL.kt index a0d96fd..3bf1e41 100644 --- a/Lushu/src/main/kotlin/lushu/ContextGrammar/MapGrammar/DSL.kt +++ b/Lushu/src/main/kotlin/lushu/ContextGrammar/MapGrammar/DSL.kt @@ -4,14 +4,18 @@ enum class Tags(val tagName: String) { CONTEXT("c"), LATTICE("l"), KLEENE("*"), - ACTION("a") + ACTION("a"), } class DSL( - val tags: List = Tags.values().map { it.tagName }.map { "<$it>" } + val tags: List = Tags.values().map { it.tagName }.map { "<$it>" }, ) { fun isMergeable(string: String): Boolean { - return string.contains("") || string.contains("") + return string.contains(Tags.LATTICE.tagName) || string.contains(Tags.LATTICE.tagName) + } + + fun isStarCase(string: String): Boolean { + return string.contains(Tags.KLEENE.tagName) || string.contains(Tags.KLEENE.tagName) } private fun opening2CloserTag(openingTag: String): String { @@ -27,7 +31,35 @@ class DSL( return tags.map { opening2CloserTag(it) }.toList() } + fun removeActionTag(word: String): String { + val regex = Regex("""]+>|""") + return word.replace(regex, "") + } + fun removeAllTags(word: String): String { - return removeTags(removeTags(word, tags.toList()), openingTags2ClosingTags(tags)) + val wordWT = removeTags(removeTags(word, tags.toList()), openingTags2ClosingTags(tags)) + return removeActionTag(wordWT) + } + + fun extractContext(input: String): List { + val matcher = Grammar.contextRegex.findAll(input) + val substrings = matcher.map { it.groupValues[1] }.toList() + return substrings + } + + fun getLambdaFunction(input: String): String { + val matchResult = actionRegex.find(input) + + val res = matchResult?.groups?.get(1)?.value + if (res.isNullOrBlank()) { + return withoutAction + } + return res + } + + companion object { + val withoutAction = "" + val actionRegex = Regex(".*.+.*") + val contextRegex = Regex("""(.*?)""") } } diff --git a/Lushu/src/main/kotlin/lushu/ContextGrammar/MapGrammar/DefaultFunctions.kt b/Lushu/src/main/kotlin/lushu/ContextGrammar/MapGrammar/DefaultFunctions.kt new file mode 100644 index 0000000..3624140 --- /dev/null +++ b/Lushu/src/main/kotlin/lushu/ContextGrammar/MapGrammar/DefaultFunctions.kt @@ -0,0 +1,13 @@ +package lushu.ContextGrammar.MapGrammar + +class DefaultFunctions { + + private fun print() { + } + + private fun count() { + } + + private fun save() { + } +} diff --git a/Lushu/src/main/kotlin/lushu/ContextGrammar/MapGrammar/Grammar.kt b/Lushu/src/main/kotlin/lushu/ContextGrammar/MapGrammar/Grammar.kt new file mode 100644 index 0000000..c1e800a --- /dev/null +++ b/Lushu/src/main/kotlin/lushu/ContextGrammar/MapGrammar/Grammar.kt @@ -0,0 +1,53 @@ +package lushu.ContextGrammar.MapGrammar + +import lushu.Merger.Lattice.Node.MergerS +import lushu.Merger.Merger.Merger +import org.slf4j.LoggerFactory +import java.io.File +import java.io.FileReader + +class Grammar() { + private val logger = LoggerFactory.getLogger(this::class.java) + + private val dsl: DSL = DSL() + private val merger: Merger = MergerS.merger() + + private val contextMap: ContextMap = ContextMap(dsl, merger) + private val contextTree: ContextTree = ContextTree(dsl, merger) + + fun consume(file: File) = FileReader(file).use { reader -> + reader.forEachLine { + consume(it) + } + } + + private fun streamString(input: String) = + blankRegex.findAll(input).forEach { matchResult -> + contextMap.match(matchResult.value) + contextTree.match(matchResult.value) + } + + fun consume(input: String) = streamString(input) + + fun addContext(contextInput: String?) { + if (contextInput.isNullOrBlank()) { + return + } + if (contextInput.matches(mapContextRegex)) { + contextMap.insertContext(contextInput) + } else { + contextTree.insertContext(contextInput) + } + } + + companion object { + val minElems = 3 + val noOccurrences = 0 + val spaceDelim = " " + val newlineDelim = "\n" + + val contextRegex = Regex("""(.*?)""") + val mapContextRegex = Regex("""\S+\s*(<\*>\\S+<\/\*>\s*)+\\S+\s*<\/c>""") + val blankRegex = Regex("""\S+""") + } +} diff --git a/Lushu/src/main/kotlin/lushu/ContextGrammar/MapGrammar/Lushu.kt b/Lushu/src/main/kotlin/lushu/ContextGrammar/MapGrammar/Lushu.kt new file mode 100644 index 0000000..f5622a1 --- /dev/null +++ b/Lushu/src/main/kotlin/lushu/ContextGrammar/MapGrammar/Lushu.kt @@ -0,0 +1,81 @@ +package lushu.ContextGrammar.MapGrammar + +import lushu.Merger.Lattice.Node.MergerS +import java.io.File +import java.io.FileReader + +class Lushu( + var mergerConfigFilePath: String = "", + var outFilePath: String = "", +) { + private var grammar: Grammar = Grammar() + + init { + setMergerFile(mergerConfigFilePath) + setOutputFile(outFilePath) + } + + private fun setMergerFile(configFilePath: String) { + if (this.mergerConfigFilePath.isNullOrBlank() && configFilePath.isNullOrBlank()) { + println( + "Warning: You must provide the .yaml configuration file during the instantiation of the class.\n" + + "For more information, please visit the GitHub repository at (https://github.com/lac-dcc/lushu).\n", + ) + return + } + this.mergerConfigFilePath = configFilePath + MergerS.load(configFilePath) + } + + private fun setOutputFile(outFilePath: String) { + if (this.outFilePath.isNullOrBlank() && outFilePath.isNullOrBlank()) { + println( + "Warning: The default output file will be used when functions that involve file printing are utilized.\n" + + "For more information, please visit the GitHub repository at (https://github.com/lac-dcc/lushu).\n", + ) + return + } + } + + fun addRules(file: File) = FileReader(file).use { reader -> + reader.forEachLine { + addRules(it) + } + } + + fun addRules(rule: String) { + grammar.addContext(rule) + } + + fun addRules() { + var rule: String? = null + if (rule == null) { + rule = readlnOrNull() + } + while (!rule.isNullOrEmpty()) { + addRules(rule) + rule = readlnOrNull() + } + } + + fun consume(input: String) { + grammar.consume(input) + } + + fun consume(file: File) = FileReader(file).use { reader -> + reader.forEachLine { + consume(it) + } + } + + fun consume() { + var input: String? = null + if (input == null) { + input = readlnOrNull() + } + while (!input.isNullOrEmpty()) { + addRules(input) + input = readlnOrNull() + } + } +} diff --git a/Lushu/src/main/kotlin/lushu/Merger/Lattice/Node/GrammarNode.kt b/Lushu/src/main/kotlin/lushu/Merger/Lattice/Node/GrammarNode.kt index 4f437eb..f0f5cd4 100644 --- a/Lushu/src/main/kotlin/lushu/Merger/Lattice/Node/GrammarNode.kt +++ b/Lushu/src/main/kotlin/lushu/Merger/Lattice/Node/GrammarNode.kt @@ -3,30 +3,30 @@ package lushu.Merger.Lattice.Node import lushu.Merger.Merger.Merger class GrammarNode( - var tokens: List = listOf(), - val sensitive: Boolean = false, + var token: Token? = null, val star: Boolean = false, - val nonmergeable: Boolean = false, - val terminal: Boolean = false, + val mergeable: Boolean = false, + val action: String = "", + val endOfContext: Boolean = false, val parent: GrammarNode? = null, - private val children: MutableList = mutableListOf() + private val children: MutableList = mutableListOf(), ) { - fun isSensitive(): Boolean { - return this.sensitive - } - fun isStar(): Boolean { return this.star } - fun isNonMergeable(): Boolean { - return this.nonmergeable + fun isEndOfContext(): Boolean { + return this.endOfContext + } + + fun getAction(): String { + return this.action } - fun isTerminal(): Boolean { - return this.terminal + fun isMergeable(): Boolean { + return this.mergeable } /** @@ -38,30 +38,6 @@ class GrammarNode( return this.children } - /** - * Determines if the given word matches the regular expression defined by the current instance. - * - * @param word to be matched against the regular expression. - * @return true if the word matches the regular expression, false otherwise. - */ - fun match(word: String): Boolean { - val tokens = MergerS.merger().tokensFromString(word) - return match(tokens) - } - - fun match(tokens: List): Boolean { - val res = MergerS.merger().merge(this.tokens, tokens) - - if (this.isNonMergeable()) { - return this.tokens == tokens - } - - if (res.success && !this.isNonMergeable()) { - this.tokens = res.tokens - } - return res.success - } - /** * Checks if two Node elements are equivalent. * @@ -71,29 +47,15 @@ class GrammarNode( */ override fun equals(other: Any?): Boolean = when (other) { is GrammarNode -> ( - this.match(other.tokens) && - this.sensitive == other.sensitive && + this.token.match(other.token) && this.star == other.star && - this.nonmergeable == other.nonmergeable && - this.terminal == other.terminal + this.mergeable == other.mergeable && + this.endOfContext == other.endOfContext ) else -> false } - /* TODO */ - /* While the updater only modifies tokens and children */ - fun update(element: GrammarNode) { - if (element.tokens.isNotEmpty()) { - this.tokens = element.tokens - } - // add sensitive case - if (element.children.isNotEmpty()) { - this.children.clear() - this.children.addAll(element.children) - } - } - /** * Finds the equivalent Node from the children list or returns the original node if not found. * @@ -101,7 +63,7 @@ class GrammarNode( * @return The equivalent Node if found in the children list; otherwise, returns the original node. */ fun getEquivalentNode(node: GrammarNode): GrammarNode { - return getChildren().find { it.equals(node) } ?: node + return getChildren().find { it == node } ?: node } /** @@ -112,7 +74,7 @@ class GrammarNode( * @return A new list containing the filtered nodes based on the 'nonMergeable' parameter. */ fun filterMergeblesNodes(mergeable: Boolean): List { - return children.filter { it.isNonMergeable() == !mergeable } + return children.filter { it.isMergeable() == !mergeable } } /** @@ -142,10 +104,10 @@ class GrammarNode( sensitive: Boolean, star: Boolean, nonmergeable: Boolean, - terminal: Boolean, - children: MutableList + endOfContext: Boolean, + children: MutableList, ) { - val newNode = GrammarNode(tokens, sensitive, star, nonmergeable, terminal, this, children) + val newNode = GrammarNode(tokens, sensitive, star, nonmergeable, endOfContext, this, children) addChild(newNode) } @@ -154,13 +116,20 @@ class GrammarNode( * @param newNode The node to be added as a child. */ private fun addChild(newNode: GrammarNode) { - if (newNode.isNonMergeable()) { + if (newNode.isMergeable()) { children.add(firstIndex, newNode) } else { mergeChildren(newNode) } } + private fun toToken(word: String, mergeable: Boolean): Token { + if (mergeable) { + return MergeableToken(word) + } + return NonMergeableToken(word) + } + /** * Finds an existing child node in the 'children' set that matches the provided criteria, * or adds a new node with the given properties if no match is found. @@ -174,15 +143,15 @@ class GrammarNode( */ fun findOrAddChild( word: String, - sensitive: Boolean, star: Boolean, - nonmergeable: Boolean, - terminal: Boolean + mergeable: Boolean, + action: String, + endOfContext: Boolean, ): GrammarNode { - val token = MergerS.merger().tokensFromString(word) - val newNode = GrammarNode(token, sensitive, star, nonmergeable, terminal, this, mutableListOf()) + val token = toToken(word, mergeable) + val newNode = GrammarNode(token, star, mergeable, action, endOfContext, this, mutableListOf()) - val existingChild = children.find { it.equals(newNode) } + val existingChild = children.find { it == newNode } if (existingChild != null) { return existingChild } @@ -196,24 +165,6 @@ class GrammarNode( return resNode } - fun treeToStringPreorder(current: GrammarNode, level: Int = 0): String { - val indentation = "-".repeat(level) - val result = StringBuilder() - - result.append("$indentation${current.tokens}\n") - - if (!current.children.isNullOrEmpty()) { - current.children.forEach { child -> - result.append(treeToStringPreorder(child, level + 1)) - } - } - return result.toString() - } - - override fun toString(): String { - return treeToStringPreorder(this) - } - companion object { private val firstIndex = 0 } diff --git a/Lushu/src/main/kotlin/lushu/TestApps/StressTest/Context/WithLushu/App.kt b/Lushu/src/main/kotlin/lushu/TestApps/StressTest/Context/WithLushu/App.kt index 961ec84..0ccebb9 100644 --- a/Lushu/src/main/kotlin/lushu/TestApps/StressTest/Context/WithLushu/App.kt +++ b/Lushu/src/main/kotlin/lushu/TestApps/StressTest/Context/WithLushu/App.kt @@ -8,7 +8,7 @@ fun main(args: Array) { if (args.size < 4) { println( "Usage: \n" + - "\t " + "\t ", ) return }