Skip to content

Commit

Permalink
Refactor + fix warnings (#146)
Browse files Browse the repository at this point in the history
  • Loading branch information
nitely authored Nov 28, 2024
1 parent 4de11ae commit 8ff12ed
Show file tree
Hide file tree
Showing 15 changed files with 355 additions and 310 deletions.
69 changes: 40 additions & 29 deletions src/regex.nim
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ This means the whole text needs to match the regex for this function to return `
.. code-block:: nim
:test:
let text = "nim c --styleCheck:hint --colors:off regex.nim"
var m: RegexMatch2
var m = RegexMatch2()
if match(text, re2"nim c (?:--(\w+:\w+) *)+ (\w+).nim", m):
doAssert text[m.group(0)] == "colors:off"
doAssert text[m.group(1)] == "regex"
Expand Down Expand Up @@ -229,7 +229,7 @@ match a given regex.
"""
var match = ""
var capture = ""
var m: RegexMatch2
var m = RegexMatch2()
if find(text, re2"(\w+)@\w+\.\w+", m):
match = text[m.boundaries]
capture = text[m.group(0)]
Expand Down Expand Up @@ -509,7 +509,7 @@ func group*(m: RegexMatch2, i: int): Slice[int] {.inline, raises: [].} =
## and they are included same as in PCRE.
runnableExamples:
let text = "abc"
var m: RegexMatch2
var m = RegexMatch2()
doAssert text.match(re2"(\w)+", m)
doAssert text[m.group(0)] == "c"

Expand All @@ -521,7 +521,7 @@ func group*(
## return slices for a given named group
runnableExamples:
let text = "abc"
var m: RegexMatch2
var m = RegexMatch2()
doAssert text.match(re2"(?P<foo>\w)+", m)
doAssert text[m.group("foo")] == "c"

Expand All @@ -530,7 +530,7 @@ func group*(
func groupsCount*(m: RegexMatch2): int {.inline, raises: [].} =
## return the number of capturing groups
runnableExamples:
var m: RegexMatch2
var m = RegexMatch2()
doAssert "ab".match(re2"(a)(b)", m)
doAssert m.groupsCount == 2

Expand All @@ -540,7 +540,7 @@ func groupNames*(m: RegexMatch2): seq[string] {.inline, raises: [].} =
## return the names of capturing groups.
runnableExamples:
let text = "hello world"
var m: RegexMatch2
var m = RegexMatch2()
doAssert text.match(re2"(?P<greet>hello) (?P<who>world)", m)
doAssert m.groupNames == @["greet", "who"]

Expand Down Expand Up @@ -581,7 +581,7 @@ func match*(
## is similar to ``find(text, re"^regex$", m)``
## but has better performance
runnableExamples:
var m: RegexMatch2
var m = RegexMatch2()
doAssert "abcd".match(re2"abcd", m)
doAssert not "abcd".match(re2"abc", m)

Expand All @@ -590,7 +590,7 @@ func match*(

func match*(s: string, pattern: Regex2): bool {.raises: [].} =
  ## Boolean-only overload: returns whatever `matchImpl` reports for
  ## `s` against `pattern`. The match object is a local scratch value,
  ## so no boundaries or captures are exposed to the caller.
  debugCheckUtf8(s, pattern)
  # Scratch match object: `matchImpl` needs one to fill in, and it is
  # dropped when this function returns.
  var m = RegexMatch2()
  result = matchImpl(s, pattern.toRegex, m)

when defined(noRegexOpt):
Expand Down Expand Up @@ -631,8 +631,8 @@ iterator findAll*(
debugCheckUtf8(s, pattern)
var i = start
var i2 = start-1
var m: RegexMatch2
var ms: RegexMatches2
var m = RegexMatch2()
var ms = RegexMatches2()
while i <= len(s):
doAssert(i > i2); i2 = i
i = findSomeOptTpl(s, pattern.toRegex, ms, i)
Expand All @@ -649,6 +649,7 @@ func findAll*(
pattern: Regex2,
start = 0
): seq[RegexMatch2] {.raises: [].} =
result = newSeq[RegexMatch2]()
for m in findAll(s, pattern, start):
result.add m

Expand All @@ -670,7 +671,7 @@ iterator findAllBounds*(
debugCheckUtf8(s, pattern)
var i = start
var i2 = start-1
var ms: RegexMatches2
var ms = RegexMatches2()
let flags = {mfNoCaptures}
while i <= len(s):
doAssert(i > i2); i2 = i
Expand All @@ -687,6 +688,7 @@ func findAllBounds*(
pattern: Regex2,
start = 0
): seq[Slice[int]] {.raises: [].} =
result = newSeq[Slice[int]]()
for m in findAllBounds(s, pattern, start):
result.add m

Expand All @@ -699,7 +701,7 @@ func find*(
## search through the string looking for the first
## location where there is a match
runnableExamples:
var m: RegexMatch2
var m = RegexMatch2()
doAssert "abcd".find(re2"bc", m) and
m.boundaries == 1 .. 2
doAssert not "abcd".find(re2"de", m)
Expand Down Expand Up @@ -738,7 +740,7 @@ iterator split*(s: string, sep: Regex2): string {.inline, raises: [].} =
first, last, i = 0
i2 = -1
done = false
ms: RegexMatches2
ms = RegexMatches2()
flags = {mfNoCaptures}
while not done:
doAssert(i > i2); i2 = i
Expand All @@ -757,6 +759,7 @@ func split*(s: string, sep: Regex2): seq[string] {.raises: [].} =
doAssert split("11a22Ϊ33Ⓐ44弢55", re2"\d+") ==
@["", "a", "Ϊ", "", "弢", ""]

result = newSeq[string]()
for w in split(s, sep):
result.add w

Expand All @@ -770,12 +773,13 @@ func splitIncl*(s: string, sep: Regex2): seq[string] {.raises: [].} =

template ab: untyped = m.boundaries
debugCheckUtf8(s, sep)
result = newSeq[string]()
var
first, last, i = 0
i2 = -1
done = false
m: RegexMatch2
ms: RegexMatches2
m = RegexMatch2()
ms = RegexMatches2()
while not done:
doAssert(i > i2); i2 = i
i = findSomeOptTpl(s, sep.toRegex, ms, i)
Expand Down Expand Up @@ -967,7 +971,7 @@ proc toString(
proc toString(pattern: Regex2): string {.used.} =
  ## NFA to string representation.
  ## For debugging purposes.
  ##
  ## Entry point for the multi-argument `toString` overload: it is
  ## called with `0` and an initially empty `visited` set.
  var visited: set[int16] = {}
  result = pattern.toString(0, visited)

#
Expand Down Expand Up @@ -1013,6 +1017,7 @@ func group*(
func groupFirstCapture*(
  m: RegexMatch, i: int, text: string
): string {.inline, raises: [], deprecated.} =
  ## Return the substring of `text` covered by the first slice that
  ## `m.group(i)` produces, or `""` when it produces none.
  result = ""
  for span in m.group(i):
    # Only the first slice is wanted; stop right after taking it.
    result = text[span]
    break

Expand Down Expand Up @@ -1083,7 +1088,7 @@ func match*(

func match*(s: string, pattern: Regex): bool {.raises: [], deprecated: "use match(string, Regex2) instead".} =
  ## Boolean-only overload for the deprecated `Regex` type: returns
  ## whatever `matchImpl` reports for `s` against `pattern`. The match
  ## object is a local scratch value and is not exposed to the caller.
  debugCheckUtf8 s
  # Scratch match object: `matchImpl` needs one to fill in, and it is
  # dropped when this function returns.
  var m = RegexMatch()
  result = matchImpl(s, pattern, m)

iterator findAll*(
Expand All @@ -1094,8 +1099,8 @@ iterator findAll*(
debugCheckUtf8 s
var i = start
var i2 = start-1
var m: RegexMatch
var ms: RegexMatches
var m = RegexMatch()
var ms = RegexMatches()
while i <= len(s):
doAssert(i > i2); i2 = i
i = findSomeOptTpl(s, pattern, ms, i)
Expand All @@ -1112,6 +1117,7 @@ func findAll*(
pattern: Regex,
start = 0
): seq[RegexMatch] {.raises: [], deprecated: "use findAll(string, Regex2) instead".} =
result = newSeq[RegexMatch]()
for m in findAll(s, pattern, start):
result.add m

Expand All @@ -1123,7 +1129,7 @@ iterator findAllBounds*(
debugCheckUtf8 s
var i = start
var i2 = start-1
var ms: RegexMatches
var ms = RegexMatches()
while i <= len(s):
doAssert(i > i2); i2 = i
i = findSomeOptTpl(s, pattern, ms, i)
Expand All @@ -1139,12 +1145,14 @@ func findAllBounds*(
pattern: Regex,
start = 0
): seq[Slice[int]] {.raises: [], deprecated: "use findAllBounds(string, Regex2) instead".} =
result = newSeq[Slice[int]]()
for m in findAllBounds(s, pattern, start):
result.add m

func findAndCaptureAll*(
  s: string, pattern: Regex
): seq[string] {.raises: [], deprecated: "use findAll(string, Regex2) instead".} =
  ## Return the matched substring of `s` for every match produced by
  ## `findAll(s, pattern)`, in the order the matches are produced.
  ## The result is an empty seq when there are no matches.
  result = newSeq[string]()
  for found in findAll(s, pattern):
    # `boundaries` is the slice of `s` covered by this match.
    result.add(s[found.boundaries])

Expand Down Expand Up @@ -1173,7 +1181,7 @@ iterator split*(s: string, sep: Regex): string {.inline, raises: [], deprecated:
first, last, i = 0
i2 = -1
done = false
ms: RegexMatches
ms = RegexMatches()
while not done:
doAssert(i > i2); i2 = i
i = findSomeOptTpl(s, sep, ms, i)
Expand All @@ -1186,18 +1194,20 @@ iterator split*(s: string, sep: Regex): string {.inline, raises: [], deprecated:
first = ab.b+1

func split*(s: string, sep: Regex): seq[string] {.raises: [], deprecated: "use split(string, Regex2) instead".} =
  ## Collect every piece yielded by iterating `split(s, sep)` into a
  ## seq, preserving iteration order.
  result = newSeq[string]()
  for piece in s.split(sep):
    result.add(piece)

func splitIncl*(s: string, sep: Regex): seq[string] {.raises: [], deprecated: "use splitIncl(string, Regex2) instead".} =
template ab: untyped = m.boundaries
debugCheckUtf8 s
result = newSeq[string]()
var
first, last, i = 0
i2 = -1
done = false
m: RegexMatch
ms: RegexMatches
m = RegexMatch()
ms = RegexMatches()
while not done:
doAssert(i > i2); i2 = i
i = findSomeOptTpl(s, sep, ms, i)
Expand Down Expand Up @@ -1231,7 +1241,7 @@ func endsWith*(s: string, pattern: Regex): bool {.raises: [], deprecated: "use e
debugCheckUtf8 s
result = false
var
m: RegexMatch
m = default(RegexMatch)
i = 0
while i < s.len:
result = match(s, pattern, m, i)
Expand Down Expand Up @@ -1329,7 +1339,7 @@ proc toString(
proc toString(pattern: Regex): string {.used.} =
  ## NFA to string representation.
  ## For debugging purposes.
  ##
  ## Entry point for the multi-argument `toString` overload: it is
  ## called with `0` and an initially empty `visited` set.
  var visited: set[int16] = {}
  result = pattern.toString(0, visited)

{.pop.} # {.push warning[Deprecated]: off.}
Expand All @@ -1340,7 +1350,7 @@ when isMainModule:
import ./regex/dotgraph

func toAtoms(s: string): string =
var groups: GroupsCapture
var groups = default(GroupsCapture)
let atoms = s
.parse
.toAtoms(groups)
Expand Down Expand Up @@ -1403,7 +1413,7 @@ when isMainModule:
doAssert r"[[:xdigit:]]".toAtoms == "[[0-9a-fA-F]]"
doAssert r"[[:alpha:][:digit:]]".toAtoms == "[[a-zA-Z][0-9]]"

var m: RegexMatch2
var m = RegexMatch2()
#doAssert match("abc", re2(r"abc", {reAscii}), m)
doAssert match("abc", re2"abc", m)
doAssert match("ab", re2"a(b|c)", m)
Expand Down Expand Up @@ -1571,6 +1581,7 @@ when isMainModule:

# subset of tests.nim
proc raisesMsg(pattern: string): string =
result = ""
try:
discard re2(pattern)
except RegexError:
Expand All @@ -1582,7 +1593,7 @@ when isMainModule:
(proc() = body)()

test:
var m: RegexMatch2
var m = RegexMatch2()
doAssert match("ac", re2"a(b|c)", m)
doAssert(not match("ad", re2"a(b|c)", m))
doAssert match("ab", re2"(ab)*", m)
Expand Down Expand Up @@ -1692,7 +1703,7 @@ when isMainModule:
m.captures == @[0 .. 3, reNonCapture, reNonCapture]
block:
var m = false
var matches: seq[string]
var matches = newSeq[string]()
match "abc", rex"(\w+)":
doAssert matches == @["abc"]
m = true
Expand Down
1 change: 1 addition & 0 deletions src/regex/common.nim
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ type
func verifyUtf8*(s: string): int =
## Return `-1` if `s` is a valid utf-8 string.
## Otherwise, return the index of the first bad char.
result = -1
var state = vusStart
var i = 0
let L = s.len
Expand Down
2 changes: 1 addition & 1 deletion src/regex/compiler.nim
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ when defined(regexDotDir):
func reImpl*(s: string, flags: RegexFlags = {}): Regex =
if regexArbitraryBytes notin flags and verifyUtf8(s) != -1:
raise newException(RegexError, "Invalid utf-8 regex")
var groups: GroupsCapture
var groups = default(GroupsCapture)
let rpn = s
.parse(flags)
.transformExp(groups, flags)
Expand Down
1 change: 1 addition & 0 deletions src/regex/dotgraph.nim
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import ./types
func getEpsilonTransitions(nfa: Nfa, n: Node, nti: int): seq[int] =
doAssert not isEpsilonTransition(n)
doAssert nti <= n.next.len-1
result = newSeq[int]()
for i in nti+1 .. n.next.len-1:
if not isEpsilonTransition(nfa.s[n.next[i]]):
break
Expand Down
Loading

0 comments on commit 8ff12ed

Please sign in to comment.