diff --git a/src/regex.nim b/src/regex.nim index 24f4a53..8580174 100644 --- a/src/regex.nim +++ b/src/regex.nim @@ -188,7 +188,7 @@ This means the whole text needs to match the regex for this function to return ` .. code-block:: nim :test: let text = "nim c --styleCheck:hint --colors:off regex.nim" - var m: RegexMatch2 + var m = RegexMatch2() if match(text, re2"nim c (?:--(\w+:\w+) *)+ (\w+).nim", m): doAssert text[m.group(0)] == "colors:off" doAssert text[m.group(1)] == "regex" @@ -229,7 +229,7 @@ match a given regex. """ var match = "" var capture = "" - var m: RegexMatch2 + var m = RegexMatch2() if find(text, re2"(\w+)@\w+\.\w+", m): match = text[m.boundaries] capture = text[m.group(0)] @@ -509,7 +509,7 @@ func group*(m: RegexMatch2, i: int): Slice[int] {.inline, raises: [].} = ## and they are included same as in PCRE. runnableExamples: let text = "abc" - var m: RegexMatch2 + var m = RegexMatch2() doAssert text.match(re2"(\w)+", m) doAssert text[m.group(0)] == "c" @@ -521,7 +521,7 @@ func group*( ## return slices for a given named group runnableExamples: let text = "abc" - var m: RegexMatch2 + var m = RegexMatch2() doAssert text.match(re2"(?P\w)+", m) doAssert text[m.group("foo")] == "c" @@ -530,7 +530,7 @@ func group*( func groupsCount*(m: RegexMatch2): int {.inline, raises: [].} = ## return the number of capturing groups runnableExamples: - var m: RegexMatch2 + var m = RegexMatch2() doAssert "ab".match(re2"(a)(b)", m) doAssert m.groupsCount == 2 @@ -540,7 +540,7 @@ func groupNames*(m: RegexMatch2): seq[string] {.inline, raises: [].} = ## return the names of capturing groups. runnableExamples: let text = "hello world" - var m: RegexMatch2 + var m = RegexMatch2() doAssert text.match(re2"(?Phello) (?Pworld)", m) doAssert m.groupNames == @["greet", "who"] @@ -581,7 +581,7 @@ func match*( ## is similar to ``find(text, re"^regex$", m)`` ## but has better performance runnableExamples: - var m: RegexMatch2 + var m = RegexMatch2() doAssert "abcd".match(re2"abcd", m) doAssert not "abcd".match(re2"abc", m) @@ -590,7 +590,7 @@ func match*( func match*(s: string, pattern: Regex2): bool {.raises: [].} = debugCheckUtf8(s, pattern) - var m: RegexMatch2 + var m = RegexMatch2() result = matchImpl(s, pattern.toRegex, m) when defined(noRegexOpt): @@ -631,8 +631,8 @@ iterator findAll*( debugCheckUtf8(s, pattern) var i = start var i2 = start-1 - var m: RegexMatch2 - var ms: RegexMatches2 + var m = RegexMatch2() + var ms = RegexMatches2() while i <= len(s): doAssert(i > i2); i2 = i i = findSomeOptTpl(s, pattern.toRegex, ms, i) @@ -649,6 +649,7 @@ func findAll*( pattern: Regex2, start = 0 ): seq[RegexMatch2] {.raises: [].} = + result = newSeq[RegexMatch2]() for m in findAll(s, pattern, start): result.add m @@ -670,7 +671,7 @@ iterator findAllBounds*( debugCheckUtf8(s, pattern) var i = start var i2 = start-1 - var ms: RegexMatches2 + var ms = RegexMatches2() let flags = {mfNoCaptures} while i <= len(s): doAssert(i > i2); i2 = i @@ -687,6 +688,7 @@ func findAllBounds*( pattern: Regex2, start = 0 ): seq[Slice[int]] {.raises: [].} = + result = newSeq[Slice[int]]() for m in findAllBounds(s, pattern, start): result.add m @@ -699,7 +701,7 @@ func find*( ## search through the string looking for the first ## location where there is a match runnableExamples: - var m: RegexMatch2 + var m = RegexMatch2() doAssert "abcd".find(re2"bc", m) and m.boundaries == 1 .. 2 doAssert not "abcd".find(re2"de", m) @@ -738,7 +740,7 @@ iterator split*(s: string, sep: Regex2): string {.inline, raises: [].} = first, last, i = 0 i2 = -1 done = false - ms: RegexMatches2 + ms = RegexMatches2() flags = {mfNoCaptures} while not done: doAssert(i > i2); i2 = i @@ -757,6 +759,7 @@ func split*(s: string, sep: Regex2): seq[string] {.raises: [].} = doAssert split("11a22Ϊ33Ⓐ44弢55", re2"\d+") == @["", "a", "Ϊ", "Ⓐ", "弢", ""] + result = newSeq[string]() for w in split(s, sep): result.add w @@ -770,12 +773,13 @@ func splitIncl*(s: string, sep: Regex2): seq[string] {.raises: [].} = template ab: untyped = m.boundaries debugCheckUtf8(s, sep) + result = newSeq[string]() var first, last, i = 0 i2 = -1 done = false - m: RegexMatch2 - ms: RegexMatches2 + m = RegexMatch2() + ms = RegexMatches2() while not done: doAssert(i > i2); i2 = i i = findSomeOptTpl(s, sep.toRegex, ms, i) @@ -967,7 +971,7 @@ proc toString( proc toString(pattern: Regex2): string {.used.} = ## NFA to string representation. ## For debugging purposes - var visited: set[int16] + var visited: set[int16] = {} result = pattern.toString(0, visited) # @@ -1013,6 +1017,7 @@ func group*( func groupFirstCapture*( m: RegexMatch, i: int, text: string ): string {.inline, raises: [], deprecated.} = + result = "" for bounds in m.group i: return text[bounds] @@ -1083,7 +1088,7 @@ func match*( func match*(s: string, pattern: Regex): bool {.raises: [], deprecated: "use match(string, Regex2) instead".} = debugCheckUtf8 s - var m: RegexMatch + var m = RegexMatch() result = matchImpl(s, pattern, m) iterator findAll*( @@ -1094,8 +1099,8 @@ iterator findAll*( debugCheckUtf8 s var i = start var i2 = start-1 - var m: RegexMatch - var ms: RegexMatches + var m = RegexMatch() + var ms = RegexMatches() while i <= len(s): doAssert(i > i2); i2 = i i = findSomeOptTpl(s, pattern, ms, i) @@ -1112,6 +1117,7 @@ func findAll*( pattern: Regex, start = 0 ): seq[RegexMatch] {.raises: [], deprecated: "use findAll(string, Regex2) instead".} = + result = newSeq[RegexMatch]() for m in findAll(s, pattern, start): result.add m @@ -1123,7 +1129,7 @@ iterator findAllBounds*( debugCheckUtf8 s var i = start var i2 = start-1 - var ms: RegexMatches + var ms = RegexMatches() while i <= len(s): doAssert(i > i2); i2 = i i = findSomeOptTpl(s, pattern, ms, i) @@ -1139,12 +1145,14 @@ func findAllBounds*( pattern: Regex, start = 0 ): seq[Slice[int]] {.raises: [], deprecated: "use findAllBounds(string, Regex2) instead".} = + result = newSeq[Slice[int]]() for m in findAllBounds(s, pattern, start): result.add m func findAndCaptureAll*( s: string, pattern: Regex ): seq[string] {.raises: [], deprecated: "use findAll(string, Regex2) instead".} = + result = newSeq[string]() for m in s.findAll(pattern): result.add s[m.boundaries] @@ -1173,7 +1181,7 @@ iterator split*(s: string, sep: Regex): string {.inline, raises: [], deprecated: first, last, i = 0 i2 = -1 done = false - ms: RegexMatches + ms = RegexMatches() while not done: doAssert(i > i2); i2 = i i = findSomeOptTpl(s, sep, ms, i) @@ -1186,18 +1194,20 @@ iterator split*(s: string, sep: Regex): string {.inline, raises: [], deprecated: first = ab.b+1 func split*(s: string, sep: Regex): seq[string] {.raises: [], deprecated: "use split(string, Regex2) instead".} = + result = newSeq[string]() for w in split(s, sep): result.add w func splitIncl*(s: string, sep: Regex): seq[string] {.raises: [], deprecated: "use splitIncl(string, Regex2) instead".} = template ab: untyped = m.boundaries debugCheckUtf8 s + result = newSeq[string]() var first, last, i = 0 i2 = -1 done = false - m: RegexMatch - ms: RegexMatches + m = RegexMatch() + ms = RegexMatches() while not done: doAssert(i > i2); i2 = i i = findSomeOptTpl(s, sep, ms, i) @@ -1231,7 +1241,7 @@ func endsWith*(s: string, pattern: Regex): bool {.raises: [], deprecated: "use e debugCheckUtf8 s result = false var - m: RegexMatch + m = default(RegexMatch) i = 0 while i < s.len: result = match(s, pattern, m, i) @@ -1329,7 +1339,7 @@ proc toString( proc toString(pattern: Regex): string {.used.} = ## NFA to string representation. ## For debugging purposes - var visited: set[int16] + var visited: set[int16] = {} result = pattern.toString(0, visited) {.pop.} # {.push warning[Deprecated]: off.} @@ -1340,7 +1350,7 @@ when isMainModule: import ./regex/dotgraph func toAtoms(s: string): string = - var groups: GroupsCapture + var groups = default(GroupsCapture) let atoms = s .parse .toAtoms(groups) @@ -1403,7 +1413,7 @@ when isMainModule: doAssert r"[[:xdigit:]]".toAtoms == "[[0-9a-fA-F]]" doAssert r"[[:alpha:][:digit:]]".toAtoms == "[[a-zA-Z][0-9]]" - var m: RegexMatch2 + var m = RegexMatch2() #doAssert match("abc", re2(r"abc", {reAscii}), m) doAssert match("abc", re2"abc", m) doAssert match("ab", re2"a(b|c)", m) @@ -1571,6 +1581,7 @@ when isMainModule: # subset of tests.nim proc raisesMsg(pattern: string): string = + result = "" try: discard re2(pattern) except RegexError: @@ -1582,7 +1593,7 @@ when isMainModule: (proc() = body)() test: - var m: RegexMatch2 + var m = RegexMatch2() doAssert match("ac", re2"a(b|c)", m) doAssert(not match("ad", re2"a(b|c)", m)) doAssert match("ab", re2"(ab)*", m) @@ -1692,7 +1703,7 @@ when isMainModule: m.captures == @[0 .. 3, reNonCapture, reNonCapture] block: var m = false - var matches: seq[string] + var matches = newSeq[string]() match "abc", rex"(\w+)": doAssert matches == @["abc"] m = true diff --git a/src/regex/common.nim b/src/regex/common.nim index 63f37b2..c4fc6fa 100644 --- a/src/regex/common.nim +++ b/src/regex/common.nim @@ -70,6 +70,7 @@ type func verifyUtf8*(s: string): int = ## Return `-1` if `s` is a valid utf-8 string. ## Otherwise, return the index of the first bad char. + result = -1 var state = vusStart var i = 0 let L = s.len diff --git a/src/regex/compiler.nim b/src/regex/compiler.nim index 275976e..666dcc5 100644 --- a/src/regex/compiler.nim +++ b/src/regex/compiler.nim @@ -11,7 +11,7 @@ when defined(regexDotDir): func reImpl*(s: string, flags: RegexFlags = {}): Regex = if regexArbitraryBytes notin flags and verifyUtf8(s) != -1: raise newException(RegexError, "Invalid utf-8 regex") - var groups: GroupsCapture + var groups = default(GroupsCapture) let rpn = s .parse(flags) .transformExp(groups, flags) diff --git a/src/regex/dotgraph.nim b/src/regex/dotgraph.nim index e995b17..6b3048b 100644 --- a/src/regex/dotgraph.nim +++ b/src/regex/dotgraph.nim @@ -8,6 +8,7 @@ import ./types func getEpsilonTransitions(nfa: Nfa, n: Node, nti: int): seq[int] = doAssert not isEpsilonTransition(n) doAssert nti <= n.next.len-1 + result = newSeq[int]() for i in nti+1 .. n.next.len-1: if not isEpsilonTransition(nfa.s[n.next[i]]): break diff --git a/src/regex/exptransformation.nim b/src/regex/exptransformation.nim index b643e5a..bc21759 100644 --- a/src/regex/exptransformation.nim +++ b/src/regex/exptransformation.nim @@ -28,6 +28,7 @@ template check(cond, msg: untyped): untyped = func fixEmptyOps(exp: Exp): Exp = ## Handle "|", "(|)", "a|", "|b", "||", "a||b", ... ## Handle "()" + result = default(Exp) result.s = newSeq[Node](exp.s.len) result.s.setLen 0 for i in 0 .. exp.s.len-1: @@ -46,6 +47,7 @@ func fixEmptyOps(exp: Exp): Exp = func greediness(exp: Exp): Exp = ## apply greediness to an expression + result = default(Exp) result.s = newSeq[Node](exp.s.len) result.s.setLen 0 var sc = exp.s.scan() @@ -156,6 +158,7 @@ func squash(flags: seq[seq[Flag]]): array[Flag, bool] = ## this will set/unset those flags ## in order. It should be done each time ## there is a group start/end + result = default(array[Flag, bool]) for ff in flags: for f in ff: result[f.toggle()] = false @@ -212,6 +215,7 @@ func applyFlag(n: var Node, f: Flag) = func applyFlags(exp: Exp, fls: RegexFlags): Exp = ## apply flags to each group + result = default(Exp) result.s = newSeq[Node](exp.s.len) result.s.setLen 0 var flags = newSeq[seq[Flag]]() @@ -276,6 +280,7 @@ func expandOneRepRange(subExpr: seq[Node], n: Node): seq[Node] = func expandRepRange(exp: Exp): Exp = ## expand every repetition range + result = default(Exp) result.s = newSeq[Node](exp.s.len) result.s.setLen 0 var i: int @@ -314,6 +319,7 @@ func expandRepRange(exp: Exp): Exp = "expected before repetition range")) func expandArbitrayBytes(exp: Exp, flags: RegexFlags): Exp = + result = default(Exp) if regexArbitraryBytes notin flags: return exp template addBytes(result, node, n: untyped): untyped = @@ -361,6 +367,7 @@ func joinAtoms(exp: Exp): AtomsExp = ## Put a ``~`` joiner between atoms. An atom is ## a piece of expression that would loose ## meaning when breaking it up (i.e.: ``a~(b|c)*~d``) + result = default(AtomsExp) result.s = newSeq[Node](exp.s.len * 2) result.s.setLen 0 var atomsCount = 0 @@ -407,13 +414,14 @@ func opsPA(nk: NodeKind): OpsPA = reZeroOrMore, reOneOrMore, reZeroOrOne: - result = (5, asyRight) + (5, asyRight) of reJoiner: - result = (4, asyLeft) + (4, asyLeft) of reOr: - result = (3, asyLeft) + (3, asyLeft) else: doAssert false + default(OpsPA) func hasPrecedence(a: NodeKind, b: NodeKind): bool = ## Check ``b`` has precedence over ``a``. @@ -454,6 +462,7 @@ func rpn(exp: AtomsExp): RpnExp = ## the parsing of the regular expression into an NFA. ## Suffix notation removes nesting and so it can ## be parsed in a linear way instead of recursively + result = default(RpnExp) result.s = newSeq[Node](exp.s.len) result.s.setLen 0 var ops = newSeq[Node]() @@ -478,6 +487,7 @@ func rpn(exp: AtomsExp): RpnExp = func subExps(exp: AtomsExp, parentKind = reLookahead): AtomsExp = ## Collect and convert lookaround sub-expressions to RPN template n: untyped = result.s[^1] + result = default(AtomsExp) result.s = newSeq[Node](exp.s.len) result.s.setLen 0 var i = 0 diff --git a/src/regex/litopt.nim b/src/regex/litopt.nim index b91fa35..4945c06 100644 --- a/src/regex/litopt.nim +++ b/src/regex/litopt.nim @@ -87,6 +87,7 @@ func update( # and (...|...) by skip nodes. # Based on Thompson's construction func toLitNfa(exp: RpnExp): LitNfa = + result = default(LitNfa) result.s = newSeq[Node](exp.s.len + 2) result.s.setLen 0 result.s.add initEoeNode() @@ -175,6 +176,7 @@ type s: string func find(nodes: seq[Node], uid: int): NodeIdx = + result = default(NodeIdx) for idx in 0 .. nodes.len-1: if nodes[idx].uid == uid: return idx.NodeIdx @@ -182,6 +184,7 @@ func find(nodes: seq[Node], uid: int): NodeIdx = func lits(exp: RpnExp, flags: RegexFlags): Lits = template state: untyped = litNfa.s[stateIdx] + result = default(Lits) result.idx = exp.delimiterLit() if result.idx == -1: return @@ -230,7 +233,7 @@ func prefix(eNfa: Enfa, uid: NodeUid): Enfa = n.next.setLen 0 # reverse transitions; DFS var stack = @[(state0, -1'i16)] - var visited: set[int16] + var visited: set[int16] = {} template state: untyped = eNfa.s[ni] while stack.len > 0: let (ni, pi) = stack.pop() @@ -283,6 +286,7 @@ func canOpt*(litOpt: LitOpt): bool = func litopt3*(exp: RpnExp, flags: RegexFlags = {}): LitOpt = template litNode: untyped = exp.s[lits2.idx] + result = default(LitOpt) let lits2 = exp.lits(flags) if lits2.idx == -1: return diff --git a/src/regex/nfa.nim b/src/regex/nfa.nim index 96567b9..4515f4a 100644 --- a/src/regex/nfa.nim +++ b/src/regex/nfa.nim @@ -50,6 +50,7 @@ func update( func eNfa*(exp: RpnExp): Enfa {.raises: [RegexError].} = ## Thompson's construction + result = default(Enfa) result.s = newSeq[Node](exp.s.len + 2) result.s.setLen 0 result.s.add initEOENode() @@ -190,7 +191,7 @@ func teClosure( processing: var seq[int16] ) = doAssert processing.len == 0 - var transitions: Transitions + var transitions = default(Transitions) for s in eNfa.s[state].next: teClosure(result, eNfa, s, processing, transitions) @@ -201,6 +202,7 @@ func eRemoval*(eNfa: Enfa): Nfa {.raises: [].} = ## Transitions are added in matching order (BFS), ## which may help matching performance #echo eNfa + result = default(Nfa) result.s = newSeq[Node](eNfa.s.len) result.s.setLen 0 var statesMap = newSeq[int16](eNfa.s.len) @@ -209,12 +211,12 @@ func eRemoval*(eNfa: Enfa): Nfa {.raises: [].} = let start = int16(eNfa.s.len-1) result.s.add eNfa.s[start] statesMap[start] = 0'i16 - var closure: TeClosure + var closure = default(TeClosure) var qw = initDeque[int16](2) qw.addFirst start - var qu: set[int16] + var qu: set[int16] = {} qu.incl start - var qa: int16 + var qa = 0'i16 var processing = newSeqOfCap[int16](8) while qw.len > 0: try: @@ -245,7 +247,7 @@ func reverse(eNfa: Enfa): Enfa = for n in mitems result.s: n.next.setLen 0 var stack = @[(state0, -1'i16)] - var visited: set[int16] + var visited: set[int16] = {} template state: untyped = eNfa.s[ni] while stack.len > 0: let (ni, pi) = stack.pop() diff --git a/src/regex/nfafindall2.nim b/src/regex/nfafindall2.nim index 48d290f..f0d9255 100644 --- a/src/regex/nfafindall2.nim +++ b/src/regex/nfafindall2.nim @@ -139,7 +139,7 @@ func dummyMatch*(ms: var RegexMatches2, i: int) = if ms.m.len == 0 or max(ab.a, ab.b) < i: ms.add (-1'i32, i+1 .. i) -func submatch( +func nextState( ms: var RegexMatches2, text: string, regex: Regex, @@ -176,26 +176,9 @@ func submatch( captx = capt while nti < L and isEpsilonTransition(ntn): if matched: - case ntn.kind - of reGroupStart: - if mfNoCaptures notin flags: - captx = capts.diverge captx - capts[captx, ntn.idx].a = i - of reGroupEnd: - if mfNoCaptures notin flags: - captx = capts.diverge captx - capts[captx, ntn.idx].b = i-1 - of assertionKind - lookaroundKind: - matched = match(ntn, cPrev.Rune, c.Rune) - of lookaroundKind: - let freezed = capts.freeze() - matched = lookAround(ntn, capts, captx, text, look, i, flags) - capts.unfreeze freezed - if captx != -1: - capts.keepAlive captx - else: - doAssert false - discard + epsilonMatch( + matched, captx, capts, look, ntn, text, i, cPrev, c.Rune, flags + ) inc nti if matched: if isEoe: @@ -243,7 +226,7 @@ func findSomeImpl*( inc i else: fastRuneAt(text, i, c, true) - submatch(ms, text, regex, iPrev, cPrev, c.int32, flags) + nextState(ms, text, regex, iPrev, cPrev, c.int32, flags) if smA.len == 0: # avoid returning right before final zero-match if i < len(text): @@ -256,7 +239,7 @@ func findSomeImpl*( smA.add (0'i16, -1'i32, i .. i-1) iPrev = i cPrev = c.int32 - submatch(ms, text, regex, iPrev, cPrev, -1'i32, flags) + nextState(ms, text, regex, iPrev, cPrev, -1'i32, flags) doAssert smA.len == 0 if ms.hasMatches(): #debugEcho "m= ", ms.m.s diff --git a/src/regex/nfamacro.nim b/src/regex/nfamacro.nim index b6355a1..f31fc83 100644 --- a/src/regex/nfamacro.nim +++ b/src/regex/nfamacro.nim @@ -156,7 +156,7 @@ func genMatch(c: NimNode, n: Node): NimNode = func genSetMatch(c: NimNode, n: Node): NimNode = assert n.kind in {reInSet, reNotSet} - var terms: seq[NimNode] + var terms = newSeq[NimNode]() if n.ranges.len > 0: for bound in n.ranges: let a = newLit bound.a.int32 @@ -164,7 +164,7 @@ func genSetMatch(c: NimNode, n: Node): NimNode = terms.add quote do: `a` <= `c` and `c` <= `b` if n.cps.len > 0: - var caseStmt: seq[NimNode] + var caseStmt = newSeq[NimNode]() caseStmt.add c for cp in n.cps: caseStmt.add newTree(nnkOfBranch, @@ -268,6 +268,7 @@ func genLookaroundMatch( func getEpsilonTransitions(nfa: Nfa, n: Node, nti: int): seq[int] = doAssert not isEpsilonTransition(n) doAssert nti <= n.next.len-1 + result = newSeq[int]() for i in nti+1 .. n.next.len-1: if not isEpsilonTransition(nfa.s[n.next[i]]): break @@ -291,7 +292,7 @@ func genMatchedBody( if eTransitions.len == 0: return quote do: add(`smB`, (`ntLit`, `capt`, `bounds2`)) - var matchedBody: seq[NimNode] + var matchedBody = newSeq[NimNode]() matchedBody.add quote do: `matched` = true `captx` = `capt` @@ -349,14 +350,14 @@ func genNextState( ]# template s: untyped = nfa.s result = newStmtList() - var caseStmtN: seq[NimNode] + var caseStmtN = newSeq[NimNode]() caseStmtN.add n for i in 0 .. s.len-1: if s[i].kind == reEoe: continue if isEpsilonTransition(s[i]): continue - var branchBodyN: seq[NimNode] + var branchBodyN = newSeq[NimNode]() for nti, nt in s[i].next.pairs: if eoeOnly and s[nt].kind != reEoe: continue @@ -436,6 +437,7 @@ func nextState( swap `smA`, `smB` func eoeIdx(nfa: Nfa): int16 = + result = 0'i16 for i in 0 .. nfa.s.len-1: if nfa.s[i].kind == reEoe: return i.int16 @@ -556,7 +558,7 @@ template look(smL: NimNode): untyped = template constructSubmatches2( captures, txt, capts, capt, size: untyped ): untyped = - var bounds: array[size, Slice[int]] + var bounds = default(array[size, Slice[int]]) for i in 0 .. bounds.len-1: bounds[i] = -2 .. -3 var captx = capt @@ -588,13 +590,13 @@ proc matchImpl*(text, expLit, body: NimNode): NimNode = var `smA` = newSubmatches `nfaLenLit` `smB` = newSubmatches `nfaLenLit` - `capts`: Capts + `capts` = default(Capts) `capt` = -1'i32 `matched` = false - `smL` {.used.}: SmLookaround + `smL` {.used.} = default(SmLookaround) `matchImplStmt` if `matched`: - var matches {.used, inject.}: seq[string] + var matches {.used, inject.} = newSeq[string]() when `nfaGroupsLen` > 0: constructSubmatches2( matches, `text`, `capts`, `capt`, `nfaGroupsLen`) diff --git a/src/regex/nfamatch.nim b/src/regex/nfamatch.nim index f37b003..17c994d 100644 --- a/src/regex/nfamatch.nim +++ b/src/regex/nfamatch.nim @@ -204,10 +204,11 @@ func reversedMatchImpl*( look: var Lookaround, start, limit: int ): int = - var capts: Capts + var capts = default(Capts) var captIdx = -1'i32 reversedMatchImpl( - smA, smB, capts, captIdx, text, nfa, look, start, limit) + smA, smB, capts, captIdx, text, nfa, look, start, limit + ) template initLook*: Lookaround = Lookaround( @@ -224,7 +225,7 @@ func matchImpl*( var smA = newSubmatches(regex.nfa.s.len) smB = newSubmatches(regex.nfa.s.len) - capts: Capts + capts = default(Capts) capt = -1'i32 look = initLook() result = matchImpl( @@ -242,7 +243,7 @@ func startsWithImpl*(text: string, regex: Regex, start: int): bool = var smA = newSubmatches(regex.nfa.s.len) smB = newSubmatches(regex.nfa.s.len) - capts: Capts + capts = default(Capts) capt = -1'i32 look = initLook() result = matchImpl( diff --git a/src/regex/nfamatch2.nim b/src/regex/nfamatch2.nim index fb4bcd3..f14868f 100644 --- a/src/regex/nfamatch2.nim +++ b/src/regex/nfamatch2.nim @@ -34,7 +34,7 @@ type behind*: BehindSig smL*: SmLookaround -func lookAround*( +func lookAround( ntn: Node, capts: var Capts3, captIdx: var int32, @@ -76,22 +76,76 @@ func lookAround*( false smL.removeLast() -template nextStateTpl(bwMatch = false): untyped {.dirty.} = - template bounds2: untyped = - when bwMatch: i .. bounds.b else: bounds.a .. i-1 +func epsilonMatch*( + matched: var bool, + captx: var int32, + capts: var Capts3, + look: var Lookaround, + ntn: Node, + text: string, + i: int, + cPrev: int32, + c: Rune, + flags: MatchFlags, + bwMatch = false +) = template captElm: untyped = - capts[captx, nfa.s[nt].idx] - template nt: untyped = nfa.s[n].next[nti] - template ntn: untyped = nfa.s[nt] + capts[captx, ntn.idx] + case ntn.kind + of reGroupStart: + captx = capts.diverge captx + if mfReverseCapts notin flags or + captElm.a == nonCapture.a: + captElm.a = i + of reGroupEnd: + captx = capts.diverge captx + if mfReverseCapts notin flags or + captElm.b == nonCapture.b: + captElm.b = i-1 + of assertionKind - lookaroundKind: + if bwMatch: + matched = match(ntn, c, cPrev.Rune) + else: + matched = match(ntn, cPrev.Rune, c) + of lookaroundKind: + let freezed = capts.freeze() + matched = lookAround(ntn, capts, captx, text, look, i, flags) + capts.unfreeze freezed + if captx != -1: + capts.keepAlive captx + else: + doAssert false + discard + +func nextState( + smA, smB: var Submatches, + capts: var Capts3, + look: var Lookaround, + text: string, + nfa2: Nfa, + i: int, + cPrev: int32, + c: Rune, + flags: MatchFlags, + bwMatch = false +) {.inline.} = + template nfa: untyped = nfa2.s + template bounds2: untyped = + if bwMatch: i .. bounds.b else: bounds.a .. i-1 + template nt: untyped = nfa[n].next[nti] + template ntn: untyped = nfa[nt] + let anchored = mfAnchored in flags + var captx = 0'i32 + var matched = true smB.clear() for n, capt, bounds in items smA: if capt != -1: capts.keepAlive capt - if anchored and nfa.s[n].kind == reEoe: + if anchored and nfa[n].kind == reEoe: if not smB.hasState n: smB.add (n, capt, bounds) break - let L = nfa.s[n].next.len + let L = nfa[n].next.len var nti = 0 while nti < L: let nt0 = nt @@ -101,32 +155,9 @@ template nextStateTpl(bwMatch = false): untyped {.dirty.} = captx = capt while nti < L and isEpsilonTransition(ntn): if matched: - case ntn.kind - of reGroupStart: - # XXX this can be avoided in some cases? - captx = capts.diverge captx - if mfReverseCapts notin flags or - captElm.a == nonCapture.a: - captElm.a = i - of reGroupEnd: - captx = capts.diverge captx - if mfReverseCapts notin flags or - captElm.b == nonCapture.b: - captElm.b = i-1 - of assertionKind - lookaroundKind: - when bwMatch: - matched = match(ntn, c, cPrev.Rune) - else: - matched = match(ntn, cPrev.Rune, c) - of lookaroundKind: - let freezed = capts.freeze() - matched = lookAround(ntn, capts, captx, text, look, i, flags) - capts.unfreeze freezed - if captx != -1: - capts.keepAlive captx - else: - doAssert false - discard + epsilonMatch( + matched, captx, capts, look, ntn, text, i, cPrev, c, flags, bwMatch + ) inc nti if matched: smB.add (nt0, captx, bounds2) @@ -148,8 +179,6 @@ func matchImpl( cPrev = -1'i32 i = start iNext = start - captx = -1'i32 - matched = false let anchored = mfAnchored in flags binFlag = mfBytesInput in flags @@ -166,7 +195,7 @@ func matchImpl( inc iNext else: fastRuneAt(text, iNext, c, true) - nextStateTpl() + nextState(smA, smB, capts, look, text, nfa, i, cPrev, c, flags) if smA.len == 0: return false if anchored and nfa.s[smA[0].ni].kind == reEoe: @@ -174,7 +203,7 @@ func matchImpl( i = iNext cPrev = c.int32 c = Rune(-1) - nextStateTpl() + nextState(smA, smB, capts, look, text, nfa, i, cPrev, c, flags) if smA.len > 0: captIdx = smA[0].ci return smA.len > 0 @@ -197,9 +226,7 @@ func reversedMatchImpl( cPrev = -1'i32 i = start iNext = start - captx: int32 - matched = false - anchored = true + let flags = flags + {mfAnchored} let binFlag = mfBytesInput in flags if start in 0 .. text.len-1: cPrev = if binFlag: @@ -214,7 +241,7 @@ func reversedMatchImpl( dec iNext else: bwFastRuneAt(text, iNext, c) - nextStateTpl(bwMatch = true) + nextState(smA, smB, capts, look, text, nfa, i, cPrev, c, flags, bwMatch = true) if smA.len == 0: return -1 if nfa.s[smA[0].ni].kind == reEoe: @@ -228,7 +255,7 @@ func reversedMatchImpl( dec iNext else: bwFastRuneAt(text, iNext, c) - nextStateTpl(bwMatch = true) + nextState(smA, smB, capts, look, text, nfa, i, cPrev, c, flags, bwMatch = true) for n, capt, bounds in items smA: if nfa.s[n].kind == reEoe: captIdx = capt diff --git a/src/regex/nfatype.nim b/src/regex/nfatype.nim index 981a1ef..dbbd03b 100644 --- a/src/regex/nfatype.nim +++ b/src/regex/nfatype.nim @@ -66,10 +66,13 @@ template fastLog2Tpl(x: Natural): untyped = fastLog2(x) func initCapts3*(groupsLen: int): Capts3 = - result.groupsLen = groupsLen - result.blockSize = max(2, nextPowerOfTwo groupsLen) - result.blockSizeL2 = fastLog2Tpl result.blockSize - result.freezeId = stsFrozen.a + let blockSize = max(2, nextPowerOfTwo groupsLen) + Capts3( + groupsLen: groupsLen, + blockSize: blockSize, + blockSizeL2: fastLog2Tpl blockSize, + freezeId: stsFrozen.a + ) func check(curr, next: CaptState): bool = ## Check if transition from state curr to next is allowed @@ -230,6 +233,7 @@ type MatchFlags* = set[MatchFlag] func toMatchFlags*(f: RegexFlags): MatchFlags = + result = default(MatchFlags) if regexArbitraryBytes in f: result.incl mfBytesInput diff --git a/src/regex/parser.nim b/src/regex/parser.nim index 4e41af0..a8f1e34 100644 --- a/src/regex/parser.nim +++ b/src/regex/parser.nim @@ -175,6 +175,7 @@ func parseOctalLit(sc: Scanner[Rune]): Node = result = Rune(cp).toCharNode func parseCC(s: string): UnicodeCategorySet = + result = default(UnicodeCategorySet) try: result = s.categoryMap.UnicodeCategorySet except ValueError: @@ -235,31 +236,34 @@ func parseEscapedSeq(sc: Scanner[Rune]): Node = case sc.peek of "u".toRune: discard sc.next() - result = parseUnicodeLit(sc, 4) + parseUnicodeLit(sc, 4) of "U".toRune: discard sc.next() - result = parseUnicodeLit(sc, 8) + parseUnicodeLit(sc, 8) of "x".toRune: discard sc.next() case sc.peek of "{".toRune: - result = parseUnicodeLitX(sc) + parseUnicodeLitX(sc) else: - result = parseUnicodeLit(sc, 2) + parseUnicodeLit(sc, 2) of "0".toRune .. "7".toRune: - result = parseOctalLit(sc) + parseOctalLit(sc) of "p".toRune: discard sc.next() - result = parseUnicodeName(sc) + parseUnicodeName(sc) of "P".toRune: discard sc.next() - result = parseUnicodeName(sc) - result.kind = reNotUCC + var node = parseUnicodeName(sc) + node.kind = reNotUCC + node of invalidRune: let startPos = sc.pos prettyCheck(false, "Nothing to escape") + doAssert false + Node() else: - result = next(sc).toEscapedNode + next(sc).toEscapedNode func parseSetEscapedSeq(sc: Scanner[Rune]): Node = ## Just like regular ``parseEscapedSeq`` @@ -392,24 +396,23 @@ func parseSet(sc: Scanner[Rune]): Node = if cps.len == 0: cps.add(cp) continue - var last: Rune - case sc.peek - of "]".toRune: + if sc.peek == "]".toRune: cps.add(cp) continue - of "\\".toRune: - discard sc.next() - let nn = parseSetEscapedSeq(sc) - check( - nn.kind == reChar, - "Invalid set range. Range can't contain " & - "a character-class or assertion", - sc.pos-1, - sc.raw) - last = nn.cp - else: - assert(not sc.finished) - last = sc.next() + var last = case sc.peek + of "\\".toRune: + discard sc.next() + let nn = parseSetEscapedSeq(sc) + check( + nn.kind == reChar, + "Invalid set range. Range can't contain " & + "a character-class or assertion", + sc.pos-1, + sc.raw) + nn.cp + else: + doAssert(not sc.finished) + sc.next() let first = cps.pop() check( first <= last, @@ -453,7 +456,8 @@ func parseRepRange(sc: Scanner[Rune]): Node = return Node(kind: reChar, cp: '{'.Rune) let startPos = sc.pos var - first, last: string + first = "" + last = "" hasFirst = false curr = "" for cp in sc: @@ -482,8 +486,8 @@ func parseRepRange(sc: Scanner[Rune]): Node = if last.len == 0: # {n,} last = "-1" var - firstNum: int - lastNum: int + firstNum = 0 + lastNum = 0 try: discard parseInt(first, firstNum) discard parseInt(last, lastNum) @@ -556,13 +560,12 @@ func parseGroupTag(sc: Scanner[Rune]): Node = # A regular group let startPos = sc.pos if sc.peek != "?".toRune: - result = initGroupStart() - return + return initGroupStart() discard sc.next() # Consume "?" - case sc.peek + result = case sc.peek of ":".toRune: discard sc.next() - result = initGroupStart(isCapturing = false) + initGroupStart(isCapturing = false) of "P".toRune: discard sc.next() prettyCheck( @@ -589,7 +592,7 @@ func parseGroupTag(sc: Scanner[Rune]): Node = prettyCheck( sc.prev == ">".toRune, "Invalid group name. Missing `>`") - result = initGroupStart(name) + initGroupStart(name) of "i".toRune, "m".toRune, "s".toRune, @@ -610,44 +613,39 @@ func parseGroupTag(sc: Scanner[Rune]): Node = flags.add toNegFlag(cp) else: flags.add toFlag(cp) - result = if sc.prev == ")".toRune: + if sc.prev == ")".toRune: Node(kind: reFlags, flags: flags) else: - initGroupStart( - flags = flags, - isCapturing = false) + initGroupStart(flags = flags, isCapturing = false) #reLookahead, #reLookbehind, of '='.Rune, '<'.Rune, '!'.Rune: - var lookAroundKind: NodeKind - case sc.peek - of '='.Rune: - lookAroundKind = reLookahead - of '!'.Rune: - lookAroundKind = reNotLookahead - of '<'.Rune: - discard sc.next() - case sc.peek: - of '='.Rune: - lookAroundKind = reLookbehind - of '!'.Rune: - lookAroundKind = reNotLookbehind + var lookAroundKind = case sc.peek + of '='.Rune: reLookahead + of '!'.Rune: reNotLookahead + of '<'.Rune: + discard sc.next() + case sc.peek: + of '='.Rune: reLookbehind + of '!'.Rune: reNotLookbehind + else: + prettyCheck( + false, "Invalid lookabehind, expected `<=` or `\w*)", m) check m.group(0) == @[0..5] block: - var m: RegexMatch + var m = RegexMatch() check "ab".match(re"(a)(b)", m) check m.group(0) == @[0..0] check m.group(1) == @[1..1] block: - var m: RegexMatch + var m = RegexMatch() check match("ab", re"(a)(b)", m) check m.toStrCaptures("ab") == @[@["a"], @["b"]] @@ -681,7 +681,7 @@ test "tgroup": let expected = ["a", "b", "c"] text = "abc" - var m: RegexMatch + var m = RegexMatch() check text.match(re"(?P\w)+", m) var i = 0 for bounds in m.group("foo"): @@ -691,7 +691,7 @@ test "tgroup": let expected = ["a", "b", "c"] text = "abc" - var m: RegexMatch + var m = RegexMatch() check text.match(re"(\w)+", m) var i = 0 for bounds in m.group(0): @@ -700,21 +700,21 @@ test "tgroup": test "tnamed_groups": block: - var m: RegexMatch + var m = RegexMatch() check "foobar".match(re"(?P\w*)", m) check m.group("foo") == @[0..5] block: - var m: RegexMatch + var m = RegexMatch() check "foobar".match(re"(?P(?P\w*))", m) check m.group("foo") == @[0..5] check m.group("bar") == @[0..5] block: - var m: RegexMatch + var m = RegexMatch() check "aab".match(re"(?P(?Pa)*b)", m) check m.group("foo") == @[0..2] check m.group("bar") == @[0..0, 1..1] block: - var m: RegexMatch + var m = RegexMatch() check "aab".match(re"((?Pa)*b)", m) check m.group("bar") == @[0..0, 1..1] @@ -926,49 +926,49 @@ test "tescaped_sequences": test "tfind": block: - var m: RegexMatch + var m = RegexMatch() check "abcd".find(re"bc", m) block: - var m: RegexMatch + var m = RegexMatch() check(not "abcd".find(re"ac", m)) block: - var m: RegexMatch + var m = RegexMatch() check "a".find(re"", m) block: - var m: RegexMatch + var m = RegexMatch() check "abcd".find(re"^abcd$", m) check "2222".findWithCapt(re"(22)*") == @[@["22", "22"]] block: - var m: RegexMatch + var m = RegexMatch() check "2222".find(re"(22)*", m) check m.group(0) == @[0 .. 1, 2 .. 3] block: - var m: RegexMatch + var m = RegexMatch() check "abcd".find(re"(ab)", m) check m.group(0) == @[0 .. 1] block: - var m: RegexMatch + var m = RegexMatch() check "abcd".find(re"(bc)", m) check m.group(0) == @[1 .. 2] block: - var m: RegexMatch + var m = RegexMatch() check "abcd".find(re"(cd)", m) check m.group(0) == @[2 .. 3] block: - var m: RegexMatch + var m = RegexMatch() check "abcd".find(re"bc", m) check m.boundaries == 1 .. 2 block: - var m: RegexMatch + var m = RegexMatch() check "aΪⒶ弢".find(re"Ϊ", m) check m.boundaries == 1 .. 2 block: - var m: RegexMatch + var m = RegexMatch() check "aΪⒶ弢".find(re"Ⓐ", m) check m.boundaries == 3 .. 5 block: - var m: RegexMatch + var m = RegexMatch() check "aΪⒶ弢".find(re"弢", m) check m.boundaries == 6 .. 9 @@ -1344,17 +1344,17 @@ test "treplace": test "tmisc": block: - var m: RegexMatch + var m = RegexMatch() check "abc".match(re"[^^]+", m) check m.boundaries == 0 .. 2 check(not "^".isMatch(re"[^^]+")) block: - var m: RegexMatch + var m = RegexMatch() check "kpd".match(re"[^al-obc]+", m) check m.boundaries == 0 .. 2 check(not "abc".isMatch(re"[^al-obc]+")) block: - var m: RegexMatch + var m = RegexMatch() check "almocb".match(re"[al-obc]+", m) check m.boundaries == 0 .. 5 check(not "defzx".isMatch(re"[al-obc]+")) @@ -1362,7 +1362,7 @@ test "tmisc": # From http://www.regular-expressions.info/examples.html # Grabbing HTML Tags block: - var m: RegexMatch + var m = RegexMatch() check "onetwothree".find(re"]*>(.*?)", m) check m.boundaries == 3 .. 16 check("onetwothree".findWithCapt( @@ -1385,7 +1385,7 @@ test "tmisc": check(not "127.0.0.999".isMatch(ip)) # Floating Point Numbers block: - var m: RegexMatch + var m = RegexMatch() check "3.14".find(re"^[-+]?[0-9]*\.?[0-9]+$", m) check m.boundaries == 0 .. 3 check "1.602e-19".findWithCapt( @@ -1400,7 +1400,7 @@ test "tmisc": [a-zA-Z]{2,4} \b """ - var m: RegexMatch + var m = RegexMatch() check "john@server.department.company.com".find(email, m) check m.boundaries == 0 .. 33 check(not "john@aol...com".isMatch(email)) @@ -1459,25 +1459,25 @@ test "tmisc": # Unicode block: - var m: RegexMatch + var m = RegexMatch() check "①②③".find(re"①②③", m) check m.boundaries == 0 ..< "①②③".len block: - var m: RegexMatch + var m = RegexMatch() check "①②③④⑤".find(re"①②③", m) check m.boundaries == 0 ..< "①②③".len block: - var m: RegexMatch + var m = RegexMatch() check "①②③".find(re"①(②)③", m) check m.boundaries == 0 ..< "①②③".len check "①②③".findWithCapt(re"①(②)③") == @[@["②"]] block: - var m: RegexMatch + var m = RegexMatch() check "①②③".find(re"[①②③]*", m) check m.boundaries == 0 ..< "①②③".len # block: - var m: RegexMatch + var m = RegexMatch() check "①②③".find(re"[^④⑤]*", m) check m.boundaries == 0 ..< "①②③".len @@ -1502,7 +1502,7 @@ test "tnegative_look_around": check "ab".matchWithCapt(re"(\w(?(?P\w*))", m) check m.group("foo") == @[0..5] check m.group("bar") == @[0..5] @@ -1833,20 +1833,20 @@ test "tisInitialized": test "capturingGroupsNames": block: let text = "hello world" - var m: RegexMatch + var m = RegexMatch() check text.match(re"(?Phello) (?Pworld)", m) check m.groupsCount == 2 for name in @["greet", "who"]: check m.groupNames.contains(name) block: let text = "hello world" - var m: RegexMatch + var m = RegexMatch() check text.match(re"(?Phello) (?Pworld)", m) check m.group("greet", text) == @["hello"] check m.group("who", text) == @["world"] block: let text = "hello world foo bar" - var m: RegexMatch + var m = RegexMatch() check text.match(re"(?Phello) (?:(?P[^\s]+)\s?)+", m) check m.group("greet", text) == @["hello"] let whoGroups = m.group("who", text) @@ -1854,77 +1854,77 @@ test "capturingGroupsNames": check whoGroups.contains(w) block: let text = "hello world" - var m: RegexMatch + var m = RegexMatch() check text.match(re"(?Phello) (?Pworld)", m) check m.groupFirstCapture("greet", text) == "hello" check m.groupFirstCapture("who", text) == "world" block: let text = "hello world her" - var m: RegexMatch + var m = RegexMatch() check text.match(re"(?Phello) (?Pworld) (?Pher)", m) check m.groupFirstCapture("greet", text) == "hello" block: let text = "hello world foo bar" - var m: RegexMatch + var m = RegexMatch() check text.match(re"(?Phello) (?:(?P[^\s]+)\s?)+", m) # "who" captures @["world", "foo", "bar"] check m.groupFirstCapture("who", text) == "world" block: let text = "hello" - var m: RegexMatch + var m = RegexMatch() check text.match(re"(?Phello)\s?(?Pworld)?", m) check m.groupFirstCapture("greet", text) == "hello" check m.groupFirstCapture("who", text) == "" block: let text = "hello world her" - var m: RegexMatch + var m = RegexMatch() check text.match(re"(?Phello) (?Pworld) (?Pher)", m) check m.groupLastCapture("who", text) == "her" block: let text = "hello world foo bar" - var m: RegexMatch + var m = RegexMatch() check text.match(re"(?Phello) (?:(?P[^\s]+)\s?)+", m) # "who" captures @["world", "foo", "bar"] check m.groupLastCapture("who", text) == "bar" block: let text = "hello" - var m: RegexMatch + var m = RegexMatch() check text.match(re"(?Phello)\s?(?Pworld)?", m) check m.groupLastCapture("greet", text) == "hello" check m.groupLastCapture("who", text) == "" block: let text = "hello" - var m: RegexMatch + var m = RegexMatch() check text.match(re"(hello)\s?(world)?", m) check m.groupLastCapture(0, text) == "hello" check m.groupLastCapture(1, text) == "" block: let text = "" - var m: RegexMatch + var m = RegexMatch() check text.match(re"(hello)?\s?(world)?", m) check m.groupLastCapture(0, text) == "" check m.groupLastCapture(1, text) == "" block: let text = "hello world foo bar" - var m: RegexMatch + var m = RegexMatch() check text.match(re"(hello) (?:([^\s]+)\s?)+", m) # "who" captures @["world", "foo", "bar"] check m.groupLastCapture(1, text) == "bar" block: let text = "hello" - var m: RegexMatch + var m = RegexMatch() check text.match(re"(hello)\s?(world)?", m) check m.groupFirstCapture(0, text) == "hello" check m.groupFirstCapture(1, text) == "" block: let text = "" - var m: RegexMatch + var m = RegexMatch() check text.match(re"(hello)?\s?(world)?", m) check m.groupFirstCapture(0, text) == "" check m.groupFirstCapture(1, text) == "" block: let text = "hello world foo bar" - var m: RegexMatch + var m = RegexMatch() check text.match(re"(hello) (?:([^\s]+)\s?)+", m) # "who" captures @["world", "foo", "bar"] check m.groupFirstCapture(1, text) == "world" @@ -1932,7 +1932,7 @@ test "capturingGroupsNames": # XXX raise a compile error when regex contains unicode # in ascii mode test "tflags": - var m: RegexMatch + var m = RegexMatch() #check match("abc", re(r"abc", {reAscii}), m) check match("弢弢弢", re"\w{3}", m) #check(not match("弢弢弢", re(r"\w{3}", {reAscii}), m)) @@ -1944,7 +1944,7 @@ test "tflags": #check(not "%弢弢%".find(re(r"\w{2}", {reAscii}), m)) test "tfindopt": - var m: RegexMatch + var m = RegexMatch() check(not find("bar", re"foo", m)) check(not find("bar", re"baz", m)) check "abcd".find(re"bc", m) @@ -2150,7 +2150,7 @@ test "tfindallopt": @[0 .. 10, 14 .. 25] test "tmisc2": - var m: RegexMatch + var m = RegexMatch() check "onetwotree".find(re".*?", m) check m.boundaries == 3 .. 16 check "onetwotree".find(re"[\w<>/]*?", m) @@ -2228,7 +2228,7 @@ test "tmisc2": m.captures == @[@[0 .. 6, 7 .. 10], @[0 .. 0, 1 .. 1, 2 .. 2], @[4 .. 6, 8 .. 10]] test "tmisc2_5": - var m: RegexMatch + var m = RegexMatch() check match("abd", re"((ab)c)|((ab)d)", m) and m.captures == @[@[], @[], @[0 .. 2], @[0 .. 1]] check match("aaa", re"(a*)", m) and @@ -2291,7 +2291,7 @@ test "tmisc2_6": check(not match("1", re1)) test "tmisc3": - var m: RegexMatch + var m = RegexMatch() block: # issue #61 const a = "void __mingw_setusermatherr (int (__attribute__((__cdecl__)) *)(struct _exception *));" check replace(a, re"__attribute__[ ]*\(\(.*?\)\)([ ,;])", "$1") == @@ -2416,7 +2416,7 @@ test "tmisc3": check split("foobar", re"o(?=b)") == @["fo", "bar"] check split("foobar", re"o(?!b)") == @["f", "obar"] block: - var m: RegexMatch + var m = RegexMatch() check find("abcxyz", re"(abc)|\w+", m) check m.boundaries == 0 .. 2 check find("xyzabc", re"(abc)|\w+", m) @@ -2515,7 +2515,7 @@ test "issue_98": check match("1.1", re"(\d+)\.(\d+)(\.(\d+)|)(\.(\d+)|)") test "issue_101": - var m: RegexMatch + var m = RegexMatch() check match("TXT1/TXT2.1", re"(TXT1)/TXT2()\.(\d+)") check match("TXT1/TXT2.1", re"(TXT1)/TXT2(?:)\.(\d+)") check match("TXT1/TXT2.1", re"(TXT1)/TXT2(?i:)\.(\d+)") diff --git a/tests/tests2.nim b/tests/tests2.nim index 8c7baa2..07a9a61 100644 --- a/tests/tests2.nim +++ b/tests/tests2.nim @@ -29,11 +29,11 @@ template expect(exception: typedesc, body: untyped): untyped = when defined(forceRegexAtRuntime): proc isMatch(s: string, pattern: Regex2): bool = - var m: RegexMatch2 + var m = RegexMatch2() result = match(s, pattern, m) else: proc isMatch(s: string, pattern: static Regex2): bool = - var m: RegexMatch2 + var m = RegexMatch2() result = match(s, pattern, m) proc raises(pattern: string): bool = @@ -50,14 +50,14 @@ proc raisesMsg(pattern: string): string = result = getCurrentExceptionMsg() proc matchWithCapt(s: string, pattern: static Regex2): seq[string] = - var m: RegexMatch2 + var m = RegexMatch2() check match(s, pattern, m) result.setLen m.captures.len for i, bounds in m.captures.pairs: result[i] = s[bounds] proc matchWithBounds(s: string, pattern: static Regex2): seq[Slice[int]] = - var m: RegexMatch2 + var m = RegexMatch2() check match(s, pattern, m) return m.captures @@ -67,7 +67,7 @@ proc toStrCaptures(m: RegexMatch2, s: string): seq[string] = result[i] = s[bounds] proc findWithCapt(s: string, pattern: Regex2): seq[string] = - var m: RegexMatch2 + var m = RegexMatch2() check find(s, pattern, m) result = m.toStrCaptures(s) @@ -1041,11 +1041,11 @@ test "tgreediness": check "aaa".matchWithCapt(re2"(a){1,}?(a){1,}(a)?") == @["a", "a", ""] check "aaa".matchWithCapt(re2"(a){1,}?(a){1,}?(a)?") == @["a", "a", "a"] block: - var m: RegexMatch2 + var m = RegexMatch2() check match("aaaa", re2"(a*?)(a*?)(a*)", m) check m.toStrCaptures("aaaa") == @["", "", "aaaa"] block: - var m: RegexMatch2 + var m = RegexMatch2() check match("aaaa", re2"(a*)(a*?)(a*?)", m) check m.toStrCaptures("aaaa") == @["aaaa", "", ""] @@ -1072,48 +1072,48 @@ test "tdot_any_matcher": test "tgroup": block: - var m: RegexMatch2 + var m = RegexMatch2() check "foobar".match(re2"(\w*)", m) check m.group(0) == 0..5 block: - var m: RegexMatch2 + var m = RegexMatch2() check "foobar".match(re2"(?P\w*)", m) check m.group(0) == 0..5 block: - var m: RegexMatch2 + var m = RegexMatch2() check "ab".match(re2"(a)(b)", m) check m.group(0) == 0..0 check m.group(1) == 1..1 block: - var m: RegexMatch2 + var m = RegexMatch2() check match("ab", re2"(a)(b)", m) check m.toStrCaptures("ab") == @["a", "b"] block: - var m: RegexMatch2 + var m = RegexMatch2() check "abc".match(re2"(?P\w)+", m) check m.group("foo") == 2..2 block: - var m: RegexMatch2 + var m = RegexMatch2() check "abc".match(re2"(\w)+", m) check m.group(0) == 2..2 test "tnamed_groups": block: - var m: RegexMatch2 + var m = RegexMatch2() check "foobar".match(re2"(?P\w*)", m) check m.group("foo") == 0..5 block: - var m: RegexMatch2 + var m = RegexMatch2() check "foobar".match(re2"(?P(?P\w*))", m) check m.group("foo") == 0..5 check m.group("bar") == 0..5 block: - var m: RegexMatch2 + var m = RegexMatch2() check "aab".match(re2"(?P(?Pa)*b)", m) check m.group("foo") == 0..2 check m.group("bar") == 1..1 block: - var m: RegexMatch2 + var m = RegexMatch2() check "aab".match(re2"((?Pa)*b)", m) check m.group("bar") == 1..1 @@ -1318,47 +1318,47 @@ test "tescaped_sequences": test "tfind": block: - var m: RegexMatch2 + var m = RegexMatch2() check "abcd".find(re2"bc", m) block: - var m: RegexMatch2 + var m = RegexMatch2() check(not "abcd".find(re2"ac", m)) block: - var m: RegexMatch2 + var m = RegexMatch2() check "a".find(re2"", m) block: - var m: RegexMatch2 + var m = RegexMatch2() check "abcd".find(re2"^abcd$", m) block: - var m: RegexMatch2 + var m = RegexMatch2() check "2222".find(re2"(22)*", m) check m.group(0) == 2 .. 3 block: - var m: RegexMatch2 + var m = RegexMatch2() check "abcd".find(re2"(ab)", m) check m.group(0) == 0 .. 1 block: - var m: RegexMatch2 + var m = RegexMatch2() check "abcd".find(re2"(bc)", m) check m.group(0) == 1 .. 2 block: - var m: RegexMatch2 + var m = RegexMatch2() check "abcd".find(re2"(cd)", m) check m.group(0) == 2 .. 3 block: - var m: RegexMatch2 + var m = RegexMatch2() check "abcd".find(re2"bc", m) check m.boundaries == 1 .. 2 block: - var m: RegexMatch2 + var m = RegexMatch2() check "aΪⒶ弢".find(re2"Ϊ", m) check m.boundaries == 1 .. 2 block: - var m: RegexMatch2 + var m = RegexMatch2() check "aΪⒶ弢".find(re2"Ⓐ", m) check m.boundaries == 3 .. 5 block: - var m: RegexMatch2 + var m = RegexMatch2() check "aΪⒶ弢".find(re2"弢", m) check m.boundaries == 6 .. 9 @@ -1753,17 +1753,17 @@ test "treplace": test "tmisc": block: - var m: RegexMatch2 + var m = RegexMatch2() check "abc".match(re2"[^^]+", m) check m.boundaries == 0 .. 2 check(not "^".isMatch(re2"[^^]+")) block: - var m: RegexMatch2 + var m = RegexMatch2() check "kpd".match(re2"[^al-obc]+", m) check m.boundaries == 0 .. 2 check(not "abc".isMatch(re2"[^al-obc]+")) block: - var m: RegexMatch2 + var m = RegexMatch2() check "almocb".match(re2"[al-obc]+", m) check m.boundaries == 0 .. 5 check(not "defzx".isMatch(re2"[al-obc]+")) @@ -1771,7 +1771,7 @@ test "tmisc": # From http://www.regular-expressions.info/examples.html # Grabbing HTML Tags block: - var m: RegexMatch2 + var m = RegexMatch2() check "onetwothree".find(re2"]*>(.*?)", m) check m.boundaries == 3 .. 16 check("onetwothree".findWithCapt( @@ -1794,7 +1794,7 @@ test "tmisc": check(not "127.0.0.999".isMatch(ip)) # Floating Point Numbers block: - var m: RegexMatch2 + var m = RegexMatch2() check "3.14".find(re2"^[-+]?[0-9]*\.?[0-9]+$", m) check m.boundaries == 0 .. 3 check "1.602e-19".findWithCapt( @@ -1809,7 +1809,7 @@ test "tmisc": [a-zA-Z]{2,4} \b """ - var m: RegexMatch2 + var m = RegexMatch2() check "john@server.department.company.com".find(email, m) check m.boundaries == 0 .. 33 check(not "john@aol...com".isMatch(email)) @@ -1868,25 +1868,25 @@ test "tmisc": # Unicode block: - var m: RegexMatch2 + var m = RegexMatch2() check "①②③".find(re2"①②③", m) check m.boundaries == 0 ..< "①②③".len block: - var m: RegexMatch2 + var m = RegexMatch2() check "①②③④⑤".find(re2"①②③", m) check m.boundaries == 0 ..< "①②③".len block: - var m: RegexMatch2 + var m = RegexMatch2() check "①②③".find(re2"①(②)③", m) check m.boundaries == 0 ..< "①②③".len check "①②③".findWithCapt(re2"①(②)③") == @["②"] block: - var m: RegexMatch2 + var m = RegexMatch2() check "①②③".find(re2"[①②③]*", m) check m.boundaries == 0 ..< "①②③".len # block: - var m: RegexMatch2 + var m = RegexMatch2() check "①②③".find(re2"[^④⑤]*", m) check m.boundaries == 0 ..< "①②③".len @@ -1911,7 +1911,7 @@ test "tnegative_look_around": check "ab".matchWithCapt(re2"(\w(?(?P\w*))", m) check m.group("foo") == 0..5 check m.group("bar") == 0..5 @@ -2242,61 +2242,61 @@ test "tisInitialized": test "capturingGroupsNames": block: let text = "hello world" - var m: RegexMatch2 + var m = RegexMatch2() check text.match(re2"(?Phello) (?Pworld)", m) check m.groupsCount == 2 for name in @["greet", "who"]: check m.groupNames.contains(name) block: let text = "hello world" - var m: RegexMatch2 + var m = RegexMatch2() check text.match(re2"(?Phello) (?Pworld)", m) check m.group("greet", text) == "hello" check m.group("who", text) == "world" block: let text = "hello world foo bar" - var m: RegexMatch2 + var m = RegexMatch2() check text.match(re2"(?Phello) (?:(?P[^\s]+)\s?)+", m) check m.group("greet", text) == "hello" check m.group("who", text) == "bar" block: let text = "hello world her" - var m: RegexMatch2 + var m = RegexMatch2() check text.match(re2"(?Phello) (?Pworld) (?Pher)", m) check m.group("who", text) == "her" block: let text = "hello world foo bar" - var m: RegexMatch2 + var m = RegexMatch2() check text.match(re2"(?Phello) (?:(?P[^\s]+)\s?)+", m) check m.group("who", text) == "bar" block: let text = "hello" - var m: RegexMatch2 + var m = RegexMatch2() check text.match(re2"(?Phello)\s?(?Pworld)?", m) check m.group("greet", text) == "hello" check m.group("who", text) == "" block: let text = "hello" - var m: RegexMatch2 + var m = RegexMatch2() check text.match(re2"(hello)\s?(world)?", m) check m.group(0, text) == "hello" check m.group(1, text) == "" block: let text = "" - var m: RegexMatch2 + var m = RegexMatch2() check text.match(re2"(hello)?\s?(world)?", m) check m.group(0, text) == "" check m.group(1, text) == "" block: let text = "hello world foo bar" - var m: RegexMatch2 + var m = RegexMatch2() check text.match(re2"(hello) (?:([^\s]+)\s?)+", m) check m.group(1, text) == "bar" # XXX raise a compile error when regex contains unicode # in ascii mode test "tflags": - var m: RegexMatch2 + var m = RegexMatch2() #check match("abc", re2(r"abc", {reAscii}), m) check match("弢弢弢", re2"\w{3}", m) #check(not match("弢弢弢", re2(r"\w{3}", {reAscii}), m)) @@ -2308,7 +2308,7 @@ test "tflags": #check(not "%弢弢%".find(re2(r"\w{2}", {reAscii}), m)) test "tfindopt": - var m: RegexMatch2 + var m = RegexMatch2() check(not find("bar", re2"foo", m)) check(not find("bar", re2"baz", m)) check "abcd".find(re2"bc", m) @@ -2514,7 +2514,7 @@ test "tfindallopt": @[0 .. 10, 14 .. 25] test "tmisc2": - var m: RegexMatch2 + var m = RegexMatch2() check "onetwotree".find(re2".*?", m) check m.boundaries == 3 .. 16 check "onetwotree".find(re2"[\w<>/]*?", m) @@ -2589,7 +2589,7 @@ test "tmisc2": m.captures == @[7 .. 10, 2 .. 2, 8 .. 10] test "tmisc2_5": - var m: RegexMatch2 + var m = RegexMatch2() check match("abd", re2"((ab)c)|((ab)d)", m) and m.captures == @[nonCapture, nonCapture, 0 .. 2, 0 .. 1] check match("aaa", re2"(a*)", m) and @@ -2652,7 +2652,7 @@ test "tmisc2_6": check(not match("1", re1)) test "tmisc3": - var m: RegexMatch2 + var m = RegexMatch2() block: # issue #61 const a = "void __mingw_setusermatherr (int (__attribute__((__cdecl__)) *)(struct _exception *));" check replace(a, re2"__attribute__[ ]*\(\(.*?\)\)([ ,;])", "$1") == @@ -2777,7 +2777,7 @@ test "tmisc3": check split("foobar", re2"o(?=b)") == @["fo", "bar"] check split("foobar", re2"o(?!b)") == @["f", "obar"] block: - var m: RegexMatch2 + var m = RegexMatch2() check find("abcxyz", re2"(abc)|\w+", m) check m.boundaries == 0 .. 2 check find("xyzabc", re2"(abc)|\w+", m) @@ -2990,7 +2990,7 @@ test "issue_98": check match("1.1", re2"(\d+)\.(\d+)(\.(\d+)|)(\.(\d+)|)") test "issue_101": - var m: RegexMatch2 + var m = RegexMatch2() check match("TXT1/TXT2.1", re2"(TXT1)/TXT2()\.(\d+)") check match("TXT1/TXT2.1", re2"(TXT1)/TXT2(?:)\.(\d+)") check match("TXT1/TXT2.1", re2"(TXT1)/TXT2(?i:)\.(\d+)") @@ -3011,7 +3011,7 @@ test "issue_101": check(not match("A", re2"(?xi:(?xi) )a")) test "tlookaround_captures": - var m: RegexMatch2 + var m = RegexMatch2() check match("aaab", re2"((\w+)|a(a+)b(?<=^(a+)(b)))", m) and m.captures == @[0 .. 3, 0 .. 3, nonCapture, nonCapture, nonCapture] check match("aaab", re2"a(a+)b(?<=^(a+)(b))", m) and @@ -3074,7 +3074,7 @@ test "tverifyutf8": check raisesMsg("\xff") == "Invalid utf-8 regex" raisesInvalidUtf8 match("\xff", re2"abc") block: - var m: RegexMatch2 + var m = RegexMatch2() raisesInvalidUtf8 match("\xff", re2"abc", m) raisesInvalidUtf8 findAll("\xff", re2"abc") raisesInvalidUtf8 findAllBounds("\xff", re2"abc") @@ -3170,7 +3170,7 @@ when not defined(js) or NimMajor >= 2: check replace("ΪⒶ弢ΪⒶ弢x", re2(r"ΪⒶ弢\w", flags), "abc") == "ΪⒶ弢abc" check replace("ΪⒶ弢ΪⒶ弢x", re2(r"(?u)ΪⒶ弢\w", flags), "abc") == "abc\xaaⒶ弢x" block: - var m: RegexMatch2 + var m = RegexMatch2() check match("a", re2(r"a", flags)) and m.groupsCount == 0 check match("\x02\xF8\x95", re2(r"\x{2F895}", flags)) and @@ -3287,7 +3287,7 @@ test "tvarflags": var flags = {regexDotAll} check match("a\L", re2(r"a.", flags)) block: - var m: RegexMatch2 + var m = RegexMatch2() check match("aa", re2(r"(a*)(a*)", {regexUngreedy}), m) and m.captures == @[0 .. -1, 0 .. 1] check match("aa", re2"(?U)(a*)(a*)", m) and