Skip to content

Commit

Permalink
chore: prepare v3.1.0
Browse files Browse the repository at this point in the history
  • Loading branch information
ifplusor committed Aug 12, 2020
1 parent ace4986 commit 3e036e7
Show file tree
Hide file tree
Showing 8 changed files with 97 additions and 57 deletions.
6 changes: 3 additions & 3 deletions actrie/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#!/usr/bin/env python
# coding=utf-8
# encoding=utf-8

from .matcher import Matcher, Context, PrefixMatcher

__all__ = ['Matcher', 'Context', 'PrefixMatcher']
__all__ = ["Matcher", "Context", "PrefixMatcher"]

__version__ = '3.0.9'
__version__ = "3.1.0"
7 changes: 2 additions & 5 deletions actrie/context.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# coding=utf-8
# encoding=utf-8

from collections import Iterator

Expand All @@ -8,7 +8,6 @@


class Context(Iterator):

def __init__(self, matcher, content=None, return_byte_pos=False):
if matcher is None:
raise MatcherError("Matcher is None")
Expand All @@ -32,8 +31,7 @@ def reset(self, content=None, return_byte_pos=None):
self._content = convert2pass(content)
if return_byte_pos is not None:
self._return_byte_pos = return_byte_pos
self._uninitialized = not _actrie.ResetContext(
self._context, self._content, self._return_byte_pos)
self._uninitialized = not _actrie.ResetContext(self._context, self._content, self._return_byte_pos)
if self._uninitialized:
raise MatcherError("Reset context failed!")

Expand All @@ -55,7 +53,6 @@ def next(self):


class PrefixContext(Context):

def _next(self):
if self._uninitialized:
raise None
Expand Down
3 changes: 1 addition & 2 deletions actrie/error.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
# coding=utf-8
# encoding=utf-8


class MatcherError(Exception):

def __init__(self, value):
self.value = value

Expand Down
36 changes: 23 additions & 13 deletions actrie/matcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,60 +9,71 @@


class Matcher:

def __init__(self):
self._matcher = 0

def __del__(self):
    """Pass the stored matcher handle to ``_actrie.Destruct`` on finalization."""
    handle = self._matcher
    _actrie.Destruct(handle)

def load_from_file(self, path, all_as_plain=False, ignore_bad_pattern=False, bad_as_plain=True, deduplicate_extra=True):
def load_from_file(
self, path, all_as_plain=False, ignore_bad_pattern=False, bad_as_plain=True, deduplicate_extra=True
):
if self._matcher:
raise MatcherError("Matcher is already initialized.")
if not os.path.isfile(path):
return False
self._matcher = _actrie.ConstructByFile(
convert2pass(path), all_as_plain, ignore_bad_pattern, bad_as_plain, deduplicate_extra)
convert2pass(path), all_as_plain, ignore_bad_pattern, bad_as_plain, deduplicate_extra
)
return self._matcher != 0

@classmethod
def create_by_file(cls, path, all_as_plain=False, ignore_bad_pattern=False, bad_as_plain=True, deduplicate_extra=True):
def create_by_file(
cls, path, all_as_plain=False, ignore_bad_pattern=False, bad_as_plain=True, deduplicate_extra=True
):
matcher = cls()
if matcher.load_from_file(path, all_as_plain, ignore_bad_pattern, bad_as_plain, deduplicate_extra):
return matcher
return None

def load_from_string(self, keywords, all_as_plain=False, ignore_bad_pattern=False, bad_as_plain=True, deduplicate_extra=True):
def load_from_string(
self, keywords, all_as_plain=False, ignore_bad_pattern=False, bad_as_plain=True, deduplicate_extra=True
):
if self._matcher:
raise MatcherError("Matcher is already initialized.")
if keywords is None:
return False
self._matcher = _actrie.ConstructByString(
convert2pass(keywords), all_as_plain, ignore_bad_pattern, bad_as_plain, deduplicate_extra)
convert2pass(keywords), all_as_plain, ignore_bad_pattern, bad_as_plain, deduplicate_extra
)
return self._matcher != 0

@classmethod
def create_by_string(cls, keywords, all_as_plain=False, ignore_bad_pattern=False, bad_as_plain=True, deduplicate_extra=True):
def create_by_string(
cls, keywords, all_as_plain=False, ignore_bad_pattern=False, bad_as_plain=True, deduplicate_extra=True
):
matcher = cls()
if matcher.load_from_string(keywords, all_as_plain, ignore_bad_pattern, bad_as_plain, deduplicate_extra):
return matcher
return None

def load_from_collection(self, keywords, all_as_plain=False, ignore_bad_pattern=False, bad_as_plain=True, deduplicate_extra=True):
def load_from_collection(
self, keywords, all_as_plain=False, ignore_bad_pattern=False, bad_as_plain=True, deduplicate_extra=True
):
if self._matcher:
raise MatcherError("Matcher is already initialized.")
if isinstance(keywords, list) or isinstance(keywords, set):
# In UTF-8, '\n' is the single byte 0x0a (UTF-8 is ASCII-compatible), so joining on "\n" is safe.
# In Python 3, however, str.join only accepts str items, so every keyword is converted first.
keywords = "\n".join(
[convert2pass(keyword) for keyword in keywords
if convert2pass(keyword)])
keywords = "\n".join([convert2pass(keyword) for keyword in keywords if convert2pass(keyword)])
else:
raise MatcherError("Keywords should be list or set.")
return self.load_from_string(keywords, all_as_plain, ignore_bad_pattern, bad_as_plain, deduplicate_extra)

@classmethod
def create_by_collection(cls, keywords, all_as_plain=False, ignore_bad_pattern=False, bad_as_plain=True, deduplicate_extra=True):
def create_by_collection(
cls, keywords, all_as_plain=False, ignore_bad_pattern=False, bad_as_plain=True, deduplicate_extra=True
):
matcher = cls()
if matcher.load_from_collection(keywords, all_as_plain, ignore_bad_pattern, bad_as_plain, deduplicate_extra):
return matcher
Expand Down Expand Up @@ -99,7 +110,6 @@ def search(self, content, return_byte_pos=False):


class PrefixMatcher(Matcher):

def match(self, content, return_byte_pos=False):
if not self._matcher:
raise MatcherError("Matcher is not initialized.")
Expand Down
15 changes: 9 additions & 6 deletions actrie/pattern.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@

try:
from enum import Enum
except:
except Exception:

class Enum(object):
pass


from .tokenizer import Tokenizer
from .util import convert2unicode, is_py3k

Expand Down Expand Up @@ -149,7 +151,7 @@ def _parse_all(tokenizer, stop=None):

@abstractmethod
def ustr(self):
return u''
return u""


class PurePattern(Pattern):
Expand All @@ -165,7 +167,7 @@ def ustr(self):
return self.text

def __str__(self):
return self.ustr() if is_py3k else self.ustr().encode('utf-8')
return self.ustr() if is_py3k else self.ustr().encode("utf-8")


class AlternationPattern(Pattern):
Expand All @@ -181,7 +183,7 @@ def ustr(self):
return u"(" + u"|".join([p.ustr() for p in self.alters]) + u")"

def __str__(self):
return self.ustr() if is_py3k else self.ustr().encode('utf-8')
return self.ustr() if is_py3k else self.ustr().encode("utf-8")


class AntiAmbiguousPattern(Pattern):
Expand All @@ -195,7 +197,7 @@ def ustr(self):
return self.origin.ustr() + u"(?&!" + u"|".join([p.ustr() for p in self.ambis]) + u")"

def __str__(self):
return self.ustr() if is_py3k else self.ustr().encode('utf-8')
return self.ustr() if is_py3k else self.ustr().encode("utf-8")


class DistancePattern(Pattern):
Expand All @@ -210,4 +212,5 @@ def ustr(self):
return self.head.ustr() + u".{0," + convert2unicode(str(self.dist)) + u"}" + self.tail.ustr()

def __str__(self):
return self.ustr() if is_py3k else self.ustr().encode('utf-8')
return self.ustr() if is_py3k else self.ustr().encode("utf-8")

3 changes: 1 addition & 2 deletions actrie/tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ def __str__(self):


class Tokenizer:

def __init__(self, text):
self.text = text
self.cur = 0
Expand Down Expand Up @@ -73,7 +72,7 @@ def consume_integer(self):

def expect(self, s, move=True):
wlen = len(s)
if len(self.text) - self.cur >= wlen and self.text[self.cur:(self.cur + wlen)] == s:
if len(self.text) - self.cur >= wlen and self.text[self.cur : (self.cur + wlen)] == s:
if move:
self.cur += wlen
return True
Expand Down
82 changes: 57 additions & 25 deletions actrie/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,34 +2,66 @@

import sys

is_py3k = bool(sys.version_info[0] == 3)
try:
from typing import AnyStr
except Exception:
pass

is_py2k = bool(sys.version_info[0] == 2)

def convert2pass(obj):
if is_py3k:
if isinstance(obj, bytes):
return obj.decode("utf-8")
else:
if isinstance(obj, unicode):
return obj.encode("utf-8")
return obj
if not is_py2k:

def convert2pass(string):
    # type: (AnyStr) -> str
    """Return *string* as text, decoding ``bytes`` input as UTF-8.

    Any value that is not a ``bytes`` instance is handed back unchanged.
    """
    return string.decode("utf-8") if isinstance(string, bytes) else string

def convert2unicode(obj):
if is_py3k:
if isinstance(obj, bytes):
return obj.decode("utf-8")
else:
if isinstance(obj, str):
return obj.decode("utf-8")
return obj
def convert2unicode(string):
    # type: (AnyStr) -> str
    """Return *string* as text, decoding ``bytes`` input as UTF-8.

    Any value that is not a ``bytes`` instance is returned as-is.
    """
    if not isinstance(string, bytes):
        return string
    return string.decode("utf-8")


def replace_escap(word):
if is_py3k or isinstance(str, unicode):
for old, new in ((u"\\", u"\\\\"), (u"(", u"\\("), (u")", u"\\)"), (u"{", u"\\{"), (u".", u"\\."), (u"|", u"\\|")):
word = word.replace(old, new)
else:
for old, new in (("\\", "\\\\"), ("(", "\\("), (")", "\\)"), ("{", "\\{"), (".", "\\."), ("|", "\\|")):
def replace_escap(word):
for old, new in (
(u"\\", u"\\\\"),
(u"(", u"\\("),
(u")", u"\\)"),
(u"{", u"\\{"),
(u".", u"\\."),
(u"|", u"\\|"),
):
word = word.replace(old, new)
return word
return word


else:

def convert2pass(string):
    # type: (AnyStr) -> str
    """Python 2 variant: UTF-8 encode ``unicode`` input, pass anything else through."""
    return string.encode("utf-8") if isinstance(string, unicode) else string

def convert2unicode(string):
    # type: (AnyStr) -> unicode
    """Python 2 variant: UTF-8 decode ``str`` input, pass anything else through."""
    if not isinstance(string, str):
        return string
    return string.decode("utf-8")

def replace_escap(word):
    """Python 2 variant: backslash-escape the characters ``\\ ( ) { . |`` in *word*.

    ``unicode`` input is processed with unicode literals and any other input
    with plain string literals, so both branches use literals of the matching
    string type.
    """
    if isinstance(word, unicode):
        backslash = u"\\"
        specials = (u"\\", u"(", u")", u"{", u".", u"|")
    else:
        backslash = "\\"
        specials = ("\\", "(", ")", "{", ".", "|")
    # The backslash must be handled first; otherwise the backslashes added
    # for the later characters would themselves be doubled.
    for char in specials:
        word = word.replace(char, backslash + char)
    return word
2 changes: 1 addition & 1 deletion jni/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

<groupId>psn.ifplusor</groupId>
<artifactId>actrie</artifactId>
<version>3.0.9</version>
<version>3.1.0</version>
<packaging>nar</packaging>

<name>actrie</name>
Expand Down

0 comments on commit 3e036e7

Please sign in to comment.