From c3e0f10f171cc0bae9fa154e67c4f24181052679 Mon Sep 17 00:00:00 2001 From: Abhishek Singh Date: Sun, 24 Nov 2024 21:24:37 -0800 Subject: [PATCH 1/3] Added precommit --- .coveragerc | 2 +- .github/workflows/lexpy_build.yaml | 2 +- .gitignore | 2 +- .pre-commit-config.yaml | 16 ++ README.md | 35 ++- lexpy/__init__.py | 2 +- lexpy/_base/automata.py | 151 ++++++++----- lexpy/_base/node.py | 20 +- lexpy/_utils.py | 31 ++- lexpy/dawg.py | 25 ++- lexpy/exceptions.py | 2 +- lexpy/tests/data/BIGFILES.txt | 26 +-- lexpy/tests/data/OSPD2.txt | 2 +- lexpy/tests/data/TWL06.txt | 2 +- lexpy/tests/data/ridyhew_master.txt | 2 +- lexpy/tests/data/word4.txt | 2 +- lexpy/tests/data/words2.txt | 2 +- lexpy/tests/test_dawg.py | 335 +++++++++++++++++++--------- lexpy/tests/test_trie.py | 275 +++++++++++++++-------- lexpy/tests/test_word_count.py | 24 +- lexpy/trie.py | 16 +- lexpy/utils.py | 2 +- pyproject.toml | 2 +- setup.cfg | 6 +- setup.py | 75 ++++--- 25 files changed, 663 insertions(+), 396 deletions(-) create mode 100644 .pre-commit-config.yaml diff --git a/.coveragerc b/.coveragerc index 1460233..d7811e2 100644 --- a/.coveragerc +++ b/.coveragerc @@ -12,4 +12,4 @@ omit = [report] [html] -directory = htmlcov \ No newline at end of file +directory = htmlcov diff --git a/.github/workflows/lexpy_build.yaml b/.github/workflows/lexpy_build.yaml index d1516fc..610e50f 100644 --- a/.github/workflows/lexpy_build.yaml +++ b/.github/workflows/lexpy_build.yaml @@ -9,7 +9,7 @@ jobs: fail-fast: false matrix: os: [macos-latest, windows-latest, ubuntu-latest] - python-version: ['3.7', '3.8', '3.9', '3.10', '3.11', '3.12', 'pypy-3.7', 'pypy-3.8', 'pypy-3.9', 'pypy-3.10'] + python-version: ['3.7', '3.8', '3.9', '3.10', '3.11', '3.12', '3.13', 'pypy-3.7', 'pypy-3.8', 'pypy-3.9', 'pypy-3.10'] steps: - name: Checkout diff --git a/.gitignore b/.gitignore index f446531..6c5b94e 100644 --- a/.gitignore +++ b/.gitignore @@ -13,4 +13,4 @@ build dawg_sample.py compare_trie_dawg_size.py compare_trie_dawg_time.py -venv \ No newline at end of file +venv diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..c5e3bdc --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,16 @@ +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v3.2.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - id: check-added-large-files +- repo: https://github.com/psf/black + rev: 24.10.0 + hooks: + - id: black +- repo: https://github.com/PyCQA/flake8 + rev: 7.0.0 + hooks: + - id: flake8 diff --git a/README.md b/README.md index 713ddb4..f97791a 100644 --- a/README.md +++ b/README.md @@ -19,13 +19,13 @@ -- A lexicon is a data-structure which stores a set of words. The difference between -a dictionary and a lexicon is that in a lexicon there are **no values** associated with the words. +- A lexicon is a data-structure which stores a set of words. The difference between +a dictionary and a lexicon is that in a lexicon there are **no values** associated with the words. - A lexicon is similar to a list or a set of words, but the internal representation is different and optimized -for faster searches of words, prefixes and wildcard patterns. +for faster searches of words, prefixes and wildcard patterns. -- Given a word, precisely, the search time is O(W) where W is the length of the word. +- Given a word, precisely, the search time is O(W) where W is the length of the word. - 2 important lexicon data-structures are **_Trie_** and **_Directed Acyclic Word Graph (DAWG)_**. @@ -61,10 +61,10 @@ from lexpy import Trie trie = Trie() -input_words = ['ampyx', 'abuzz', 'athie', 'athie', 'athie', 'amato', 'amato', 'aneto', 'aneto', 'aruba', - 'arrow', 'agony', 'altai', 'alisa', 'acorn', 'abhor', 'aurum', 'albay', 'arbil', 'albin', - 'almug', 'artha', 'algin', 'auric', 'sore', 'quilt', 'psychotic', 'eyes', 'cap', 'suit', - 'tank', 'common', 'lonely', 'likeable' 'language', 'shock', 'look', 'pet', 'dime', 'small' +input_words = ['ampyx', 'abuzz', 'athie', 'athie', 'athie', 'amato', 'amato', 'aneto', 'aneto', 'aruba', + 'arrow', 'agony', 'altai', 'alisa', 'acorn', 'abhor', 'aurum', 'albay', 'arbil', 'albin', + 'almug', 'artha', 'algin', 'auric', 'sore', 'quilt', 'psychotic', 'eyes', 'cap', 'suit', + 'tank', 'common', 'lonely', 'likeable' 'language', 'shock', 'look', 'pet', 'dime', 'small' 'dusty', 'accept', 'nasty', 'thrill', 'foot', 'steel', 'steel', 'steel', 'steel', 'abuzz'] trie.add_all(input_words) # You can pass any sequence types or a file-like object here @@ -170,7 +170,7 @@ print(trie.search_within_distance('arie', dist=2, with_count=True)) # Directed Acyclic Word Graph (DAWG) - DAWG supports the same set of operations as a Trie. The difference is the number of nodes in a DAWG is always -less than or equal to the number of nodes in Trie. +less than or equal to the number of nodes in Trie. - They both are Deterministic Finite State Automata. However, DAWG is a minimized version of the Trie DFA. @@ -210,10 +210,10 @@ The APIs are exactly same as the Trie APIs from lexpy import DAWG dawg = DAWG() -input_words = ['ampyx', 'abuzz', 'athie', 'athie', 'athie', 'amato', 'amato', 'aneto', 'aneto', 'aruba', - 'arrow', 'agony', 'altai', 'alisa', 'acorn', 'abhor', 'aurum', 'albay', 'arbil', 'albin', - 'almug', 'artha', 'algin', 'auric', 'sore', 'quilt', 'psychotic', 'eyes', 'cap', 'suit', - 'tank', 'common', 'lonely', 'likeable' 'language', 'shock', 'look', 'pet', 'dime', 'small' +input_words = ['ampyx', 'abuzz', 'athie', 'athie', 'athie', 'amato', 'amato', 'aneto', 'aneto', 'aruba', + 'arrow', 'agony', 'altai', 'alisa', 'acorn', 'abhor', 'aurum', 'albay', 'arbil', 'albin', + 'almug', 'artha', 'algin', 'auric', 'sore', 'quilt', 'psychotic', 'eyes', 'cap', 'suit', + 'tank', 'common', 'lonely', 'likeable' 'language', 'shock', 'look', 'pet', 'dime', 'small' 'dusty', 'accept', 'nasty', 'thrill', 'foot', 'steel', 'steel', 'steel', 'steel', 'abuzz'] @@ -317,7 +317,7 @@ print(dawg.search('thrill', with_count=True)) ## Special Characters -Special characters, except `?` and `*`, are matched literally. +Special characters, except `?` and `*`, are matched literally. ```python from lexpy import Trie @@ -357,15 +357,12 @@ These are some ideas which I would love to work on next in that order. Pull requ - Merge trie and DAWG features in one data structure - Support all functionalities and still be as compressed as possible. - Serialization / Deserialization - - Pickle is definitely an option. + - Pickle is definitely an option. - Server (TCP or HTTP) to serve queries over the network. # Fun Facts 1. The 45-letter word pneumonoultramicroscopicsilicovolcanoconiosis is the longest English word that appears in a major dictionary. -So for all english words, the search time is bounded by O(45). +So for all english words, the search time is bounded by O(45). 2. The longest technical word(not in dictionary) is the name of a protein called as [titin](https://en.wikipedia.org/wiki/Titin). It has 189,819 letters and it is disputed whether it is a word. - - - diff --git a/lexpy/__init__.py b/lexpy/__init__.py index 5ea3e6b..d33c4ba 100644 --- a/lexpy/__init__.py +++ b/lexpy/__init__.py @@ -2,4 +2,4 @@ from lexpy.trie import Trie from lexpy.dawg import DAWG -__all__ = ['Trie', 'DAWG'] +__all__ = ["Trie", "DAWG"] diff --git a/lexpy/_base/automata.py b/lexpy/_base/automata.py index ef4e669..b7aa9b3 100644 --- a/lexpy/_base/automata.py +++ b/lexpy/_base/automata.py @@ -9,7 +9,7 @@ class FSA: """ - __slots__ = '_id', '_num_of_words', 'root' + __slots__ = "_id", "_num_of_words", "root" def __init__(self, root): self._id = 1 @@ -28,7 +28,7 @@ def __contains__(self, word): Returns: :returns contains (boolean) True or False """ - if word == '': + if word == "": return True # The root is an empty string. So it is always present if word is None: return False @@ -45,18 +45,19 @@ def __contains__(self, word): def __contains_prefix(self, prefix): """ Description: - Checks whether the prefix is present in the DAWG. If yes, returns (True, node) where the prefix ends else + Checks whether the prefix is present in the DAWG. + If yes, returns (True, node) where the prefix ends else returns (False, None) Arguments: :arg (str) prefix: The Prefix string Returns: - :returns (tuple)(exists, node): If yes, returns (True, node) where the prefix ends else - returns (False, None) + :(tuple)(exists, node): If yes, returns (True, node) + where the prefix ends else returns (False, None) """ - if prefix == '': + if prefix == "": return True, self.root if prefix is None: return False, None @@ -71,7 +72,8 @@ def __contains_prefix(self, prefix): def contains_prefix(self, prefix): """ Description: - Returns a boolean indicating the presence of prefix in the DAWG data-structure + Returns a boolean indicating the presence of prefix in + the DAWG data-structure Arguments: :arg (str) prefix: The Prefix string @@ -88,10 +90,12 @@ def __words_with_wildcard(node, wildcard, index, current_word="", with_count=Fal """ Description: Returns all the words where the wildcard pattern matches. - This method uses backtracking to recursively traverse nodes in the DAWG for wildcard characters '?' and '*' + This method uses backtracking to recursively traverse nodes + in the DAWG for wildcard characters '?' and '*' Args: - :arg node (lexpy._base.node.FSANode): Current Node in the Finite State Automaton + :arg node (lexpy._base.node.FSANode): Current Node in the + Finite State Automaton :arg wildcard (str) : The wildcard pattern as input @@ -100,14 +104,18 @@ def __words_with_wildcard(node, wildcard, index, current_word="", with_count=Fal :arg current_word (str): Word formed till now Returns: - :returns words(list): Returns the list of words where the wildcard pattern matches. + :returns words(list): Returns the list of words where the wildcard + pattern matches. """ if not node or not wildcard or index < 0: return [] if node.eow and index >= len(wildcard) and current_word: - return [(current_word, node.count)] if with_count else [current_word] + if with_count: + return [(current_word, node.count)] + else: + return [current_word] if index >= len(wildcard): return [] @@ -115,45 +123,52 @@ def __words_with_wildcard(node, wildcard, index, current_word="", with_count=Fal words = [] letter = wildcard[index] - if letter == '?': + if letter == "?": for child in node.children: child_node = node[child] - child_words = FSA.__words_with_wildcard(child_node, - wildcard, - index + 1, - current_word + child, - with_count=with_count) + child_words = FSA.__words_with_wildcard( + child_node, + wildcard, + index + 1, + current_word + child, + with_count=with_count, + ) words.extend(child_words) - elif letter == '*': - words_at_current_level = FSA.__words_with_wildcard(node, - wildcard, - index + 1, - current_word, - with_count=with_count) + elif letter == "*": + words_at_current_level = FSA.__words_with_wildcard( + node, wildcard, index + 1, current_word, with_count=with_count + ) words.extend(words_at_current_level) if node.children: for child in node.children: child_node = node[child] - child_words = FSA.__words_with_wildcard(child_node, - wildcard, - index, - current_word + child, - with_count=with_count) + child_words = FSA.__words_with_wildcard( + child_node, + wildcard, + index, + current_word + child, + with_count=with_count, + ) words.extend(child_words) elif node.eow and index == len(wildcard) - 1: - return [(current_word, node.count)] if with_count else [current_word] + if with_count: + return [(current_word, node.count)] + else: + return [current_word] else: if letter in node.children: child_node = node[letter] - child_words = FSA.__words_with_wildcard(child_node, - wildcard, - index + 1, - current_word + child_node.val, - with_count=with_count) + child_words = FSA.__words_with_wildcard( + child_node, + wildcard, + index + 1, + current_word + child_node.val, + with_count=with_count, + ) words.extend(child_words) return words @@ -167,29 +182,30 @@ def search(self, wildcard, with_count=False): :arg wildcard(str) : The wildcard pattern as input Returns: - :returns words(list): Returns the list of words where the wildcard pattern matches. + :returns words(list): Returns the list of words where + the wildcard pattern matches. """ if not wildcard: return [] wildcard = validate_expression(wildcard) - return FSA.__words_with_wildcard(self.root, - wildcard, - 0, - self.root.val, - with_count=with_count) + return FSA.__words_with_wildcard( + self.root, wildcard, 0, self.root.val, with_count=with_count + ) def search_with_prefix(self, prefix, with_count=False): """ Description: - Returns a list of words which share the same prefix as passed in input. The words are by default sorted - in the increasing order of length. + Returns a list of words which share the same prefix as passed in + input. The words are by default sorted in the increasing order of + length. Arguments: :arg (str) prefix: The Prefix string Returns: - :returns (list) words: which share the same prefix as passed in input + :returns (list) words: which share the same prefix as passed + in input """ if not prefix: @@ -197,7 +213,7 @@ def search_with_prefix(self, prefix, with_count=False): _, node = self.__contains_prefix(prefix) if node is None: return [] - return FSA.__words_with_wildcard(node, '*', 0, prefix, with_count=with_count) + return FSA.__words_with_wildcard(node, "*", 0, prefix, with_count=with_count) def add_all(self, source): """ @@ -220,7 +236,7 @@ def add_all(self, source): if isinstance(source, str) and not os.path.exists(source): raise IOError("File does not exists") - if isinstance(source, str) or hasattr(source, 'read'): + if isinstance(source, str) or hasattr(source, "read"): source = gen_source(source) for word in source: @@ -240,29 +256,48 @@ def search_within_distance(self, word, dist=0, with_count=False): row = list(range(len(word) + 1)) words = [] for child in self.root.children: - self._search_within_distance(word, self.root.children[child], - child, child, words, - row, dist, with_count=with_count) + self._search_within_distance( + word, + self.root.children[child], + child, + child, + words, + row, + dist, + with_count=with_count, + ) return words - def _search_within_distance(self, word, node, letter, new_word, words, row, dist=0, with_count=False): + def _search_within_distance( + self, word, node, letter, new_word, words, row, dist=0, with_count=False + ): cols = len(word) + 1 curr_row = [row[0] + 1] for col in range(1, cols): - i = curr_row[col-1] + 1 + i = curr_row[col - 1] + 1 d = row[col] + 1 - if word[col-1] != letter: - r = row[col-1] + 1 + if word[col - 1] != letter: + r = row[col - 1] + 1 else: - r = row[col-1] + r = row[col - 1] curr_row.append(min(i, d, r)) if curr_row[-1] <= dist and node.eow: - words.append((new_word, node.count)) if with_count else words.append(new_word) + ( + words.append((new_word, node.count)) + if with_count + else words.append(new_word) + ) if min(curr_row) <= dist: for child_node in node.children: - self._search_within_distance(word, node.children[child_node], - child_node, new_word+child_node, - words, curr_row, dist, - with_count=with_count) + self._search_within_distance( + word, + node.children[child_node], + child_node, + new_word + child_node, + words, + curr_row, + dist, + with_count=with_count, + ) diff --git a/lexpy/_base/node.py b/lexpy/_base/node.py index 28d1662..3a98d6d 100644 --- a/lexpy/_base/node.py +++ b/lexpy/_base/node.py @@ -1,11 +1,12 @@ class FSANode: """ - Class for Finite State Automaton(FSA) Node. Both Trie and Directed Acyclic Word Graph (DAWG) node definitions + Class for Finite State Automaton(FSA) Node. + Both Trie and Directed Acyclic Word Graph (DAWG) node definitions inherit from this class. """ - __slots__ = 'id', 'val', 'children', 'eow', 'count' + __slots__ = "id", "val", "children", "eow", "count" def __init__(self, _id, val): """ @@ -38,8 +39,9 @@ def add_child(self, letter, _id=None): def __getitem__(self, letter): """ Description: - Returns the child node. To use this method first check if the key is present in the dictionary of children - edges or use default as None + Returns the child node. To use this method first check if the + key is present in the dictionary of children edges or use default + as None Args: :arg (str) The letter(or label) corresponding to the child node @@ -52,7 +54,8 @@ def __getitem__(self, letter): def __str__(self): """ Description: - Outputs a string representation of the FSA node. This is invoked when str(`FSANode`) is called. + Outputs a string representation of the FSA node. + This is invoked when str(`FSANode`) is called. :return: """ @@ -91,7 +94,10 @@ def __hash__(self): def __repr__(self): """ Description: - Returns a nicely formatted string of the FSA node. This is invoked when `repr()` is called. + Returns a nicely formatted string of the FSA node. + This is invoked when `repr()` is called. :return: """ - return "{0}(id={1}, label={2}, EOW={3}, count={4})".format(self.__class__.__name__, self.id, self.val, self.eow, self.count) + return "{0}(id={1}, label={2}, EOW={3}, count={4})".format( + self.__class__.__name__, self.id, self.val, self.eow, self.count + ) diff --git a/lexpy/_utils.py b/lexpy/_utils.py index 97dd105..a90296b 100644 --- a/lexpy/_utils.py +++ b/lexpy/_utils.py @@ -1,16 +1,17 @@ import re from contextlib import closing -__all__ = ['validate_expression', 'gen_source'] +__all__ = ["validate_expression", "gen_source"] -PATTERN_FOR_WILDCARD_SEARCH = re.compile(r'(?:(\*\?)+|(\?\*)+|\*+)') -PATTERN_FOR_CONSECUTIVE_QUESTION_MARK = re.compile(r'\?+') +PATTERN_FOR_WILDCARD_SEARCH = re.compile(r"(?:(\*\?)+|(\?\*)+|\*+)") +PATTERN_FOR_CONSECUTIVE_QUESTION_MARK = re.compile(r"\?+") def validate_expression(wildcard_expression): """ Description: - Validates and shortens the wild card expression(if needed) without changing the intended meaning . + Validates and shortens the wild card expression(if needed) + without changing the intended meaning . Args: :arg (str) wild card expression @@ -19,34 +20,32 @@ def validate_expression(wildcard_expression): :return (str) A shortened copy of the wild card expression. Raises: - :raises (``InvalidWildCardExpressionError``) Any error while validating the expression. + :raises (``InvalidWildCardExpressionError``) + Any error while validating the expression. Example: >>> from lexpy._utils import validate_expression - >>> sample_expr = 'a*?' # Match literal `a` followed by any character Zero or unlimited times. + >>> # Match literal `a` followed by any character + >>> # Zero or unlimited times. + >>> sample_expr = 'a*?' >>> print(validate_expression(sample_expr)) # Outputs 'a*' """ # Replace with single * - result = re.sub(PATTERN_FOR_WILDCARD_SEARCH, '*', wildcard_expression) + result = re.sub(PATTERN_FOR_WILDCARD_SEARCH, "*", wildcard_expression) # Replace with a single ? - result = re.sub(PATTERN_FOR_CONSECUTIVE_QUESTION_MARK, '?', result) + result = re.sub(PATTERN_FOR_CONSECUTIVE_QUESTION_MARK, "?", result) return result def gen_source(source): - """ - - """ - if hasattr(source, 'read'): + """ """ + if hasattr(source, "read"): input_file = source else: - input_file = open(source, 'r') + input_file = open(source, "r") with closing(input_file): for line in input_file: yield line.strip() - - - diff --git a/lexpy/dawg.py b/lexpy/dawg.py index 5c6178f..2db693c 100644 --- a/lexpy/dawg.py +++ b/lexpy/dawg.py @@ -1,25 +1,34 @@ from lexpy._base.node import FSANode from lexpy._base.automata import FSA -__all__ = ['DAWG'] +__all__ = ["DAWG"] class DAWG(FSA): - __slots__ = 'root', '__prev_word', '__prev_node', '__minimized_nodes', '__unchecked_nodes' + __slots__ = ( + "root", + "__prev_word", + "__prev_node", + "__minimized_nodes", + "__unchecked_nodes", + ) def __init__(self): - root = FSANode(1, '') + root = FSANode(1, "") super(DAWG, self).__init__(root=root) - self.__prev_word = '' + self.__prev_word = "" self.__prev_node = root self.__minimized_nodes = {} self.__unchecked_nodes = [] def add(self, word, count=1): if word < self.__prev_word: - raise ValueError(f"Words should be inserted in alphabetical order\n" - f"Previous word was '{self.__prev_word}' and current word is '{word}'") + raise ValueError( + f"Words should be inserted in alphabetical order\n" + f"Previous word was '{self.__prev_word}' " + f"and current word is '{word}'" + ) elif word == self.__prev_word: self.__prev_node.count += count else: @@ -77,7 +86,5 @@ def add_all(self, source): super(DAWG, self).add_all(source=source) def __len__(self): - """Returns the number of nodes in DAWG instance - - """ + """Returns the number of nodes in DAWG instance""" return len(self.__minimized_nodes) diff --git a/lexpy/exceptions.py b/lexpy/exceptions.py index 7087f64..46586f3 100644 --- a/lexpy/exceptions.py +++ b/lexpy/exceptions.py @@ -9,4 +9,4 @@ def __init__(self, expr, message): self.message = message def __str__(self): - return repr(': '.join([self.message, self.expr])) + return repr(": ".join([self.message, self.expr])) diff --git a/lexpy/tests/data/BIGFILES.txt b/lexpy/tests/data/BIGFILES.txt index e5f07d4..694bd99 100644 --- a/lexpy/tests/data/BIGFILES.txt +++ b/lexpy/tests/data/BIGFILES.txt @@ -512,7 +512,7 @@ aliza alka alkarim alkire -all the +all the alla allahdin allahyar @@ -2300,7 +2300,7 @@ bevis bevon bevvy bevyn -beware. +beware. beygui beymer bezanson @@ -6377,7 +6377,7 @@ dolorita dolph dolson dom -domains +domains domanico domas dombrosk @@ -6529,7 +6529,7 @@ dosenbac doshi dosi doskas -dosref +dosref doss dost dot @@ -8581,7 +8581,7 @@ galasso galbrait galdwin gale -gale +gale galen galewski galina @@ -10730,7 +10730,7 @@ holbrook holcomb holcombe holcroft -hold of +hold of holdaway holden holder @@ -11776,7 +11776,7 @@ jard jareb jared jarel -jargon +jargon jarib jarid jarl @@ -11967,7 +11967,7 @@ jennilee jennine jennings jenny -jeno +jeno jens jensen jensenwo @@ -14325,7 +14325,7 @@ lazer lazlo lazure lazzara -lcarrol +lcarrol le lea leader @@ -17516,7 +17516,7 @@ moussa mousseau moussett moveline -movies +movies mowat mowbray mowle @@ -20165,7 +20165,7 @@ pyng pyong pyotr pyron -python +python qadir qadri qainfo @@ -20228,7 +20228,7 @@ quintina quinton quintus quinz -quite a +quite a quixote quizmast quoc @@ -25097,7 +25097,7 @@ trefry trefts tregenza treisman -trek @ +trek @ tremain tremaine tremayne diff --git a/lexpy/tests/data/OSPD2.txt b/lexpy/tests/data/OSPD2.txt index daefcde..c9ab0ac 100644 --- a/lexpy/tests/data/OSPD2.txt +++ b/lexpy/tests/data/OSPD2.txt @@ -79336,4 +79336,4 @@ ZYMOSIS ZYMOTIC ZYMURGY ZYZZYVA -ZYZZYVAS \ No newline at end of file +ZYZZYVAS diff --git a/lexpy/tests/data/TWL06.txt b/lexpy/tests/data/TWL06.txt index 0cc22a2..0d46e2a 100644 --- a/lexpy/tests/data/TWL06.txt +++ b/lexpy/tests/data/TWL06.txt @@ -178688,4 +178688,4 @@ ZYMURGIES ZYMURGY ZYZZYVA ZYZZYVAS -ZZZ \ No newline at end of file +ZZZ diff --git a/lexpy/tests/data/ridyhew_master.txt b/lexpy/tests/data/ridyhew_master.txt index 6bdfe90..85eb39e 100644 --- a/lexpy/tests/data/ridyhew_master.txt +++ b/lexpy/tests/data/ridyhew_master.txt @@ -459023,4 +459023,4 @@ ZYTHEPSARY ZYTHUM ZYTHUMS ZYZZYVA -ZYZZYVAS \ No newline at end of file +ZYZZYVAS diff --git a/lexpy/tests/data/word4.txt b/lexpy/tests/data/word4.txt index 5f250f4..48336cf 100644 --- a/lexpy/tests/data/word4.txt +++ b/lexpy/tests/data/word4.txt @@ -7,4 +7,4 @@ яхтсменками яхтсменками яхтсмены -ячеей \ No newline at end of file +ячеей diff --git a/lexpy/tests/data/words2.txt b/lexpy/tests/data/words2.txt index 4e415fd..d737139 100644 --- a/lexpy/tests/data/words2.txt +++ b/lexpy/tests/data/words2.txt @@ -5,4 +5,4 @@ mango apple ash ashley -simpson \ No newline at end of file +simpson diff --git a/lexpy/tests/test_dawg.py b/lexpy/tests/test_dawg.py index c1b7333..a91bc44 100644 --- a/lexpy/tests/test_dawg.py +++ b/lexpy/tests/test_dawg.py @@ -6,17 +6,21 @@ HERE = os.path.dirname(__file__) -large_dataset = os.path.join(HERE, 'data/ridyhew_master.txt') -small_dataset = os.path.join(HERE, 'data/TWL06.txt') +large_dataset = os.path.join(HERE, "data/ridyhew_master.txt") +small_dataset = os.path.join(HERE, "data/TWL06.txt") class TestWordCount(unittest.TestCase): def test_word_count_greater_than_zero(self): self.dawg = DAWG() - self.dawg.add_all(['ash', 'ashes', 'ashley']) + self.dawg.add_all(["ash", "ashes", "ashley"]) self.dawg.reduce() - self.assertGreater(self.dawg.get_word_count(), 0, "The number of words should be greater than 0") + self.assertGreater( + self.dawg.get_word_count(), + 0, + "The number of words should be greater than 0", + ) self.assertEqual(3, self.dawg.get_word_count(), "Word count not equal") def test_word_count_zero(self): @@ -30,90 +34,104 @@ class TestDAWGExactWordSearch(unittest.TestCase): def test_word_in_dawg(self): self.dawg = DAWG() - self.dawg.add_all(['ash', 'ashley']) + self.dawg.add_all(["ash", "ashley"]) self.dawg.reduce() - self.assertTrue('ash' in self.dawg, "Word should be in dawg") + self.assertTrue("ash" in self.dawg, "Word should be in dawg") def test_word_not_int_dawg1(self): self.dawg = DAWG() - self.dawg.add_all(['ash', 'ashley']) + self.dawg.add_all(["ash", "ashley"]) self.dawg.reduce() - self.assertFalse('salary' in self.dawg, "Word should not be in dawg") - + self.assertFalse("salary" in self.dawg, "Word should not be in dawg") + def test_word_not_int_dawg2(self): self.dawg = DAWG() - self.dawg.add_all(['ash', 'ashley']) + self.dawg.add_all(["ash", "ashley"]) self.dawg.reduce() - self.assertFalse('mash lolley' in self.dawg, "Word should not be in dawg") + self.assertFalse("mash lolley" in self.dawg, "Word should not be in dawg") class TesDAWGWordInsert(unittest.TestCase): def test_word_add(self): self.dawg = DAWG() - self.dawg.add('axe') - self.assertIsInstance(self.dawg, DAWG, "Object should be of type `lexpy.dawg.DAWG`") - self.assertTrue('axe' in self.dawg, "Word should be in dawg") - + self.dawg.add("axe") + self.assertIsInstance( + self.dawg, DAWG, "Object should be of type `lexpy.dawg.DAWG`" + ) + self.assertTrue("axe" in self.dawg, "Word should be in dawg") def test_word_add_all_list(self): self.dawg = DAWG() - self.dawg.add_all(['axe', 'kick']) #list + self.dawg.add_all(["axe", "kick"]) # list self.dawg.reduce() - self.assertIsInstance(self.dawg, DAWG, "Object should be of type `lexpy.dawg.DAWG`") - self.assertTrue('axe' in self.dawg, "Word should be in dawg") - self.assertTrue('kick' in self.dawg, "Word should be in dawg") + self.assertIsInstance( + self.dawg, DAWG, "Object should be of type `lexpy.dawg.DAWG`" + ) + self.assertTrue("axe" in self.dawg, "Word should be in dawg") + self.assertTrue("kick" in self.dawg, "Word should be in dawg") self.assertEqual(2, self.dawg.get_word_count(), "Word count not equal") def test_word_add_all_set(self): self.dawg = DAWG() - self.dawg.add_all({'axe', 'kick'}) #set + self.dawg.add_all({"axe", "kick"}) # set self.dawg.reduce() - self.assertIsInstance(self.dawg, DAWG, "Object should be of type `lexpy.dawg.DAWG`") - self.assertTrue('axe' in self.dawg, "Word should be in dawg") - self.assertTrue('kick' in self.dawg, "Word should be in dawg") + self.assertIsInstance( + self.dawg, DAWG, "Object should be of type `lexpy.dawg.DAWG`" + ) + self.assertTrue("axe" in self.dawg, "Word should be in dawg") + self.assertTrue("kick" in self.dawg, "Word should be in dawg") self.assertEqual(2, self.dawg.get_word_count(), "Word count not equal") def test_word_add_all_tuple(self): self.dawg = DAWG() - self.dawg.add_all(('axe', 'kick')) #tuple + self.dawg.add_all(("axe", "kick")) # tuple self.dawg.reduce() - self.assertIsInstance(self.dawg, DAWG, "Object should be of type `lexpy.dawg.DAWG`") - self.assertTrue('axe' in self.dawg, "Word should be in dawg") - self.assertTrue('kick' in self.dawg, "Word should be in dawg") + self.assertIsInstance( + self.dawg, DAWG, "Object should be of type `lexpy.dawg.DAWG`" + ) + self.assertTrue("axe" in self.dawg, "Word should be in dawg") + self.assertTrue("kick" in self.dawg, "Word should be in dawg") self.assertEqual(2, self.dawg.get_word_count(), "Word count not equal") def test_word_add_all_with_number(self): self.dawg = DAWG() - self.dawg.add_all(('axe', 'kick')) #tuple with one integer. + self.dawg.add_all(("axe", "kick")) # tuple with one integer. self.dawg.reduce() - self.assertIsInstance(self.dawg, DAWG, "Object should be of type `lexpy.dawg.DAWG`") - self.assertTrue('axe' in self.dawg, "Word should be in dawg") - self.assertTrue('kick' in self.dawg, "Word should be in dawg") + self.assertIsInstance( + self.dawg, DAWG, "Object should be of type `lexpy.dawg.DAWG`" + ) + self.assertTrue("axe" in self.dawg, "Word should be in dawg") + self.assertTrue("kick" in self.dawg, "Word should be in dawg") self.assertEqual(2, self.dawg.get_word_count(), "Word count not equal") def test_word_add_all_gen(self): def gen_words(): - a = ['ash', 'ashley', 'simpson'] + a = ["ash", "ashley", "simpson"] for word in a: yield word + self.dawg = DAWG() - self.dawg.add_all(gen_words()) # generator + self.dawg.add_all(gen_words()) # generator self.dawg.reduce() - self.assertIsInstance(self.dawg, DAWG, "Object should be of type `lexpy.dawg.DAWG`") - self.assertTrue('ash' in self.dawg, "Word should be in dawg") - self.assertTrue('ashley' in self.dawg, "Word should be in dawg") - self.assertTrue('simpson' in self.dawg, "Word should be in dawg") + self.assertIsInstance( + self.dawg, DAWG, "Object should be of type `lexpy.dawg.DAWG`" + ) + self.assertTrue("ash" in self.dawg, "Word should be in dawg") + self.assertTrue("ashley" in self.dawg, "Word should be in dawg") + self.assertTrue("simpson" in self.dawg, "Word should be in dawg") self.assertEqual(3, self.dawg.get_word_count(), "Word count not equal") def test_word_add_all_file_path(self): self.dawg = DAWG() - self.dawg.add_all(small_dataset) # From a file + self.dawg.add_all(small_dataset) # From a file self.dawg.reduce() - self.assertIsInstance(self.dawg, DAWG, "Object should be of type `lexpy.dawg.DAWG`") - self.assertTrue('AARGH' in self.dawg, "Word should be in dawg") - self.assertTrue('AARRGHH' in self.dawg, "Word should be in dawg") - self.assertTrue('AAS' in self.dawg, "Word should be in dawg") + self.assertIsInstance( + self.dawg, DAWG, "Object should be of type `lexpy.dawg.DAWG`" + ) + self.assertTrue("AARGH" in self.dawg, "Word should be in dawg") + self.assertTrue("AARRGHH" in self.dawg, "Word should be in dawg") + self.assertTrue("AAS" in self.dawg, "Word should be in dawg") self.assertEqual(178691, self.dawg.get_word_count(), "Word count not equal") @@ -121,11 +139,13 @@ class TestDAWGNodeCount(unittest.TestCase): def test_dawg_node_count(self): self.dawg = DAWG() - self.dawg.add_all(['ash', 'ashley']) + self.dawg.add_all(["ash", "ashley"]) self.dawg.reduce() - self.assertIsInstance(self.dawg, DAWG, "Object should be of type `lexpy.dawg.DAWG`") - self.assertTrue('ash' in self.dawg, "Word should be in dawg") - self.assertTrue('ashley' in self.dawg, "Word should be in dawg") + self.assertIsInstance( + self.dawg, DAWG, "Object should be of type `lexpy.dawg.DAWG`" + ) + self.assertTrue("ash" in self.dawg, "Word should be in dawg") + self.assertTrue("ashley" in self.dawg, "Word should be in dawg") self.assertEqual(2, self.dawg.get_word_count(), "Word count not equal") self.assertEqual(6, len(self.dawg), "Number of nodes") @@ -139,105 +159,164 @@ def test_dawg_reduced_node_count(self): class TestDAWGPrefixExists(unittest.TestCase): def test_dawg_node_prefix_exists(self): self.dawg = DAWG() - self.dawg.add_all(['ash', 'ashley']) + self.dawg.add_all(["ash", "ashley"]) self.dawg.reduce() - self.assertIsInstance(self.dawg, DAWG, "Object should be of type `lexpy.dawg.DAWG`") - self.assertTrue('ash' in self.dawg, "Word should be in dawg") - self.assertTrue('ashley' in self.dawg, "Word should be in dawg") + self.assertIsInstance( + self.dawg, DAWG, "Object should be of type `lexpy.dawg.DAWG`" + ) + self.assertTrue("ash" in self.dawg, "Word should be in dawg") + self.assertTrue("ashley" in self.dawg, "Word should be in dawg") self.assertEqual(2, self.dawg.get_word_count(), "Word count not equal") - self.assertTrue(self.dawg.contains_prefix('ash'), "Prefix should be present in DAWG") - self.assertTrue(self.dawg.contains_prefix('as'), "Prefix should be present in DAWG") - self.assertTrue(self.dawg.contains_prefix('a'), "Prefix should be present in DAWG") + self.assertTrue( + self.dawg.contains_prefix("ash"), "Prefix should be present in DAWG" + ) + self.assertTrue( + self.dawg.contains_prefix("as"), "Prefix should be present in DAWG" + ) + self.assertTrue( + self.dawg.contains_prefix("a"), "Prefix should be present in DAWG" + ) def test_dawg_node_prefix_not_exists(self): self.dawg = DAWG() - self.dawg.add_all(['ash', 'ashley']) + self.dawg.add_all(["ash", "ashley"]) self.dawg.reduce() - self.assertIsInstance(self.dawg, DAWG, "Object should be of type `lexpy.dawg.DAWG`") - self.assertTrue('ash' in self.dawg, "Word should be in dawg") - self.assertTrue('ashley' in self.dawg, "Word should be in dawg") + self.assertIsInstance( + self.dawg, DAWG, "Object should be of type `lexpy.dawg.DAWG`" + ) + self.assertTrue("ash" in self.dawg, "Word should be in dawg") + self.assertTrue("ashley" in self.dawg, "Word should be in dawg") self.assertEqual(2, self.dawg.get_word_count(), "Word count not equal") - self.assertFalse(self.dawg.contains_prefix('xmas'), "Prefix should be present in DAWG") - self.assertFalse(self.dawg.contains_prefix('xor'), "Prefix should be present in DAWG") - self.assertFalse(self.dawg.contains_prefix('sh'), "Prefix should be present in DAWG") + self.assertFalse( + self.dawg.contains_prefix("xmas"), "Prefix should be present in DAWG" + ) + self.assertFalse( + self.dawg.contains_prefix("xor"), "Prefix should be present in DAWG" + ) + self.assertFalse( + self.dawg.contains_prefix("sh"), "Prefix should be present in DAWG" + ) class TestDAWGPrefixSearch(unittest.TestCase): def test_dawg_prefix_search(self): self.dawg = DAWG() - self.dawg.add_all(['ashlame', 'ashley', 'ashlo', 'askoiu']) + self.dawg.add_all(["ashlame", "ashley", "ashlo", "askoiu"]) self.dawg.reduce() - self.assertIsInstance(self.dawg, DAWG, "Object should be of type `lexpy.dawg.DAWG`") - self.assertFalse('ash' in self.dawg, "Word should not be in dawg") - self.assertTrue('ashley' in self.dawg, "Word should be in dawg") + self.assertIsInstance( + self.dawg, DAWG, "Object should be of type `lexpy.dawg.DAWG`" + ) + self.assertFalse("ash" in self.dawg, "Word should not be in dawg") + self.assertTrue("ashley" in self.dawg, "Word should be in dawg") self.assertEqual(4, self.dawg.get_word_count(), "Word count not equal") - self.assertTrue(self.dawg.contains_prefix('ash'), "Prefix should be present in DAWG") - self.assertEqual(sorted(self.dawg.search_with_prefix('ash')), sorted(['ashlame', 'ashley', 'ashlo']), - 'The lists should be equal') + self.assertTrue( + self.dawg.contains_prefix("ash"), "Prefix should be present in DAWG" + ) + self.assertEqual( + sorted(self.dawg.search_with_prefix("ash")), + sorted(["ashlame", "ashley", "ashlo"]), + "The lists should be equal", + ) class TestWildCardSearch(unittest.TestCase): def test_dawg_asterisk_search(self): self.dawg = DAWG() - self.dawg.add_all(['ash', 'ashley']) + self.dawg.add_all(["ash", "ashley"]) self.dawg.reduce() - self.assertIsInstance(self.dawg, DAWG, "Object should be of type `lexpy.dawg.DAWG`") - self.assertTrue('ash' in self.dawg, "Word should be in dawg") - self.assertTrue('ashley' in self.dawg, "Word should be in dawg") - self.assertEqual(sorted(self.dawg.search('a*')), sorted(['ash', 'ashley']), 'The lists should be equal') - self.assertEqual(sorted(self.dawg.search('a?*')), sorted(['ash', 'ashley']), 'The lists should be equal') - self.assertEqual(sorted(self.dawg.search('a*?')), sorted(['ash', 'ashley']), 'The lists should be equal') - self.assertEqual(sorted(self.dawg.search('a***')), sorted(['ash', 'ashley']), 'The lists should be equal') + self.assertIsInstance( + self.dawg, DAWG, "Object should be of type `lexpy.dawg.DAWG`" + ) + self.assertTrue("ash" in self.dawg, "Word should be in dawg") + self.assertTrue("ashley" in self.dawg, "Word should be in dawg") + self.assertEqual( + sorted(self.dawg.search("a*")), + sorted(["ash", "ashley"]), + "The lists should be equal", + ) + self.assertEqual( + sorted(self.dawg.search("a?*")), + sorted(["ash", "ashley"]), + "The lists should be equal", + ) + self.assertEqual( + sorted(self.dawg.search("a*?")), + sorted(["ash", "ashley"]), + "The lists should be equal", + ) + self.assertEqual( + sorted(self.dawg.search("a***")), + sorted(["ash", "ashley"]), + "The lists should be equal", + ) def test_dawg_question_search(self): self.dawg = DAWG() - self.dawg.add_all(['ab', 'as', 'ash', 'ashley']) + self.dawg.add_all(["ab", "as", "ash", "ashley"]) self.dawg.reduce() - self.assertIsInstance(self.dawg, DAWG, "Object should be of type `lexpy.dawg.DAWG`") - self.assertTrue('ash' in self.dawg, "Word should be in dawg") - self.assertTrue('ashley' in self.dawg, "Word should be in dawg") - self.assertEqual(sorted(self.dawg.search('a?')), sorted(['ab', 'as']), 'The lists should be equal') + self.assertIsInstance( + self.dawg, DAWG, "Object should be of type `lexpy.dawg.DAWG`" + ) + self.assertTrue("ash" in self.dawg, "Word should be in dawg") + self.assertTrue("ashley" in self.dawg, "Word should be in dawg") + self.assertEqual( + sorted(self.dawg.search("a?")), + sorted(["ab", "as"]), + "The lists should be equal", + ) def test_dawg_wildcard_search(self): self.dawg = DAWG() - self.dawg.add_all(['ab', 'as', 'ash', 'ashley']) + self.dawg.add_all(["ab", "as", "ash", "ashley"]) self.dawg.reduce() - self.assertIsInstance(self.dawg, DAWG, "Object should be of type `lexpy.dawg.DAWG`") - self.assertTrue('ash' in self.dawg, "Word should be in dawg") - self.assertTrue('ashley' in self.dawg, "Word should be in dawg") - self.assertEqual(sorted(self.dawg.search('*a******?')), sorted(['ab', 'as', 'ash', 'ashley']), - 'The lists should be equal') + self.assertIsInstance( + self.dawg, DAWG, "Object should be of type `lexpy.dawg.DAWG`" + ) + self.assertTrue("ash" in self.dawg, "Word should be in dawg") + self.assertTrue("ashley" in self.dawg, "Word should be in dawg") + self.assertEqual( + sorted(self.dawg.search("*a******?")), + sorted(["ab", "as", "ash", "ashley"]), + "The lists should be equal", + ) def test_dawg_wildcard_exception(self): self.dawg = DAWG() - self.dawg.add_all(['ab', 'as', 'ash', 'ashley', '#$%^a']) + self.dawg.add_all(["ab", "as", "ash", "ashley", "#$%^a"]) self.dawg.reduce() - self.assertIsInstance(self.dawg, DAWG, "Object should be of type `lexpy.dawg.DAWG`") - self.assertTrue('ash' in self.dawg, "Word should be in dawg") - self.assertTrue('ashley' in self.dawg, "Word should be in dawg") - self.assertTrue('#$%^a' in self.dawg) - # self.assertRaises(InvalidWildCardExpressionError, self.dawg.search, '#$%^a') + self.assertIsInstance( + self.dawg, DAWG, "Object should be of type `lexpy.dawg.DAWG`" + ) + self.assertTrue("ash" in self.dawg, "Word should be in dawg") + self.assertTrue("ashley" in self.dawg, "Word should be in dawg") + self.assertTrue("#$%^a" in self.dawg) + + # self.assertRaises(InvalidWildCardExpressionError, self.dawg.search, '#$%^a') class TestBuildFromFile(unittest.TestCase): def test_dawg_build_from_file_path(self): self.dawg = build_dawg_from_file(small_dataset) self.dawg.reduce() - self.assertIsInstance(self.dawg, DAWG, "Object should be of type `lexpy.dawg.DAWG`") - self.assertTrue('ZYGOMORPHIES' in self.dawg, "Word should be in dawg") - self.assertTrue('ZYGOMATA' in self.dawg, "Word should be in dawg") - self.assertTrue('ZYGOMORPHY' in self.dawg, "Word should be in dawg") + self.assertIsInstance( + self.dawg, DAWG, "Object should be of type `lexpy.dawg.DAWG`" + ) + self.assertTrue("ZYGOMORPHIES" in self.dawg, "Word should be in dawg") + self.assertTrue("ZYGOMATA" in self.dawg, "Word should be in dawg") + self.assertTrue("ZYGOMORPHY" in self.dawg, "Word should be in dawg") self.assertEqual(178691, self.dawg.get_word_count(), "Word count not equal") def test_dawg_build_from_file_object(self): - with open(small_dataset, 'r') as input_file: + with open(small_dataset, "r") as input_file: self.dawg = build_dawg_from_file(input_file) self.dawg.reduce() - self.assertIsInstance(self.dawg, DAWG, "Object should be of type `lexpy.dawg.DAWG`") - self.assertTrue('ZYGOMORPHIES' in self.dawg, "Word should be in dawg") - self.assertTrue('ZYGOMATA' in self.dawg, "Word should be in dawg") - self.assertTrue('ZYGOMORPHY' in self.dawg, "Word should be in dawg") + self.assertIsInstance( + self.dawg, DAWG, "Object should be of type `lexpy.dawg.DAWG`" + ) + self.assertTrue("ZYGOMORPHIES" in self.dawg, "Word should be in dawg") + self.assertTrue("ZYGOMATA" in self.dawg, "Word should be in dawg") + self.assertTrue("ZYGOMORPHY" in self.dawg, "Word should be in dawg") self.assertEqual(178691, self.dawg.get_word_count(), "Word count not equal") @@ -245,15 +324,55 @@ class TestSearchWithinDistance(unittest.TestCase): def test_edit_distance_search(self): self.dawg = DAWG() - input_words = ['abhor', 'abuzz', 'accept', 'acorn', 'agony', 'albay', 'albin', 'algin', 'alisa', 'almug', - 'altai', 'amato', 'ampyx', 'aneto', 'arbil', 'arrow', 'artha', 'aruba', 'athie', 'auric', - 'aurum', 'cap', 'common', 'dime', 'eyes', 'foot', 'likeablelanguage', 'lonely', 'look', - 'nasty', 'pet', 'psychotic', 'quilt', 'shock', 'smalldusty', 'sore', 'steel', 'suit', - 'tank', 'thrill'] + input_words = [ + "abhor", + "abuzz", + "accept", + "acorn", + "agony", + "albay", + "albin", + "algin", + "alisa", + "almug", + "altai", + "amato", + "ampyx", + "aneto", + "arbil", + "arrow", + "artha", + "aruba", + "athie", + "auric", + "aurum", + "cap", + "common", + "dime", + "eyes", + "foot", + "likeablelanguage", + "lonely", + "look", + "nasty", + "pet", + "psychotic", + "quilt", + "shock", + "smalldusty", + "sore", + "steel", + "suit", + "tank", + "thrill", + ] self.dawg.add_all(input_words) self.dawg.reduce() - self.assertListEqual(self.dawg.search_within_distance('arie', dist=2), ['arbil', 'athie', 'auric']) + self.assertListEqual( + self.dawg.search_within_distance("arie", dist=2), + ["arbil", "athie", "auric"], + ) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/lexpy/tests/test_trie.py b/lexpy/tests/test_trie.py index 4faa378..2865c5c 100644 --- a/lexpy/tests/test_trie.py +++ b/lexpy/tests/test_trie.py @@ -7,16 +7,20 @@ HERE = os.path.dirname(__file__) -large_dataset = os.path.join(HERE, 'data/words.txt') -small_dataset = os.path.join(HERE, 'data/words2.txt') +large_dataset = os.path.join(HERE, "data/words.txt") +small_dataset = os.path.join(HERE, "data/words2.txt") class TestWordCount(unittest.TestCase): def test_word_count_greater_than_zero(self): self.trie = Trie() - self.trie.add_all(['ash', 'ashley', 'ashes']) - self.assertGreater(self.trie.get_word_count(), 0, "The number of words should be greater than 0") + self.trie.add_all(["ash", "ashley", "ashes"]) + self.assertGreater( + self.trie.get_word_count(), + 0, + "The number of words should be greater than 0", + ) self.assertEqual(3, self.trie.get_word_count(), "Word count not equal") def test_word_count_zero(self): @@ -29,80 +33,95 @@ class TestTrieExactWordSearch(unittest.TestCase): def test_word_in_trie(self): self.trie = Trie() - self.trie.add_all(['ash', 'ashley']) - self.assertTrue('ash' in self.trie, "Word should be in trie") + self.trie.add_all(["ash", "ashley"]) + self.assertTrue("ash" in self.trie, "Word should be in trie") def test_word_not_int_trie1(self): self.trie = Trie() - self.trie.add_all(['ash', 'ashley']) - self.assertFalse('salary' in self.trie, "Word should not be in trie") + self.trie.add_all(["ash", "ashley"]) + self.assertFalse("salary" in self.trie, "Word should not be in trie") def test_word_not_int_trie2(self): self.trie = Trie() - self.trie.add_all(['ash', 'ashley']) - self.assertFalse('mash lolley' in self.trie, "Word should not be in trie") + self.trie.add_all(["ash", "ashley"]) + self.assertFalse("mash lolley" in self.trie, "Word should not be in trie") class TesTrieWordInsert(unittest.TestCase): def test_word_add(self): self.trie = Trie() - self.trie.add('axe') - self.assertIsInstance(self.trie, Trie, "Object should be of type `lexpy.trie.Trie`") - self.assertTrue('axe' in self.trie, "Word should be in trie") + self.trie.add("axe") + self.assertIsInstance( + self.trie, Trie, "Object should be of type `lexpy.trie.Trie`" + ) + self.assertTrue("axe" in self.trie, "Word should be in trie") def test_word_add_all_list(self): self.trie = Trie() - self.trie.add_all(['axe', 'kick']) # list - self.assertIsInstance(self.trie, Trie, "Object should be of type `lexpy.trie.Trie`") - self.assertTrue('axe' in self.trie, "Word should be in trie") - self.assertTrue('kick' in self.trie, "Word should be in trie") + self.trie.add_all(["axe", "kick"]) # list + self.assertIsInstance( + self.trie, Trie, "Object should be of type `lexpy.trie.Trie`" + ) + self.assertTrue("axe" in self.trie, "Word should be in trie") + self.assertTrue("kick" in self.trie, "Word should be in trie") self.assertEqual(2, self.trie.get_word_count(), "Word count not equal") def test_word_add_all_set(self): self.trie = Trie() - self.trie.add_all({'axe', 'kick'}) # set - self.assertIsInstance(self.trie, Trie, "Object should be of type `lexpy.trie.Trie`") - self.assertTrue('axe' in self.trie, "Word should be in trie") - self.assertTrue('kick' in self.trie, "Word should be in trie") + self.trie.add_all({"axe", "kick"}) # set + self.assertIsInstance( + self.trie, Trie, "Object should be of type `lexpy.trie.Trie`" + ) + self.assertTrue("axe" in self.trie, "Word should be in trie") + self.assertTrue("kick" in self.trie, "Word should be in trie") self.assertEqual(2, self.trie.get_word_count(), "Word count not equal") def test_word_add_all_tuple(self): self.trie = Trie() - self.trie.add_all(('axe', 'kick')) # tuple - self.assertIsInstance(self.trie, Trie, "Object should be of type `lexpy.trie.Trie`") - self.assertTrue('axe' in self.trie, "Word should be in trie") - self.assertTrue('kick' in self.trie, "Word should be in trie") + self.trie.add_all(("axe", "kick")) # tuple + self.assertIsInstance( + self.trie, Trie, "Object should be of type `lexpy.trie.Trie`" + ) + self.assertTrue("axe" in self.trie, "Word should be in trie") + self.assertTrue("kick" in self.trie, "Word should be in trie") self.assertEqual(2, self.trie.get_word_count(), "Word count not equal") def test_word_add_all_with_number(self): self.trie = Trie() - self.trie.add_all(('axe', 'kick')) # tuple with one integer. - self.assertIsInstance(self.trie, Trie, "Object should be of type `lexpy.trie.Trie`") - self.assertTrue('axe' in self.trie, "Word should be in trie") - self.assertTrue('kick' in self.trie, "Word should be in trie") + self.trie.add_all(("axe", "kick")) # tuple with one integer. + self.assertIsInstance( + self.trie, Trie, "Object should be of type `lexpy.trie.Trie`" + ) + self.assertTrue("axe" in self.trie, "Word should be in trie") + self.assertTrue("kick" in self.trie, "Word should be in trie") self.assertEqual(2, self.trie.get_word_count(), "Word count not equal") def test_word_add_all_gen(self): def gen_words(): - a = ['ash', 'ashley', 'simpson'] + a = ["ash", "ashley", "simpson"] for word in a: yield word + self.trie = Trie() - self.trie.add_all(gen_words()) # generator - self.assertIsInstance(self.trie, Trie, "Object should be of type `lexpy.trie.Trie`") - self.assertTrue('ash' in self.trie, "Word should be in trie") - self.assertTrue('ashley' in self.trie, "Word should be in trie") - self.assertTrue('simpson' in self.trie, "Word should be in trie") + self.trie.add_all(gen_words()) # generator + self.assertIsInstance( + self.trie, Trie, "Object should be of type `lexpy.trie.Trie`" + ) + self.assertTrue("ash" in self.trie, "Word should be in trie") + self.assertTrue("ashley" in self.trie, "Word should be in trie") + self.assertTrue("simpson" in self.trie, "Word should be in trie") self.assertEqual(3, self.trie.get_word_count(), "Word count not equal") def test_word_add_all_file_path(self): self.trie = Trie() - self.trie.add_all(small_dataset) # From a file - self.assertIsInstance(self.trie, Trie, "Object should be of type `lexpy.trie.Trie`") - self.assertTrue('ash' in self.trie, "Word should be in trie") - self.assertTrue('ashley' in self.trie, "Word should be in trie") - self.assertTrue('simpson' in self.trie, "Word should be in trie") + self.trie.add_all(small_dataset) # From a file + self.assertIsInstance( + self.trie, Trie, "Object should be of type `lexpy.trie.Trie`" + ) + self.assertTrue("ash" in self.trie, "Word should be in trie") + self.assertTrue("ashley" in self.trie, "Word should be in trie") + self.assertTrue("simpson" in self.trie, "Word should be in trie") self.assertEqual(8, self.trie.get_word_count(), "Word count not equal") @@ -110,10 +129,12 @@ class TestTrieNodeCount(unittest.TestCase): def test_trie_node_count(self): self.trie = Trie() - self.trie.add_all(['ash', 'ashley']) - self.assertIsInstance(self.trie, Trie, "Object should be of type `lexpy.trie.Trie`") - self.assertTrue('ash' in self.trie, "Word should be in trie") - self.assertTrue('ashley' in self.trie, "Word should be in trie") + self.trie.add_all(["ash", "ashley"]) + self.assertIsInstance( + self.trie, Trie, "Object should be of type `lexpy.trie.Trie`" + ) + self.assertTrue("ash" in self.trie, "Word should be in trie") + self.assertTrue("ashley" in self.trie, "Word should be in trie") self.assertEqual(2, self.trie.get_word_count(), "Word count not equal") self.assertEqual(7, len(self.trie), "Number of nodes") @@ -122,98 +143,158 @@ class TestTriePrefixExists(unittest.TestCase): def test_trie_node_prefix_exists(self): self.trie = Trie() - self.trie.add_all(['ash', 'ashley']) - self.assertIsInstance(self.trie, Trie, "Object should be of type `lexpy.trie.Trie`") - self.assertTrue('ash' in self.trie, "Word should be in trie") - self.assertTrue('ashley' in self.trie, "Word should be in trie") + self.trie.add_all(["ash", "ashley"]) + self.assertIsInstance( + self.trie, Trie, "Object should be of type `lexpy.trie.Trie`" + ) + self.assertTrue("ash" in self.trie, "Word should be in trie") + self.assertTrue("ashley" in self.trie, "Word should be in trie") self.assertEqual(2, self.trie.get_word_count(), "Word count not equal") - self.assertTrue(self.trie.contains_prefix('ash'), "Prefix should be present in Trie") - self.assertTrue(self.trie.contains_prefix('as'), "Prefix should be present in Trie") - self.assertTrue(self.trie.contains_prefix('a'), "Prefix should be present in Trie") + self.assertTrue( + self.trie.contains_prefix("ash"), "Prefix should be present in Trie" + ) + self.assertTrue( + self.trie.contains_prefix("as"), "Prefix should be present in Trie" + ) + self.assertTrue( + self.trie.contains_prefix("a"), "Prefix should be present in Trie" + ) def test_trie_node_prefix_not_exists(self): self.trie = Trie() - self.trie.add_all(['ash', 'ashley']) - self.assertIsInstance(self.trie, Trie, "Object should be of type `lexpy.trie.Trie`") - self.assertTrue('ash' in self.trie, "Word should be in trie") - self.assertTrue('ashley' in self.trie, "Word should be in trie") + self.trie.add_all(["ash", "ashley"]) + self.assertIsInstance( + self.trie, Trie, "Object should be of type `lexpy.trie.Trie`" + ) + self.assertTrue("ash" in self.trie, "Word should be in trie") + self.assertTrue("ashley" in self.trie, "Word should be in trie") self.assertEqual(2, self.trie.get_word_count(), "Word count not equal") - self.assertFalse(self.trie.contains_prefix('xmas'), "Prefix should be present in Trie") - self.assertFalse(self.trie.contains_prefix('xor'), "Prefix should be present in Trie") - self.assertFalse(self.trie.contains_prefix('sh'), "Prefix should be present in Trie") + self.assertFalse( + self.trie.contains_prefix("xmas"), "Prefix should be present in Trie" + ) + self.assertFalse( + self.trie.contains_prefix("xor"), "Prefix should be present in Trie" + ) + self.assertFalse( + self.trie.contains_prefix("sh"), "Prefix should be present in Trie" + ) class TestTriePrefixSearch(unittest.TestCase): def test_trie_prefix_search(self): self.trie = Trie() - self.trie.add_all(['ashlame', 'ashley', 'askoiu', 'ashlo']) - self.assertIsInstance(self.trie, Trie, "Object should be of type `lexpy.trie.Trie`") - self.assertFalse('ash' in self.trie, "Word should not be in trie") - self.assertTrue('ashley' in self.trie, "Word should be in trie") + self.trie.add_all(["ashlame", "ashley", "askoiu", "ashlo"]) + self.assertIsInstance( + self.trie, Trie, "Object should be of type `lexpy.trie.Trie`" + ) + self.assertFalse("ash" in self.trie, "Word should not be in trie") + self.assertTrue("ashley" in self.trie, "Word should be in trie") self.assertEqual(4, self.trie.get_word_count(), "Word count not equal") - self.assertTrue(self.trie.contains_prefix('ash'), "Prefix should be present in Trie") - self.assertEqual(sorted(self.trie.search_with_prefix('ash')), sorted(['ashlame', 'ashley', 'ashlo']), 'The lists should be equal') + self.assertTrue( + self.trie.contains_prefix("ash"), "Prefix should be present in Trie" + ) + self.assertEqual( + sorted(self.trie.search_with_prefix("ash")), + sorted(["ashlame", "ashley", "ashlo"]), + "The lists should be equal", + ) class TestWildCardSearch(unittest.TestCase): def test_trie_asterisk_search(self): self.trie = Trie() - self.trie.add_all(['ash', 'ashley']) - self.assertIsInstance(self.trie, Trie, "Object should be of type `lexpy.trie.Trie`") - self.assertTrue('ash' in self.trie, "Word should be in trie") - self.assertTrue('ashley' in self.trie, "Word should be in trie") - self.assertEqual(sorted(self.trie.search('a*')), sorted(['ash', 'ashley']), 'The lists should be equal') - self.assertEqual(sorted(self.trie.search('a?*')), sorted(['ash', 'ashley']), 'The lists should be equal') - self.assertEqual(sorted(self.trie.search('a*?')), sorted(['ash', 'ashley']), 'The lists should be equal') - self.assertEqual(sorted(self.trie.search('a***')), sorted(['ash', 'ashley']), 'The lists should be equal') + self.trie.add_all(["ash", "ashley"]) + self.assertIsInstance( + self.trie, Trie, "Object should be of type `lexpy.trie.Trie`" + ) + self.assertTrue("ash" in self.trie, "Word should be in trie") + self.assertTrue("ashley" in self.trie, "Word should be in trie") + self.assertEqual( + sorted(self.trie.search("a*")), + sorted(["ash", "ashley"]), + "The lists should be equal", + ) + self.assertEqual( + sorted(self.trie.search("a?*")), + sorted(["ash", "ashley"]), + "The lists should be equal", + ) + self.assertEqual( + sorted(self.trie.search("a*?")), + sorted(["ash", "ashley"]), + "The lists should be equal", + ) + self.assertEqual( + sorted(self.trie.search("a***")), + sorted(["ash", "ashley"]), + "The lists should be equal", + ) def test_trie_question_search(self): self.trie = Trie() - self.trie.add_all(['ab', 'as', 'ash', 'ashley']) - self.assertIsInstance(self.trie, Trie, "Object should be of type `lexpy.trie.Trie`") - self.assertTrue('ash' in self.trie, "Word should be in trie") - self.assertTrue('ashley' in self.trie, "Word should be in trie") - self.assertEqual(sorted(self.trie.search('a?')), sorted(['ab', 'as']), 'The lists should be equal') + self.trie.add_all(["ab", "as", "ash", "ashley"]) + self.assertIsInstance( + self.trie, Trie, "Object should be of type `lexpy.trie.Trie`" + ) + self.assertTrue("ash" in self.trie, "Word should be in trie") + self.assertTrue("ashley" in self.trie, "Word should be in trie") + self.assertEqual( + sorted(self.trie.search("a?")), + sorted(["ab", "as"]), + "The lists should be equal", + ) def test_trie_wildcard_search(self): self.trie = Trie() - self.trie.add_all(['ab', 'as', 'ash', 'ashley']) - self.assertIsInstance(self.trie, Trie, "Object should be of type `lexpy.trie.Trie`") - self.assertTrue('ash' in self.trie, "Word should be in trie") - self.assertTrue('ashley' in self.trie, "Word should be in trie") - self.assertEqual(sorted(self.trie.search('*a******?')), sorted(['ab', 'as', 'ash', 'ashley']), 'The lists should be equal') + self.trie.add_all(["ab", "as", "ash", "ashley"]) + self.assertIsInstance( + self.trie, Trie, "Object should be of type `lexpy.trie.Trie`" + ) + self.assertTrue("ash" in self.trie, "Word should be in trie") + self.assertTrue("ashley" in self.trie, "Word should be in trie") + self.assertEqual( + sorted(self.trie.search("*a******?")), + sorted(["ab", "as", "ash", "ashley"]), + "The lists should be equal", + ) def test_trie_wildcard_exception(self): self.trie = Trie() - self.trie.add_all(['ab', 'as', 'ash', 'ashley', '#$%^a']) - self.assertIsInstance(self.trie, Trie, "Object should be of type `lexpy.trie.Trie`") - self.assertTrue('ash' in self.trie, "Word should be in trie") - self.assertTrue('ashley' in self.trie, "Word should be in trie") - self.assertTrue('#$%^a' in self.trie) + self.trie.add_all(["ab", "as", "ash", "ashley", "#$%^a"]) + self.assertIsInstance( + self.trie, Trie, "Object should be of type `lexpy.trie.Trie`" + ) + self.assertTrue("ash" in self.trie, "Word should be in trie") + self.assertTrue("ashley" in self.trie, "Word should be in trie") + self.assertTrue("#$%^a" in self.trie) class TestBuildFromFile(unittest.TestCase): def test_trie_build_from_file_path(self): self.trie = build_trie_from_file(small_dataset) - self.assertIsInstance(self.trie, Trie, "Object should be of type `lexpy.trie.Trie`") - self.assertTrue('ash' in self.trie, "Word should be in trie") - self.assertTrue('ashley' in self.trie, "Word should be in trie") - self.assertTrue('simpson' in self.trie, "Word should be in trie") + self.assertIsInstance( + self.trie, Trie, "Object should be of type `lexpy.trie.Trie`" + ) + self.assertTrue("ash" in self.trie, "Word should be in trie") + self.assertTrue("ashley" in self.trie, "Word should be in trie") + self.assertTrue("simpson" in self.trie, "Word should be in trie") self.assertEqual(8, self.trie.get_word_count(), "Word count not equal") def test_trie_build_from_file_object(self): - with open(small_dataset, 'r') as input_file: + with open(small_dataset, "r") as input_file: self.trie = build_trie_from_file(input_file) - self.assertIsInstance(self.trie, Trie, "Object should be of type `lexpy.trie.Trie`") - self.assertTrue('ash' in self.trie, "Word should be in trie") - self.assertTrue('ashley' in self.trie, "Word should be in trie") - self.assertTrue('simpson' in self.trie, "Word should be in trie") + self.assertIsInstance( + self.trie, Trie, "Object should be of type `lexpy.trie.Trie`" + ) + self.assertTrue("ash" in self.trie, "Word should be in trie") + self.assertTrue("ashley" in self.trie, "Word should be in trie") + self.assertTrue("simpson" in self.trie, "Word should be in trie") self.assertEqual(8, self.trie.get_word_count(), "Word count not equal") -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/lexpy/tests/test_word_count.py b/lexpy/tests/test_word_count.py index 1addb9e..f1f0436 100644 --- a/lexpy/tests/test_word_count.py +++ b/lexpy/tests/test_word_count.py @@ -10,29 +10,29 @@ class TestTrieWordCount(unittest.TestCase): def test_with_count(self): trie = Trie() - trie.add_all(['ash', 'ashley', 'ashes', 'ashes']) - expected = [('ash', 1), ('ashley', 1), ('ashes', 2)] - self.assertListEqual(expected, trie.search('a*', with_count=True)) + trie.add_all(["ash", "ashley", "ashes", "ashes"]) + expected = [("ash", 1), ("ashley", 1), ("ashes", 2)] + self.assertListEqual(expected, trie.search("a*", with_count=True)) def test_without_count(self): trie = Trie() - trie.add_all(['ash', 'ashley', 'ashes', 'ashes']) - expected = ['ash', 'ashley', 'ashes'] - self.assertListEqual(expected, trie.search('a*')) + trie.add_all(["ash", "ashley", "ashes", "ashes"]) + expected = ["ash", "ashley", "ashes"] + self.assertListEqual(expected, trie.search("a*")) class TestDAWGWordCount(unittest.TestCase): def test_with_count(self): d = DAWG() - d.add_all(['ash', 'ashes', 'ashes', 'ashley']) + d.add_all(["ash", "ashes", "ashes", "ashley"]) d.reduce() - expected = [('ash', 1), ('ashes', 2), ('ashley', 1)] - self.assertListEqual(expected, d.search('a*', with_count=True)) + expected = [("ash", 1), ("ashes", 2), ("ashley", 1)] + self.assertListEqual(expected, d.search("a*", with_count=True)) def test_without_count(self): d = DAWG() - d.add_all(['ash', 'ashes', 'ashes', 'ashley']) + d.add_all(["ash", "ashes", "ashes", "ashley"]) d.reduce() - expected = ['ash', 'ashes', 'ashley'] - self.assertListEqual(expected, d.search('a*')) + expected = ["ash", "ashes", "ashley"] + self.assertListEqual(expected, d.search("a*")) diff --git a/lexpy/trie.py b/lexpy/trie.py index e77e46b..0a48220 100644 --- a/lexpy/trie.py +++ b/lexpy/trie.py @@ -1,22 +1,24 @@ from lexpy._base.node import FSANode from lexpy._base.automata import FSA -__all__ = ['Trie'] +__all__ = ["Trie"] class Trie(FSA): - __slots__ = 'root' + __slots__ = "root" def __init__(self): """Initialize a Trie Description: This method initializes a Trie instance by adding the root node. - By default, the id of the root node is 1 and number of words in the Trie is also 1. + By default, the id of the root node is 1 and number of words in + the Trie is also 1. + The label of the root node is an empty string '' """ - root = FSANode(0, '') + root = FSANode(0, "") super(Trie, self).__init__(root) def __len__(self): @@ -27,9 +29,7 @@ def __len__(self): """ return self._id - def add(self, - word: str, - count: int = 1): + def add(self, word: str, count: int = 1): """Adds a word in the trie Description: @@ -52,7 +52,7 @@ def add(self, self._id += 1 node.add_child(letter, _id=self._id) node = node[letter] - if i == len(word)-1: + if i == len(word) - 1: node.eow = True node.count += count self._num_of_words += count diff --git a/lexpy/utils.py b/lexpy/utils.py index 72658fc..336dfe1 100644 --- a/lexpy/utils.py +++ b/lexpy/utils.py @@ -13,4 +13,4 @@ def build_dawg_from_file(input_file): def build_trie_from_file(input_file): - return _build_from_file(input_file, clazz=Trie) \ No newline at end of file + return _build_from_file(input_file, clazz=Trie) diff --git a/pyproject.toml b/pyproject.toml index 41ad39b..448cdd1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,3 @@ [build-system] requires = ['setuptools >= 40.8.0', 'wheel'] -build-backend = "setuptools.build_meta" \ No newline at end of file +build-backend = "setuptools.build_meta" diff --git a/setup.cfg b/setup.cfg index 6e8799c..aeaba3e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = lexpy -version = 1.1.0 +version = 1.1.1 description = Python package for lexicon long_description = file: README.md long_description_content_type = text/markdown @@ -21,6 +21,7 @@ classifiers = 'Programming Language :: Python :: 3.10' 'Programming Language :: Python :: 3.11' 'Programming Language :: Python :: 3.12' + 'Programming Language :: Python :: 3.13' 'Operating System :: POSIX :: Linux' 'Operating System :: Unix' 'Operating System :: Microsoft :: Windows' @@ -49,3 +50,6 @@ python_requires = >=3.7 [options.packages.find] where = lexpy exclude = tests + +[flake8] +max-line-length = 90 diff --git a/setup.py b/setup.py index d2701da..ac0668f 100644 --- a/setup.py +++ b/setup.py @@ -3,54 +3,57 @@ this_directory = path.abspath(path.dirname(__file__)) -with open(path.join(this_directory, 'README.md')) as f: +with open(path.join(this_directory, "README.md")) as f: long_description = f.read() -DISTNAME = 'lexpy' -AUTHOR = 'Abhishek Singh' -MAINTAINER = 'Abhishek Singh' -MAINTAINER_EMAIL = 'abhishek.singh20141@gmail.com' -DESCRIPTION = 'Python package for lexicon' -LICENSE = 'GNU GPLv3' -URL = 'https://github.com/aosingh/lexpy' -VERSION = '1.1.0' +DISTNAME = "lexpy" +AUTHOR = "Abhishek Singh" +MAINTAINER = "Abhishek Singh" +MAINTAINER_EMAIL = "abhishek.singh20141@gmail.com" +DESCRIPTION = "Python package for lexicon" +LICENSE = "GNU GPLv3" +URL = "https://github.com/aosingh/lexpy" +VERSION = "1.1.1" -PACKAGES = ['lexpy'] +PACKAGES = ["lexpy"] classifiers = [ - 'Development Status :: 5 - Production/Stable', - 'Intended Audience :: Education', - 'Intended Audience :: Developers', - 'Intended Audience :: Science/Research', - 'Topic :: Text Processing :: Linguistic', - 'Topic :: Text Processing :: Indexing', - 'License :: OSI Approved :: GNU General Public License v3 (GPLv3)', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8', - 'Programming Language :: Python :: 3.9', - 'Programming Language :: Python :: 3.10', - 'Programming Language :: Python :: 3.11', - 'Programming Language :: Python :: 3.12', - 'Operating System :: POSIX :: Linux', - 'Operating System :: Unix', - 'Operating System :: Microsoft :: Windows', - 'Operating System :: MacOS' + "Development Status :: 5 - Production/Stable", + "Intended Audience :: Education", + "Intended Audience :: Developers", + "Intended Audience :: Science/Research", + "Topic :: Text Processing :: Linguistic", + "Topic :: Text Processing :: Indexing", + "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Operating System :: POSIX :: Linux", + "Operating System :: Unix", + "Operating System :: Microsoft :: Windows", + "Operating System :: MacOS", ] -keywords = 'trie suffix-trees lexicon directed-acyclic-word-graph dawg' +keywords = "trie suffix-trees lexicon directed-acyclic-word-graph dawg" -project_urls = {"Documentation": "https://github.com/aosingh/lexpy", - "Source": "https://github.com/aosingh/lexpy", - "Bug Tracker": "https://github.com/aosingh/lexpy/issues", - "CI": "https://github.com/aosingh/lexpy/actions", - "Release Notes": "https://github.com/aosingh/lexpy/releases", - "License": "https://github.com/aosingh/lexpy/blob/main/LICENSE"} +project_urls = { + "Documentation": "https://github.com/aosingh/lexpy", + "Source": "https://github.com/aosingh/lexpy", + "Bug Tracker": "https://github.com/aosingh/lexpy/issues", + "CI": "https://github.com/aosingh/lexpy/actions", + "Release Notes": "https://github.com/aosingh/lexpy/releases", + "License": "https://github.com/aosingh/lexpy/blob/main/LICENSE", +} setup( name=DISTNAME, long_description=long_description, - long_description_content_type='text/markdown', + long_description_content_type="text/markdown", author=AUTHOR, author_email=MAINTAINER_EMAIL, maintainer=MAINTAINER, @@ -61,7 +64,7 @@ project_urls=project_urls, version=VERSION, packages=find_packages(exclude=("tests",)), - package_dir={'lexpy': 'lexpy'}, + package_dir={"lexpy": "lexpy"}, include_package_data=True, classifiers=classifiers, keywords=keywords.split(), From b6dea4b5c4c556f593b86df3b5c881f57ad2fd4f Mon Sep 17 00:00:00 2001 From: Abhishek Singh Date: Sun, 24 Nov 2024 21:33:24 -0800 Subject: [PATCH 2/3] exclude Python 3.7 from macosx --- .github/workflows/lexpy_build.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/lexpy_build.yaml b/.github/workflows/lexpy_build.yaml index 610e50f..a8b95ba 100644 --- a/.github/workflows/lexpy_build.yaml +++ b/.github/workflows/lexpy_build.yaml @@ -10,6 +10,11 @@ jobs: matrix: os: [macos-latest, windows-latest, ubuntu-latest] python-version: ['3.7', '3.8', '3.9', '3.10', '3.11', '3.12', '3.13', 'pypy-3.7', 'pypy-3.8', 'pypy-3.9', 'pypy-3.10'] + exclude: + - os: macos-latest + version: '3.7' + - os: macos-latest + version: 'pypy-3.7' steps: - name: Checkout From 29c12b61e0a3e23131b7da34535e0620bfa8c6d3 Mon Sep 17 00:00:00 2001 From: Abhishek Singh Date: Sun, 24 Nov 2024 21:34:16 -0800 Subject: [PATCH 3/3] exclude Python 3.7 from macosx --- .github/workflows/lexpy_build.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/lexpy_build.yaml b/.github/workflows/lexpy_build.yaml index a8b95ba..a82c0a9 100644 --- a/.github/workflows/lexpy_build.yaml +++ b/.github/workflows/lexpy_build.yaml @@ -12,9 +12,9 @@ jobs: python-version: ['3.7', '3.8', '3.9', '3.10', '3.11', '3.12', '3.13', 'pypy-3.7', 'pypy-3.8', 'pypy-3.9', 'pypy-3.10'] exclude: - os: macos-latest - version: '3.7' + python-version: '3.7' - os: macos-latest - version: 'pypy-3.7' + python-version: 'pypy-3.7' steps: - name: Checkout