Skip to content

Commit

Permalink
fix incorrect result normalization in setratio and seqratio
Browse files Browse the repository at this point in the history
  • Loading branch information
maxbachmann committed Oct 14, 2022
1 parent 4d12d5c commit d78528d
Show file tree
Hide file tree
Showing 8 changed files with 68 additions and 10 deletions.
4 changes: 4 additions & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
## Changelog

### v0.20.6
#### Fixed
- fix incorrect result normalization in `setratio` and `seqratio`

### v0.20.5
#### Fixed
- fix support for cmake versions below 3.17
Expand Down
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
author = 'Max Bachmann'

# The full version, including alpha/beta/rc tags
release = '0.20.5'
release = '0.20.6'

# -- General configuration ---------------------------------------------------

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setup(
name="Levenshtein",
version="0.20.5",
version="0.20.6",
url="https://github.com/maxbachmann/Levenshtein",
author="Max Bachmann",
install_requires=["rapidfuzz >= 2.3.0, < 3.0.0"],
Expand Down
8 changes: 5 additions & 3 deletions src/Levenshtein/StringMatcher.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
from Levenshtein import *
from warnings import warn


class StringMatcher:
"""A SequenceMatcher-like class built on the top of Levenshtein"""

def _reset_cache(self):
self._ratio = self._distance = None
self._opcodes = self._editops = self._matching_blocks = None

def __init__(self, isjunk=None, seq1='', seq2='', autojunk=False):
def __init__(self, isjunk=None, seq1="", seq2="", autojunk=False):
if isjunk:
warn("isjunk NOT implemented, it will be ignored")
if autojunk:
Expand Down Expand Up @@ -46,8 +47,9 @@ def get_editops(self):

def get_matching_blocks(self):
if not self._matching_blocks:
self._matching_blocks = matching_blocks(self.get_opcodes(),
self._str1, self._str2)
self._matching_blocks = matching_blocks(
self.get_opcodes(), self._str1, self._str2
)
return self._matching_blocks

def ratio(self):
Expand Down
24 changes: 22 additions & 2 deletions src/Levenshtein/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

__author__: str = "Max Bachmann"
__license__: str = "GPL"
__version__: str = "0.20.5"
__version__: str = "0.20.6"

import rapidfuzz.distance.Levenshtein as _Levenshtein
import rapidfuzz.distance.Indel as _Indel
Expand All @@ -37,6 +37,26 @@
seqratio,
)

# Public names of the package: this list controls what
# ``from Levenshtein import *`` brings into the importing namespace.
__all__ = [
"quickmedian",
"median",
"median_improve",
"setmedian",
"setratio",
"seqratio",
"distance",
"ratio",
"hamming",
"jaro",
"jaro_winkler",
"editops",
"opcodes",
"matching_blocks",
"apply_edit",
"subtract_edit",
"inverse",
]


def distance(s1, s2, *, weights=(1, 1, 1), processor=None, score_cutoff=None):
"""
Expand Down Expand Up @@ -67,7 +87,7 @@ def distance(s1, s2, *, weights=(1, 1, 1), processor=None, score_cutoff=None):
-------
distance : int
distance between s1 and s2
Raises
------
ValueError
Expand Down
6 changes: 3 additions & 3 deletions src/Levenshtein/levenshtein_cpp.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ def setratio(strlist1, strlist2, *):
else:
dist = lev_set_distance(strings1, strings2)

return <double>lensum - dist / <double>lensum
return (<double>lensum - dist) / <double>lensum

def seqratio(strlist1, strlist2, *):
"""
Expand All @@ -235,7 +235,7 @@ def seqratio(strlist1, strlist2, *):
>>> seqratio(['newspaper', 'litter bin', 'tinny', 'antelope'],
... ['caribou', 'sausage', 'gorn', 'woody'])
0.21517857142857144
0.215178...
"""

strings1 = extract_stringlist(strlist1)
Expand All @@ -252,4 +252,4 @@ def seqratio(strlist1, strlist2, *):
else:
dist = lev_edit_seq_distance(strings1, strings2)

return <double>lensum - dist / <double>lensum
return (<double>lensum - dist) / <double>lensum
16 changes: 16 additions & 0 deletions tests/test_seq_ratio.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import Levenshtein


def isclose(a, b, rel_tol=1e-09, abs_tol=0.0):
    """Return True when ``a`` and ``b`` are approximately equal.

    Two values are considered close when their absolute difference is at
    most ``max(rel_tol * max(abs(a), abs(b)), abs_tol)``.

    Parameters
    ----------
    a, b : float
        Values to compare.
    rel_tol : float, optional
        Maximum allowed difference relative to the larger magnitude.
    abs_tol : float, optional
        Minimum absolute tolerance; needed for comparisons near zero.

    Returns
    -------
    bool
        True if the values are within tolerance of each other.
    """
    # Delegate to the standard library instead of re-implementing the
    # formula: it uses the same tolerance rule but additionally treats
    # equal infinities as close (a hand-written ``abs(a - b)`` yields NaN
    # for ``inf - inf`` and therefore reports False).
    import math

    return math.isclose(a, b, rel_tol=rel_tol, abs_tol=abs_tol)


def test_documented():
    """
    check seqratio against the example given in its documentation
    """
    first = ["newspaper", "litter bin", "tinny", "antelope"]
    second = ["caribou", "sausage", "gorn", "woody"]
    expected = 0.21517857142857144

    actual = Levenshtein.seqratio(first, second)
    assert isclose(actual, expected)
16 changes: 16 additions & 0 deletions tests/test_set_ratio.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import Levenshtein


def isclose(a, b, rel_tol=1e-09, abs_tol=0.0):
    """Return True when ``a`` and ``b`` are approximately equal.

    Two values are considered close when their absolute difference is at
    most ``max(rel_tol * max(abs(a), abs(b)), abs_tol)``.

    Parameters
    ----------
    a, b : float
        Values to compare.
    rel_tol : float, optional
        Maximum allowed difference relative to the larger magnitude.
    abs_tol : float, optional
        Minimum absolute tolerance; needed for comparisons near zero.

    Returns
    -------
    bool
        True if the values are within tolerance of each other.
    """
    # Delegate to the standard library instead of re-implementing the
    # formula: it uses the same tolerance rule but additionally treats
    # equal infinities as close (a hand-written ``abs(a - b)`` yields NaN
    # for ``inf - inf`` and therefore reports False).
    import math

    return math.isclose(a, b, rel_tol=rel_tol, abs_tol=abs_tol)


def test_documented():
    """
    run the documented setratio example and compare with its expected value
    """
    first = ["newspaper", "litter bin", "tinny", "antelope"]
    second = ["caribou", "sausage", "gorn", "woody"]
    expected = 0.2818452380952381

    actual = Levenshtein.setratio(first, second)
    assert isclose(actual, expected)
assert isclose(ratio, 0.2818452380952381)

0 comments on commit d78528d

Please sign in to comment.