Skip to content

Commit

Permalink
Merge pull request #30 from scalableminds/fuzzy-matching-wildcard-only
Browse files Browse the repository at this point in the history
Fuzzy matching wildcard only
  • Loading branch information
dickscheid authored Nov 25, 2022
2 parents 8cce871 + 412aca3 commit 4701a5e
Showing 1 changed file with 14 additions and 24 deletions.
38 changes: 14 additions & 24 deletions +siibra/+internal/fuzzyMatching.m
Original file line number Diff line number Diff line change
Expand Up @@ -5,35 +5,25 @@
end

%FUZZY_MATCHING returns the index of the first matching haystack entry or raises an error if none matches
% If python is available use difflib to rank values in haystack based
% on similarity. If python is not available check if the query is a
% substring and return the first elementIndex for which this is true.
% Check (case-insensitively) whether the words of the query occur in the
% given order within a haystack entry and return the index of the first entry for which this is true.

lowerQuery = lower(query);
lowerHaystack = lower(haystack);

if any(strcmp(pyenv().Version, ["3.8", "3.9"]))
difflib = py.importlib.import_module('difflib');
matcher = arrayfun(@(hay) difflib.SequenceMatcher(a=lowerQuery, b=hay), lowerHaystack, 'UniformOutput', false);
ratios = cellfun(@(m) m.ratio(), matcher);
[maxRatio, matchedIndex] = max(ratios);
if maxRatio < 0.3
error ("Empty result for query " + query + ". Closest match: " + haystack(matchedIndex));
end
else
% no python available
% build pattern
words = split(lowerQuery, " ");
pattern = wildcardPattern;
for wordIndex = 1:numel(words)
pattern = pattern + words(wordIndex) + wildcardPattern;
end
matchedIndices = find(contains(lowerHaystack, pattern));
if isempty(matchedIndices)
error ("Empty result for query " + query + " in " + sprintf("%s", haystack + ", "));
end
matchedIndex = matchedIndices(1);
% build pattern
words = split(lowerQuery, " ");
pattern = wildcardPattern;
for wordIndex = 1:numel(words)
pattern = pattern + words(wordIndex) + wildcardPattern;
end

matchedIndices = find(contains(lowerHaystack, pattern));
if isempty(matchedIndices)
error ("Empty result for query " + query + " in " + sprintf("%s", haystack + ", "));
end
matchedIndex = matchedIndices(1);

if ~strcmp(query, haystack(matchedIndex))
display("Resolved query '" + query + "' to: " + haystack(matchedIndex));
end
Expand Down

0 comments on commit 4701a5e

Please sign in to comment.