Skip to content

Commit

Permalink
Matcher仕組みとキーワードマッチルールを追加
Browse files Browse the repository at this point in the history
  • Loading branch information
LINYE-MARIANA committed Jun 27, 2024
1 parent a07bffc commit 3b3b9ad
Show file tree
Hide file tree
Showing 12 changed files with 1,138 additions and 203 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
## 1.1.0

- Add Matcher class

## 1.0.3

- Throw exceptions if the fetcher fails to fetch data as expected
Expand Down
11 changes: 9 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,16 @@ void main() async {
});
List<LocaleDataItem> jaData = await fetcher.fetch("ja");
// Matcherを作成
// KeywordForwardMatcher.fromProperties: MatcherPropertiesクラスを使って動的に値を設定することもできます
// MatcherProperties props = MatcherProperties();
// props.maxResults = 5;
// KeywordForwardMatcher matcher = KeywordForwardMatcher.fromProperties(props);
KeywordForwardMatcher matcher = KeywordForwardMatcher();
// Generatorを用いて補完データを生成
Generator generator = Generator(
minKeywordLength: 2, keywordSeparator: ",", strictMatchLocales: ["en"]);
Generator generator = Generator.fromMatcher(matcher);
generator.loadData("ja", jaData);
List<MatchedResultData> completions =
Expand Down
11 changes: 10 additions & 1 deletion example/example.dart
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ void main(List<String> args) async {
String host = "";
String apiKey = "";
String locale = "ja";
int? maxResults;

for (var i = 0; i < args.length; i++) {
if (i >= args.length - 1) {
Expand All @@ -17,10 +18,18 @@ void main(List<String> args) async {
apiKey = args[i + 1];
} else if (args[i] == "--locale") {
locale = args[i + 1];
} else if (args[i] == "--max-results") {
maxResults = int.tryParse(args[i + 1]) ?? 0;
}
}

Generator generator = Generator();
MatcherProperties props = MatcherProperties();
if (maxResults != null) {
props.maxResults = maxResults;
}
KeywordForwardMatcher matcher = KeywordForwardMatcher.fromProperties(props);
// KeywordForwardMatcher matcher = KeywordForwardMatcher(maxResults: 5);
Generator generator = Generator.fromMatcher(matcher);

print("Fetching [$locale] data from $host with API key $apiKey");
Fetcher fetcher = Fetcher(
Expand Down
9 changes: 9 additions & 0 deletions lib/obot_completion_generator.dart
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,12 @@ export 'src/types.dart'
LocaleDataFilter,
GetEndpoint,
HandleResponse;
export 'src/matcher.dart'
show
MatcherProperties,
Matcher,
DefaultMatcher,
ForwardMatcher,
KeywordMatcher,
ConcatMatcher,
KeywordForwardMatcher;
215 changes: 26 additions & 189 deletions lib/src/generator.dart
Original file line number Diff line number Diff line change
@@ -1,37 +1,37 @@
import 'types.dart';
import 'matcher.dart';

class Generator {
final String _keywordSeparator;
final int _minKeywordLength;
final List<String> _strictMatchLocales;
final LocaleDataComparator? _comparator;
final LocaleDataFilter? _filter;
final Map<String, List<LocaleDataItem>> _data = {};

/// Creates a generator configured with the given matching options.
///
/// Every argument is optional. The defaults are `","` for
/// [keywordSeparator], `2` for [minKeywordLength] and `["en"]` for
/// [strictMatchLocales]. [comparator] and [filter] are stored as given and
/// are `null` when omitted.
///
/// The parameters are explicitly typed so that a wrong argument type is
/// reported by the analyzer instead of failing at runtime when the value is
/// assigned to the corresponding typed field.
Generator({
  String keywordSeparator = ",",
  int minKeywordLength = 2,
  List<String> strictMatchLocales = const ["en"],
  LocaleDataComparator? comparator,
  LocaleDataFilter? filter,
})  : _keywordSeparator = keywordSeparator,
      _minKeywordLength = minKeywordLength,
      _strictMatchLocales = strictMatchLocales,
      _comparator = comparator,
      _filter = filter;

/// Returns the text shown when this generator is printed.
///
/// The text lists the keyword separator, the minimum keyword length and the
/// strict-match locales.
@override
String toString() =>
    'Generator(keywordSeparator: $_keywordSeparator, minKeywordLength: $_minKeywordLength, strictMatchLocales: $_strictMatchLocales)';
final Matcher _matcher;

/// Creates a generator backed by a [DefaultMatcher].
///
/// All arguments are forwarded unchanged to the [DefaultMatcher]
/// constructor, and the resulting matcher is handed to
/// [Generator.fromMatcher].
factory Generator({
  String keywordSeparator = ",",
  int minKeywordLength = 2,
  List<String> strictMatchLocales = const ["en"],
  LocaleDataComparator? comparator,
  LocaleDataFilter? filter,
  MatchedResultDataScorer? scorer,
  MatchedResultDataSort? sort,
  int? maxResults,
}) {
  final defaultMatcher = DefaultMatcher(
    keywordSeparator: keywordSeparator,
    minKeywordLength: minKeywordLength,
    strictMatchLocales: strictMatchLocales,
    comparator: comparator,
    filter: filter,
    scorer: scorer,
    sort: sort,
    maxResults: maxResults,
  );
  return Generator.fromMatcher(defaultMatcher);
}

/// Creates a generator that delegates to the given matcher.
Generator.fromMatcher(this._matcher);

/// Stores the candidate data for [locale] on this instance.
///
/// An empty [localeData] list is ignored: nothing is stored and the matcher
/// is not called.
loadData(String locale, List<LocaleDataItem> localeData) {
  if (localeData.isEmpty) {
    return;
  }
  _data[locale] = localeData;
  _matcher.loadData(locale, localeData);
}

Expand All @@ -43,170 +43,7 @@ class Generator {
return [];
}

List<MatchedResultData> results = [];

if (_data.containsKey(locale)) {
List<LocaleDataItem>? localeData = _data[locale];

if (localeData == null) {
return [];
}

if (_comparator != null) {
localeData.sort((a, b) => _comparator(a, b, input, locale));
}

if (_filter != null) {
results = _filter(localeData, input, locale);
} else {
results = _getMatchedCompletions(localeData, input, locale);
}
}

List<MatchedResultData> results = _matcher.match(input, locale);
return results;
}

/// Collects the result data of every item in [localeData] that matches
/// [input].
///
/// Locales listed in `_strictMatchLocales` are checked with [_strictMatch];
/// all other locales use [_match]. The input is lowercased before it is
/// handed to either matcher.
List<MatchedResultData> _getMatchedCompletions(
    List<LocaleDataItem> localeData, String input, String locale) {
  final loweredInput = input.toLowerCase();
  final matchItem =
      _strictMatchLocales.contains(locale) ? _strictMatch : _match;

  final matched = <MatchedResultData>[];
  for (final candidate in localeData) {
    final outcome = matchItem(candidate, loweredInput);
    if (!outcome.isMatched) {
      continue;
    }
    matched.add(outcome.data);
  }
  return matched;
}

/// Matches [input] against [item] character by character (non-strict mode).
///
/// The input is scanned from left to right. Each position either starts the
/// longest run of characters that occurs in the lowercased keywords of
/// [item], or must be a character that occurs in the lowercased text of
/// [item]; otherwise the item is rejected immediately. Keyword runs of at
/// least `_minKeywordLength` characters are recorded as [MatchedKeyword]s.
/// Everything between, before and after the recorded keywords must then
/// occur as a contiguous substring of the text for the item to match.
///
/// [input] is compared as given against the lowercased text and keywords;
/// the caller visible in this file passes an already lowercased input.
///
/// Returns a [MatchedResult] whose `isMatched` flag tells whether the item
/// matched and whose data carries the item's original text and keywords
/// (plus the recorded keywords unless the item was rejected early).
MatchedResult _match(LocaleDataItem item, String input) {
  String text = item.text.toLowerCase();
  String keywords = item.keywords.toLowerCase();

  int inputLength = input.length;
  List<MatchedKeyword> matchedKeywords = [];

  int startAt = 0;
  while (startAt < inputLength) {
    String matchedKeyword = "";
    String word = input[startAt];

    if (keywords.contains(word)) {
      int endAt = startAt;
      matchedKeyword = word;
      if (endAt < inputLength - 1) {
        // There are more characters after this one.

        endAt += 1;
        // Extend the run to the longest keyword fragment that still matches.
        while (endAt < inputLength) {
          String checkWord = matchedKeyword + input[endAt];
          if (!keywords.contains(checkWord)) {
            // Step back so endAt is the index of the last matched character.
            endAt -= 1;
            break;
          }
          matchedKeyword = checkWord;
          endAt += 1;
        }
        // NOTE(review): when the run reaches the end of the input the loop
        // exits without the step back, so endAt equals input.length rather
        // than the last matched index. Kept as is to preserve the reported
        // positions.
      }

      if (matchedKeyword.length >= _minKeywordLength) {
        matchedKeywords.add(MatchedKeyword(
            text: matchedKeyword, startAt: startAt, endAt: endAt));
      }

      // Runs shorter than the minimum are skipped as well; they end up in
      // the unmatched parts checked against the text below.
      startAt = endAt + 1;
    } else if (!text.contains(word)) {
      // The character occurs in neither the keywords nor the text.
      return MatchedResult(
          isMatched: false,
          data: MatchedResultData(text: item.text, keywords: item.keywords));
    } else {
      startAt += 1;
    }
  }

  List<String> unmatchedParts = [];

  int keywordIdx = 0;
  MatchedKeyword? prevKeyword;
  MatchedKeyword? currentKeyword;

  while (keywordIdx < matchedKeywords.length) {
    currentKeyword = matchedKeywords[keywordIdx];
    // -1 stands for "no previous keyword", so the part before the first
    // keyword starts at index 0. (A default of 0 silently dropped the first
    // input character from the contiguous check.)
    int prevEndAt = prevKeyword?.endAt ?? -1;
    int keywordStartAt = currentKeyword.startAt;

    if (keywordStartAt > prevEndAt + 1) {
      unmatchedParts.add(input.substring(prevEndAt + 1, keywordStartAt));
    }

    prevKeyword = currentKeyword;
    keywordIdx += 1;
  }

  if (keywordIdx == 0) {
    // No keyword was recorded: the whole input must occur in the text.
    unmatchedParts.add(input);
  } else if (currentKeyword != null) {
    int lastEndAt = currentKeyword.endAt;
    if (lastEndAt + 1 < inputLength) {
      unmatchedParts.add(input.substring(lastEndAt + 1, inputLength));
    }
  }

  bool isMatched = unmatchedParts.every((word) => text.contains(word));

  return MatchedResult(
      isMatched: isMatched,
      data: MatchedResultData(
          text: item.text,
          keywords: item.keywords,
          matchedKeywords: matchedKeywords));
}

/// Matches [input] against [item] word by word (strict mode).
///
/// The input is split on single spaces. The last word is treated as still
/// being typed and only needs to be contained in the lowercased text or in
/// one of the keyword words. Every earlier word must either equal a keyword
/// word exactly, in which case it is recorded as a [MatchedKeyword], or be
/// contained in the text.
MatchedResult _strictMatch(LocaleDataItem item, String input) {
  // Lowercased question text of the candidate item.
  final loweredText = item.text.toLowerCase();

  // Space-delimited languages such as English are matched word by word.
  // NOTE: to make matching easier, multi-word keywords are split as well, so
  // a match on a single word of a keyword already counts as a keyword match.
  final keywordWords = [
    for (final phrase in item.keywords.toLowerCase().split(_keywordSeparator))
      ...phrase.split(" "),
  ];

  final completedWords = input.split(" ");
  final trailingWord = completedWords.removeLast();

  // Only the last word is still being typed, so it is matched partially.
  final trailingMatches = loweredText.contains(trailingWord) ||
      keywordWords.any((kw) => kw.contains(trailingWord));
  if (!trailingMatches) {
    return MatchedResult(
        isMatched: false,
        data: MatchedResultData(text: item.text, keywords: item.keywords));
  }

  final matchedKeywords = <MatchedKeyword>[];
  var isMatched = true;
  for (final word in completedWords) {
    if (keywordWords.contains(word)) {
      final startAt = input.indexOf(word);
      matchedKeywords.add(MatchedKeyword(
          text: word, startAt: startAt, endAt: startAt + word.length));
    } else if (!loweredText.contains(word)) {
      // A non-keyword word has to appear somewhere in the text.
      isMatched = false;
    }
  }

  return MatchedResult(
      isMatched: isMatched,
      data: MatchedResultData(
          text: item.text,
          keywords: item.keywords,
          matchedKeywords: matchedKeywords));
}
}
Loading

0 comments on commit 3b3b9ad

Please sign in to comment.