Skip to content

Commit

Permalink
feat: Add confidence behavior
Browse files Browse the repository at this point in the history
  • Loading branch information
3y3 committed Sep 24, 2024
1 parent 8154d9e commit f235c88
Show file tree
Hide file tree
Showing 6 changed files with 79 additions and 29 deletions.
11 changes: 11 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,14 @@ Instance methods:
## Worker {#worker}

Implements the client search worker interface. Uses the lunr index prepared by the indexer to resolve search requests.

Extends the search scoring algorithm:

- Adds `tolerance` behavior.
`tolerance=0` - search only for strictly equal words.
`tolerance=1` - also search for words with an unspecified tail: `word*`.
`tolerance=2` - also search for words with an unspecified tail and head: `*word*`.

- Adds `confidence` behavior.
`phrased` - default. Additionally scores results by the length of the found phrase.
`sparsed` - uses the default lunr scoring algorithm.
6 changes: 6 additions & 0 deletions src/types.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
import type {ISearchWorkerConfig} from '@diplodoc/client';

/**
 * Scoring modes accepted by the `confidence` worker option.
 *
 * Modelled as a literal object plus a derived union type rather than a string
 * `enum`: the option is defaulted and compared using plain string literals
 * (`'phrased'`, `'sparsed'`), and a string enum type is neither assignable from
 * nor comparable to those literals without a cast.
 */
const Confidence = {
    /** Default mode: additionally scores results by found phrase length. */
    Phrased: 'phrased',
    /** Uses the default lunr scoring algorithm. */
    Sparsed: 'sparsed',
} as const;

/** Union of the allowed `confidence` values: `'phrased' | 'sparsed'`. */
type Confidence = (typeof Confidence)[keyof typeof Confidence];

export interface WorkerConfig extends ISearchWorkerConfig {
tolerance: number;
confidence: Confidence;
resources: {
index: string;
registry: string;
Expand Down
6 changes: 3 additions & 3 deletions src/worker/format.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,11 @@ type Trimmer = (text: string, score: Score) => [string, Position[]];

export function format(
{base, mark}: WorkerConfig,
result: SearchResult[],
results: SearchResult[],
registry: Registry,
trim: Trimmer,
): SearchSuggestPageItem[] {
return result.map((entry) => {
return results.map((entry) => {
const doc = registry[entry.ref];
const item = {
type: 'page',
Expand All @@ -41,7 +41,7 @@ export function format(
}

export function short(text: string, score: Score): [string, Position[]] {
const {positions, maxScorePosition: position} = score;
const {positions, position} = score;
const [before, content, after] = split(text, position);
const head = before.length > SHORT_HEAD ? '...' + before.slice(-SHORT_HEAD) : before;
const tail = after.slice(0, Math.max(0, MAX_LENGTH - head.length - content.length));
Expand Down
1 change: 1 addition & 0 deletions src/worker/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ self.api = {
async init() {
config = {
tolerance: 2,
confidence: 'phrased',
...self.config,
} as WorkerConfig;
},
Expand Down
53 changes: 31 additions & 22 deletions src/worker/score.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,24 +24,43 @@ type ScoreResult = {

export type Score = {
positions: Position[];
avgScore: number;
sumScore: number;
maxScore: number;
maxScorePosition: Position;
score: number;
position: Position;
};

type FSM = () => FSM | null;

export function score(terms: string[], results: Index.Result) {
/**
 * Builds per-field scores for a single lunr result without any extra
 * phrase-based re-scoring: every field that has matched tokens receives the
 * result's own `score` unchanged.
 *
 * @param result - a lunr search result; it is passed through `normalize`
 *   (presumably grouping matched tokens by field name — confirm against the helper).
 * @returns a map from field name to its score record. Fields listed in
 *   `INDEX_FIELDS` that have no tokens are omitted.
 */
export function sparsed(result: Index.Result) {
    const tokensByField = normalize(result);
    const scores: Record<string, Score> = {};

    for (const field of Object.keys(INDEX_FIELDS)) {
        const matched = tokensByField[field];

        if (matched.length) {
            // The first matched token serves as the anchor position for the field.
            const [first] = matched;

            scores[field] = {
                positions: matched.map(get('position')),
                score: result.score,
                position: first.position,
            };
        }
    }

    return scores;
}

export function phrased(result: Index.Result, terms: string[]) {
const phrase = terms.join(' ');
const fields = normalize(results);
const fields = normalize(result);
const scores: Record<string, Score> = {};

let state: ScoreState, tokens: ResultToken[];
let result: ScoreResult[];
let results: ScoreResult[];
for (const [field] of Object.entries(INDEX_FIELDS)) {
tokens = fields[field];
result = [];
results = [];

if (!tokens.length) {
continue;
Expand All @@ -53,11 +72,9 @@ export function score(terms: string[], results: Index.Result) {
}

scores[field] = {
positions: result.map(get('position')),
avgScore: result.map(get('score')).reduce(avg, 0),
sumScore: result.map(get('score')).reduce(sum, 0),
maxScore: result.map(get('score')).reduce(max, 0),
maxScorePosition: result.reduce(maxScorePosition).position,
positions: results.map(get('position')),
score: results.map(get('score')).reduce(max, 0),
position: results.reduce(maxScorePosition).position,
};
}

Expand All @@ -79,7 +96,7 @@ export function score(terms: string[], results: Index.Result) {

function nextScore() {
const {score, position} = state;
result.push({score, position});
results.push({score, position});

state.score = 0;
state.position = state.curr.position.slice() as Position;
Expand Down Expand Up @@ -154,14 +171,6 @@ function max(a: number, b: number) {
return Math.max(a, b);
}

/**
 * Returns the midpoint of two numbers.
 *
 * NOTE(review): when passed to `Array.prototype.reduce` this produces a running
 * pairwise midpoint (later elements weigh more), not the arithmetic mean of the
 * whole array.
 */
function avg(a: number, b: number) {
    const total = a + b;

    return total / 2;
}

/** Adds two numbers; shaped to be used as a `reduce` callback. */
function sum(a: number, b: number) {
    const total = a + b;

    return total;
}

/**
 * Reducer that keeps whichever of two score results has the higher `score`.
 * On a tie the first argument (the accumulated value in a `reduce`) is kept.
 */
function maxScorePosition(a: ScoreResult, b: ScoreResult) {
    if (a.score >= b.score) {
        return a;
    }

    return b;
}
Expand Down
31 changes: 27 additions & 4 deletions src/worker/search.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@ import type {WorkerConfig} from '../types';
// @ts-ignore
import {Query, QueryParser} from 'lunr';

import {score} from './score';
import {INDEX_FIELDS} from '../constants';

import {phrased, sparsed} from './score';

const withIndex = (index: Index) => (builder: Index.QueryBuilder | false) =>
function withIndex() {
Expand Down Expand Up @@ -66,7 +68,7 @@ const makeStrategies = (tolerance: number, index: Index, clauses: FixedClause[],
export type SearchResult = Index.Result & {scores: Record<string, Score>};

export function search(
{tolerance}: WorkerConfig,
{tolerance, confidence}: WorkerConfig,
index: Index,
query: string,
count: number,
Expand All @@ -78,6 +80,7 @@ export function search(
const strategies = makeStrategies(tolerance, index, clauses, sealed);
const refs = new Set<string>();

const score = confidence === 'sparsed' ? sparsed : phrased;
const results: SearchResult[] = [];
while (refs.size < count && strategies.length) {
const strategy = strategies.shift() as Strategy;
Expand All @@ -86,15 +89,16 @@ export function search(
for (const entry of match) {
if (!refs.has(entry.ref)) {
refs.add(entry.ref);

results.push({
...entry,
scores: score(terms, entry),
scores: score(entry, terms),
});
}
}
}

return results.slice(0, count);
return results.sort(byMaxScore).slice(0, count);
}

function wildcard(clause: FixedClause, mode: Query.wildcard) {
Expand All @@ -111,3 +115,22 @@ function wildcard(clause: FixedClause, mode: Query.wildcard) {
clause.wildcard = mode;
clause.usePipeline = false;
}

/**
 * Sort comparator that orders search results by their best per-field score,
 * highest first.
 */
function byMaxScore(a: SearchResult, b: SearchResult) {
    const best = {
        left: getMaxScore(a),
        right: getMaxScore(b),
    };

    // A positive value places `b` before `a`, giving descending order.
    return best.right - best.left;
}

/**
 * Returns the highest `score` among the result's per-field scores, looking only
 * at fields listed in `INDEX_FIELDS`. Fields without a score entry are skipped;
 * the result is `0` when no field has one.
 */
function getMaxScore(result: SearchResult) {
    return Object.keys(INDEX_FIELDS).reduce((best, field) => {
        const fieldScore = result.scores[field];

        return fieldScore ? Math.max(fieldScore.score, best) : best;
    }, 0);
}

0 comments on commit f235c88

Please sign in to comment.