From 378daca655daadf8c3e884f189f1a6b46d1f06be Mon Sep 17 00:00:00 2001 From: Dave Smith Date: Thu, 28 Nov 2024 05:58:33 +0000 Subject: [PATCH 1/2] Weight towards exact matches --- .../__experimental-fetch-link-suggestions.ts | 25 ++++++++++++++++--- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/packages/core-data/src/fetch/__experimental-fetch-link-suggestions.ts b/packages/core-data/src/fetch/__experimental-fetch-link-suggestions.ts index e1a166ee272dbe..29012197589035 100644 --- a/packages/core-data/src/fetch/__experimental-fetch-link-suggestions.ts +++ b/packages/core-data/src/fetch/__experimental-fetch-link-suggestions.ts @@ -270,12 +270,29 @@ export function sortResults( results: SearchResult[], search: string ) { for ( const result of results ) { if ( result.title ) { const titleTokens = tokenize( result.title ); - const matchingTokens = titleTokens.filter( ( titleToken ) => - searchTokens.some( ( searchToken ) => - titleToken.includes( searchToken ) + const exactMatchingTokens = titleTokens.filter( ( titleToken ) => + searchTokens.some( + ( searchToken ) => titleToken === searchToken ) ); - scores[ result.id ] = matchingTokens.length / titleTokens.length; + const subMatchingTokens = titleTokens.filter( ( titleToken ) => + searchTokens.some( + ( searchToken ) => + titleToken !== searchToken && + titleToken.includes( searchToken ) + ) + ); + + // The score is a combination of exact matches and sub-matches. + // More weight is given to exact matches, as they are more relevant (e.g. "cat" vs "caterpillar"). + // Diving by the total number of tokens in the title normalizes the score and skews + // the results towards shorter titles. + const exactMatchScore = + ( exactMatchingTokens.length / titleTokens.length ) * 10; + + const subMatchScore = subMatchingTokens.length / titleTokens.length; + + scores[ result.id ] = exactMatchScore + subMatchScore; } else { scores[ result.id ] = 0; } From 92fa46fbfe13b93b9b9c6b4d19ada14841da6792 Mon Sep 17 00:00:00 2001 From: Dave Smith Date: Thu, 28 Nov 2024 06:05:48 +0000 Subject: [PATCH 2/2] Add additional test coverage --- .../__experimental-fetch-link-suggestions.js | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/packages/core-data/src/fetch/test/__experimental-fetch-link-suggestions.js b/packages/core-data/src/fetch/test/__experimental-fetch-link-suggestions.js index 6878c74332c3d7..ad0014ff86ecb8 100644 --- a/packages/core-data/src/fetch/test/__experimental-fetch-link-suggestions.js +++ b/packages/core-data/src/fetch/test/__experimental-fetch-link-suggestions.js @@ -393,6 +393,43 @@ describe( 'sortResults', () => { 6, ] ); } ); + + it( 'orders results to prefer direct matches over sub matches', () => { + const results = [ + { + id: 1, + title: 'News', + url: 'http://wordpress.local/news/', + type: 'page', + kind: 'post-type', + }, + { + id: 2, + title: 'Newspaper', + url: 'http://wordpress.local/newspaper/', + type: 'page', + kind: 'post-type', + }, + { + id: 3, + title: 'News Flash News', + url: 'http://wordpress.local/news-flash-news/', + type: 'page', + kind: 'post-type', + }, + { + id: 4, + title: 'News', + url: 'http://wordpress.local/news-2/', + type: 'page', + kind: 'post-type', + }, + ]; + const order = sortResults( results, 'News' ).map( + ( result ) => result.id + ); + expect( order ).toEqual( [ 1, 4, 3, 2 ] ); + } ); } ); describe( 'tokenize', () => {