diff --git a/src/webhook/handlers/initState.ts b/src/webhook/handlers/initState.ts index b9a21fc1..197050ad 100644 --- a/src/webhook/handlers/initState.ts +++ b/src/webhook/handlers/initState.ts @@ -1,28 +1,23 @@ -import stringSimilarity from 'string-similarity'; import { t } from 'ttag'; -import { - FlexBubble, - FlexComponent, - FlexMessage, - Message, - TextMessage, -} from '@line/bot-sdk'; +import { FlexBubble, FlexMessage, Message, TextMessage } from '@line/bot-sdk'; + import type { Result, Context, CooccurredMessage, } from 'src/types/chatbotState'; +import ga from 'src/lib/ga'; +import detectDialogflowIntent from 'src/lib/detectDialogflowIntent'; + import { createPostbackAction, ellipsis, POSTBACK_NO_ARTICLE_FOUND, - createHighlightContents, createTextMessage, createArticleSourceReply, searchText, + createTextCarouselContents, } from './utils'; -import ga from 'src/lib/ga'; -import detectDialogflowIntent from 'src/lib/detectDialogflowIntent'; import choosingArticle from './choosingArticle'; const SIMILARITY_THRESHOLD = 0.95; @@ -81,7 +76,7 @@ const initState = async ({ const inputSummary = ellipsis(input, 12); - if (result?.edges.length) { + if (result.edges.length) { // Track if find similar Articles in DB. visitor.event({ ec: 'UserInput', ea: 'ArticleSearch', el: 'ArticleFound' }); @@ -95,23 +90,9 @@ const initState = async ({ }); }); - const edgesSortedWithSimilarity = result.edges - .map((edge) => ({ - ...edge, - similarity: stringSimilarity.compareTwoStrings( - // Remove spaces so that we count word's similarities only - // - (edge.node.text ?? '').replace(/\s/g, ''), - input.replace(/\s/g, '') - ), - })) - .sort((edge1, edge2) => edge2.similarity - edge1.similarity) - .slice(0, 9); /* flex carousel has at most 10 bubbles */ + const hasIdenticalDocs = result.edges[0].similarity >= SIMILARITY_THRESHOLD; - const hasIdenticalDocs = - edgesSortedWithSimilarity[0].similarity >= SIMILARITY_THRESHOLD; - - if (edgesSortedWithSimilarity.length === 1 && hasIdenticalDocs) { + if (result.edges.length === 1 && hasIdenticalDocs) { visitor.send(); return await choosingArticle({ @@ -120,107 +101,15 @@ const initState = async ({ postbackData: { sessionId: context.sessionId, state: 'CHOOSING_ARTICLE', - input: edgesSortedWithSimilarity[0].node.id, + input: result.edges[0].node.id, }, userId, }); } - const articleOptions: FlexBubble[] = edgesSortedWithSimilarity.map( - ({ node: { text, id, articleType }, highlight, similarity }) => { - const similarityPercentage = Math.round(similarity * 100); - const similarityEmoji = ['๐Ÿ˜', '๐Ÿ™‚', '๐Ÿ˜€', '๐Ÿ˜ƒ', '๐Ÿ˜„'][ - Math.floor(similarity * 4.999) - ]; - const displayTextWhenChosen = ellipsis(text ?? '', 25, '...'); - - const bodyContents: FlexComponent[] = []; - - const { contents: highlightContents, source: highlightSource } = - createHighlightContents(highlight); - - let highlightSourceInfo = ''; - switch (highlightSource) { - case 'hyperlinks': - highlightSourceInfo = t`(Words found in the hyperlink)`; - break; - case 'text': - if (articleType !== 'TEXT') { - highlightSourceInfo = t`(Words found in transcript)`; - } - } - if (highlightSourceInfo) { - bodyContents.push({ - type: 'text', - text: highlightSourceInfo, - size: 'sm', - color: '#ff7b7b', - weight: 'bold', - }); - } - - bodyContents.push({ - type: 'text', - contents: highlightContents, - maxLines: 6, - flex: 0, - gravity: 'top', - weight: 'regular', - wrap: true, - }); - - return { - type: 'bubble', - direction: 'ltr', - header: { - type: 'box', - layout: 'horizontal', - spacing: 'md', - paddingBottom: 'none', - contents: [ - { - type: 'text', - text: similarityEmoji, - flex: 0, - }, - { - type: 'text', - text: t`Looks ${similarityPercentage}% similar`, - gravity: 'center', - size: 'sm', - weight: 'bold', - wrap: true, - color: '#AAAAAA', - }, - ], - }, - body: { - type: 'box', - layout: 'vertical', - spacing: 'none', - margin: 'none', - contents: bodyContents, - }, - footer: { - type: 'box', - layout: 'horizontal', - contents: [ - { - type: 'button', - action: createPostbackAction( - t`Choose this one`, - id, - t`I choose โ€œ${displayTextWhenChosen}โ€`, - context.sessionId, - 'CHOOSING_ARTICLE' - ), - style: 'primary', - color: '#ffb600', - }, - ], - }, - }; - } + const articleOptions: FlexBubble[] = createTextCarouselContents( + result.edges, + context.sessionId ); // Show "no-article-found" option only when no identical docs are found diff --git a/src/webhook/handlers/processMedia.ts b/src/webhook/handlers/processMedia.ts index 1f541aad..70e7022a 100644 --- a/src/webhook/handlers/processMedia.ts +++ b/src/webhook/handlers/processMedia.ts @@ -1,11 +1,8 @@ import { t } from 'ttag'; -import type { - FlexBubble, - Message, - FlexMessage, - FlexComponent, -} from '@line/bot-sdk'; +import type { Message, FlexMessage } from '@line/bot-sdk'; + import { Context, CooccurredMessage } from 'src/types/chatbotState'; +import ga from 'src/lib/ga'; import { getLineContentProxyURL, @@ -13,18 +10,11 @@ import { POSTBACK_NO_ARTICLE_FOUND, createTextMessage, createAskArticleSubmissionConsentReply, - createHighlightContents, searchMedia, + createMediaCarouselContents, } from './utils'; -import gql from 'src/lib/gql'; -import ga from 'src/lib/ga'; import choosingArticle from './choosingArticle'; -import { - ListArticlesInProcessMediaQuery, - ListArticlesInProcessMediaQueryVariables, -} from 'typegen/graphql'; -const CIRCLED_DIGITS = 'โ“ชโ‘ โ‘กโ‘ขโ‘ฃโ‘คโ‘ฅโ‘ฆโ‘งโ‘จโ‘ฉโ‘ช'; const SIMILARITY_THRESHOLD = 0.95; export default async function (message: CooccurredMessage, userId: string) { @@ -61,12 +51,8 @@ export default async function (message: CooccurredMessage, userId: string) { }); }); - const edgesSortedWithSimilarity = [...result.edges].sort( - (a, b) => b.mediaSimilarity - a.mediaSimilarity - ); - const hasIdenticalDocs = - edgesSortedWithSimilarity[0].mediaSimilarity >= SIMILARITY_THRESHOLD; + result.edges[0].mediaSimilarity >= SIMILARITY_THRESHOLD; if (result.edges.length === 1 && hasIdenticalDocs) { visitor.send(); @@ -77,134 +63,16 @@ export default async function (message: CooccurredMessage, userId: string) { postbackData: { state: 'CHOOSING_ARTICLE', sessionId: context.sessionId, - input: edgesSortedWithSimilarity[0].node.id, + input: result.edges[0].node.id, }, userId, }); } - const articleOptions = result.edges - .map( - ( - { - node: { attachmentUrl, id, articleType }, - highlight, - mediaSimilarity, - }, - index - ): FlexBubble => { - const displayTextWhenChosen = CIRCLED_DIGITS[index + 1]; - - const { contents: highlightContents, source: highlightSource } = - createHighlightContents(highlight); - - const similarityPercentage = Math.round(mediaSimilarity * 100); - - const looks = - mediaSimilarity > 0 - ? t`Looks ${similarityPercentage}% similar` - : highlightSource === null - ? t`Similar file` - : t`Contains relevant text`; - - const bodyContents: FlexComponent[] = []; - - if (highlightSource) { - let highlightSourceInfo = ''; - switch (highlightSource) { - case 'hyperlinks': - highlightSourceInfo = t`(Text in the hyperlink)`; - break; - case 'text': - if (articleType !== 'TEXT') { - highlightSourceInfo = t`(Text in transcript)`; - } - } - - if (highlightSourceInfo) { - bodyContents.push({ - type: 'text', - text: highlightSourceInfo, - size: 'sm', - color: '#ff7b7b', - weight: 'bold', - }); - } - - bodyContents.push({ - type: 'text', - contents: highlightContents, - // Show less lines if there are thumbnails to show - maxLines: attachmentUrl ? 5 : 12, - flex: 0, - gravity: 'top', - weight: 'regular', - wrap: true, - }); - } - - return { - type: 'bubble', - direction: 'ltr', - header: { - type: 'box', - layout: 'horizontal', - spacing: 'sm', - paddingBottom: 'md', - contents: [ - { - type: 'text', - text: displayTextWhenChosen + ' ' + looks, - gravity: 'center', - size: 'sm', - weight: 'bold', - wrap: true, - color: '#AAAAAA', - }, - ], - }, - - // Show thumbnail image if available - hero: !attachmentUrl - ? undefined - : { - type: 'image', - url: attachmentUrl, - size: 'full', - }, - - // Show highlighted text if available - body: - bodyContents.length === 0 - ? undefined - : { - type: 'box', - layout: 'vertical', - contents: bodyContents, - }, - - footer: { - type: 'box', - layout: 'horizontal', - contents: [ - { - type: 'button', - action: createPostbackAction( - t`Choose this one`, - id, - t`I choose ${displayTextWhenChosen}`, - context.sessionId, - 'CHOOSING_ARTICLE' - ), - style: 'primary', - color: '#ffb600', - }, - ], - }, - }; - } - ) - .slice(0, 9); /* flex carousel has at most 10 bubbles */ + const articleOptions = createMediaCarouselContents( + result.edges, + context.sessionId + ); // Show "no-article-found" option only when no identical docs are found // diff --git a/src/webhook/handlers/utils.ts b/src/webhook/handlers/utils.ts index 50f3e806..a5ebb0fb 100644 --- a/src/webhook/handlers/utils.ts +++ b/src/webhook/handlers/utils.ts @@ -11,6 +11,7 @@ import type { } from '@line/bot-sdk'; import { t, msgid, ngettext } from 'ttag'; import GraphemeSplitter from 'grapheme-splitter'; +import stringSimilarity from 'string-similarity'; import gql from 'src/lib/gql'; import { getArticleURL, createTypeWords } from 'src/lib/sharedUtils'; @@ -856,9 +857,26 @@ export function getLineContentProxyURL(messageId: string) { return `${process.env.RUMORS_LINE_BOT_URL}/getcontent?token=${jwt}`; } -export async function searchText( - text: string -): Promise { +/** + * ListArticle result with similarity score + */ +type SearchTextResult = Omit< + ListArticlesInInitStateQuery['ListArticles'], + 'edges' +> & { + edges: Array< + NonNullable< + ListArticlesInInitStateQuery['ListArticles'] + >['edges'][number] & { + similarity: number; + } + >; +}; + +/** + * Searches for text and reorder with string similarity + */ +export async function searchText(text: string): Promise { const { data: { ListArticles }, } = await gql` @@ -887,13 +905,142 @@ export async function searchText( `({ text, }); - return ListArticles; + + const sanitizedText = text.replace(/\s/g, ''); + const edgesSortedWithSimilarity = + ListArticles?.edges + .map((edge) => ({ + ...edge, + similarity: stringSimilarity.compareTwoStrings( + // Remove spaces so that we count word's similarities only + // + (edge.node.text ?? '').replace(/\s/g, ''), + sanitizedText + ), + })) + .sort((edge1, edge2) => edge2.similarity - edge1.similarity) ?? []; + + return { + ...ListArticles, + edges: edgesSortedWithSimilarity, + }; } +export function createTextCarouselContents( + edges: SearchTextResult['edges'], + sessionId: number +) { + return edges + .map( + ({ node: { text, id, articleType }, highlight, similarity }) => { + const similarityPercentage = Math.round(similarity * 100); + const similarityEmoji = ['๐Ÿ˜', '๐Ÿ™‚', '๐Ÿ˜€', '๐Ÿ˜ƒ', '๐Ÿ˜„'][ + Math.floor(similarity * 4.999) + ]; + const displayTextWhenChosen = ellipsis(text ?? '', 25, '...'); + + const bodyContents: FlexComponent[] = []; + + const { contents: highlightContents, source: highlightSource } = + createHighlightContents(highlight); + + let highlightSourceInfo = ''; + switch (highlightSource) { + case 'hyperlinks': + highlightSourceInfo = t`(Words found in the hyperlink)`; + break; + case 'text': + if (articleType !== 'TEXT') { + highlightSourceInfo = t`(Words found in transcript)`; + } + } + if (highlightSourceInfo) { + bodyContents.push({ + type: 'text', + text: highlightSourceInfo, + size: 'sm', + color: '#ff7b7b', + weight: 'bold', + }); + } + + bodyContents.push({ + type: 'text', + contents: highlightContents, + maxLines: 6, + flex: 0, + gravity: 'top', + weight: 'regular', + wrap: true, + }); + + return { + type: 'bubble', + direction: 'ltr', + header: { + type: 'box', + layout: 'horizontal', + spacing: 'md', + paddingBottom: 'none', + contents: [ + { + type: 'text', + text: similarityEmoji, + flex: 0, + }, + { + type: 'text', + text: t`Looks ${similarityPercentage}% similar`, + gravity: 'center', + size: 'sm', + weight: 'bold', + wrap: true, + color: '#AAAAAA', + }, + ], + }, + body: { + type: 'box', + layout: 'vertical', + spacing: 'none', + margin: 'none', + contents: bodyContents, + }, + footer: { + type: 'box', + layout: 'horizontal', + contents: [ + { + type: 'button', + action: createPostbackAction( + t`Choose this one`, + id, + t`I choose โ€œ${displayTextWhenChosen}โ€`, + sessionId, + 'CHOOSING_ARTICLE' + ), + style: 'primary', + color: '#ffb600', + }, + ], + }, + }; + } + ) /* flex carousel has at most 10 bubbles */ + .slice(0, 9); +} + +type SearchMediaResult = Omit< + ListArticlesInProcessMediaQuery['ListArticles'], + 'edges' +> & { + edges: NonNullable['edges']; +}; + export async function searchMedia( mediaUrl: string, userId: string -): Promise { +): Promise { const { data: { ListArticles }, } = await gql` @@ -929,5 +1076,145 @@ export async function searchMedia( { mediaUrl }, { userId } ); - return ListArticles; + return { + ...ListArticles, + edges: [...(ListArticles?.edges ?? [])].sort( + (a, b) => b.mediaSimilarity - a.mediaSimilarity + ), + }; +} + +const CIRCLED_DIGITS = 'โ“ชโ‘ โ‘กโ‘ขโ‘ฃโ‘คโ‘ฅโ‘ฆโ‘งโ‘จโ‘ฉโ‘ช'; + +/** + * @param edges - edge data returned by searchMedia() + * @param sessionId + * @returns + */ +export function createMediaCarouselContents( + edges: SearchMediaResult['edges'], + sessionId: number +): FlexBubble[] { + return edges + .map( + ( + { + node: { attachmentUrl, id, articleType }, + highlight, + mediaSimilarity, + }, + index + ): FlexBubble => { + const displayTextWhenChosen = CIRCLED_DIGITS[index + 1]; + + const { contents: highlightContents, source: highlightSource } = + createHighlightContents(highlight); + + const similarityPercentage = Math.round(mediaSimilarity * 100); + + const looks = + mediaSimilarity > 0 + ? t`Looks ${similarityPercentage}% similar` + : highlightSource === null + ? t`Similar file` + : t`Contains relevant text`; + + const bodyContents: FlexComponent[] = []; + + if (highlightSource) { + let highlightSourceInfo = ''; + switch (highlightSource) { + case 'hyperlinks': + highlightSourceInfo = t`(Text in the hyperlink)`; + break; + case 'text': + if (articleType !== 'TEXT') { + highlightSourceInfo = t`(Text in transcript)`; + } + } + + if (highlightSourceInfo) { + bodyContents.push({ + type: 'text', + text: highlightSourceInfo, + size: 'sm', + color: '#ff7b7b', + weight: 'bold', + }); + } + + bodyContents.push({ + type: 'text', + contents: highlightContents, + // Show less lines if there are thumbnails to show + maxLines: attachmentUrl ? 5 : 12, + flex: 0, + gravity: 'top', + weight: 'regular', + wrap: true, + }); + } + + return { + type: 'bubble', + direction: 'ltr', + header: { + type: 'box', + layout: 'horizontal', + spacing: 'sm', + paddingBottom: 'md', + contents: [ + { + type: 'text', + text: displayTextWhenChosen + ' ' + looks, + gravity: 'center', + size: 'sm', + weight: 'bold', + wrap: true, + color: '#AAAAAA', + }, + ], + }, + + // Show thumbnail image if available + hero: !attachmentUrl + ? undefined + : { + type: 'image', + url: attachmentUrl, + size: 'full', + }, + + // Show highlighted text if available + body: + bodyContents.length === 0 + ? undefined + : { + type: 'box', + layout: 'vertical', + contents: bodyContents, + }, + + footer: { + type: 'box', + layout: 'horizontal', + contents: [ + { + type: 'button', + action: createPostbackAction( + t`Choose this one`, + id, + t`I choose ${displayTextWhenChosen}`, + sessionId, + 'CHOOSING_ARTICLE' + ), + style: 'primary', + color: '#ffb600', + }, + ], + }, + }; + } + ) + .slice(0, 9); /* flex carousel has at most 10 bubbles */ }