diff --git a/platforms/tktrex/backend/__tests__/native.e2e.ts b/platforms/tktrex/backend/__tests__/native.e2e.ts index 74e0f0755..979967dbe 100644 --- a/platforms/tktrex/backend/__tests__/native.e2e.ts +++ b/platforms/tktrex/backend/__tests__/native.e2e.ts @@ -11,9 +11,9 @@ import { parseISO, subMinutes } from 'date-fns'; import path from 'path'; import nacl from 'tweetnacl'; import { GetTest, Test } from '../test/Test'; +import { toMetadata } from '@tktrex/shared/parser/metadata'; import { addDom, - buildMetadata, getLastHTMLs, getMetadata, getMetadataSchema, @@ -93,7 +93,7 @@ describe('Parser: "native"', () => { getContributions: getLastHTMLs(db), getMetadata: getMetadata(db), saveResults: updateMetadataAndMarkHTML(db), - buildMetadata: buildMetadata, + buildMetadata: toMetadata, config: parserConfig, expectSources: (receivedSources) => { receivedSources.forEach((s) => { @@ -113,7 +113,6 @@ describe('Parser: "native"', () => { id: _receivedId, clientTime: clientTimeExp, savingTime: savingTimeExp, - type: typeExp, ...expectedM } = expectedMetadata as any; diff --git a/platforms/tktrex/backend/bin/parser.ts b/platforms/tktrex/backend/bin/parser.ts index 95f60575d..02df78412 100644 --- a/platforms/tktrex/backend/bin/parser.ts +++ b/platforms/tktrex/backend/bin/parser.ts @@ -11,12 +11,12 @@ import nconf from 'nconf'; import path from 'path'; import { addDom, - buildMetadata, getLastHTMLs, getMetadata, parserConfig, updateMetadataAndMarkHTML, } from '../lib/parser'; +import { toMetadata } from '@tktrex/shared/parser/metadata'; nconf.argv().env().file({ file: 'config/settings.json' }); @@ -85,7 +85,7 @@ const run = async (): Promise => { getContributions: getLastHTMLs(db), saveResults: updateMetadataAndMarkHTML(db), getEntryId: (e) => e.html.id, - buildMetadata, + buildMetadata: toMetadata, getEntryDate: (e) => e.html.savingTime, getEntryNatureType: (e) => e.html.type, config: { diff --git a/platforms/tktrex/backend/lib/parser.ts b/platforms/tktrex/backend/lib/parser.ts index 7229e7529..9b39d8184 100644 --- a/platforms/tktrex/backend/lib/parser.ts +++ b/platforms/tktrex/backend/lib/parser.ts @@ -1,5 +1,4 @@ import { - BuildMetadataFn, ContributionAndDOMFn, GetContributionsFn, GetMetadataFn, @@ -8,14 +7,12 @@ import { } from '@shared/providers/parser.provider'; import { sanitizeHTML } from '@shared/utils/html.utils'; import { TKMetadata } from '@tktrex/shared/models/Metadata'; -import { TKParsers } from '@tktrex/shared/parser/parsers'; import { TKParserConfig } from '@tktrex/shared/parser/config'; import { HTMLSource } from '@tktrex/shared/parser/source'; -import { isValid } from 'date-fns'; import D from 'debug'; +import { JSDOM } from 'jsdom'; import _ from 'lodash'; import nconf from 'nconf'; -import { JSDOM } from 'jsdom'; const debug = D('lib:parserchain'); @@ -41,126 +38,6 @@ export const addDom: ContributionAndDOMFn = (e) => ({ jsdom: new JSDOM(sanitizeHTML(e.html.html)).window.document, }); -export const buildMetadata: BuildMetadataFn< - HTMLSource, - TKMetadata, - TKParsers -> = (entry) => { - // this contains the original .source (html, impression, timeline), the .findings and .failures - // the metadata is aggregated by unit and not unrolled in any way - if (!entry?.findings?.nature) return null; - - let metadata: any = { - clientTime: entry.source.html.clientTime, - }; - - switch (entry.findings.nature.type) { - case 'foryou': { - const { - nature, - author, - description, - hashtags, - metrics, - music, - downloader, - } = entry.findings; - metadata = { - ...metadata, - ...nature, - nature, - ...description, - author, - metrics, - music, - hashtags, - ...downloader, - }; - break; - } - case 'search': { - const { nature, downloader, search } = entry.findings; - metadata = { - ...metadata, - ...nature, - nature, - ...downloader, - ...search, - }; - metadata.query = _.toLower(metadata.query); - metadata.nature.query = metadata.query; - break; - } - case 'profile': { - const { nature, profile, downloader } = entry.findings; - metadata = { - ...metadata, - nature, - ...nature, - ...downloader, - ...profile, - }; - break; - } - case 'video': - case 'native': { - const { - nature, - description, - music, - hashtags, - metrics, - stitch, - author, - downloader, - native, - } = entry.findings; - metadata = { - ...nature, - nature, - ...description, - music, - hashtags, - metrics, - stitch, - author, - ...downloader, - ...native, - }; - break; - } - default: { - metadata = { - ...metadata, - ...entry.findings, - ...entry.findings.nature, - }; - } - } - - /* fixed fields */ - metadata.savingTime = isValid(entry.source.html.savingTime) - ? entry.source.html.savingTime.toISOString() - : entry.source.html.savingTime; - metadata.clientTime = isValid(entry.source.html.clientTime) - ? entry.source.html.clientTime.toISOString() - : entry.source.html.clientTime; - metadata.id = entry.source.html.id; - metadata.publicKey = entry.source.html.publicKey; - metadata.timelineId = entry.source.html.timelineId; - metadata.order = entry.source.html.n?.[0]; - - /* optional fields */ - if (entry.source.html.geoip?.length === 2) - metadata.geoip = entry.source.html.geoip; - if (entry.source.html.researchTag?.length) - metadata.researchTag = entry.source.html.researchTag; - if (entry.source.html.experimentId?.length) - metadata.experimentId = entry.source.html.experimentId; - - return metadata; -}; - export const getLastHTMLs = (db: ParserProviderContextDB): GetContributionsFn => async (filter, skip, amount) => { diff --git a/platforms/tktrex/shared/src/parser/metadata.ts b/platforms/tktrex/shared/src/parser/metadata.ts index 0c5126d3f..f0df8227b 100644 --- a/platforms/tktrex/shared/src/parser/metadata.ts +++ b/platforms/tktrex/shared/src/parser/metadata.ts @@ -14,6 +14,7 @@ export const toMetadata: BuildMetadataFn = ( let metadata: any = { clientTime: entry.source.html.clientTime, + thumbnails: [], }; switch (entry.findings.nature.type) {