Skip to content
This repository has been archived by the owner on Aug 15, 2023. It is now read-only.

Commit

Permalink
fix(tk:shared): moved helper to build metadata from tk:backend
Browse files Browse the repository at this point in the history
  • Loading branch information
ascariandrea committed Oct 4, 2022
1 parent eda3fe1 commit ff65a76
Show file tree
Hide file tree
Showing 4 changed files with 6 additions and 129 deletions.
5 changes: 2 additions & 3 deletions platforms/tktrex/backend/__tests__/native.e2e.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@ import { parseISO, subMinutes } from 'date-fns';
import path from 'path';
import nacl from 'tweetnacl';
import { GetTest, Test } from '../test/Test';
import { toMetadata } from '@tktrex/shared/parser/metadata';
import {
addDom,
buildMetadata,
getLastHTMLs,
getMetadata,
getMetadataSchema,
Expand Down Expand Up @@ -93,7 +93,7 @@ describe('Parser: "native"', () => {
getContributions: getLastHTMLs(db),
getMetadata: getMetadata(db),
saveResults: updateMetadataAndMarkHTML(db),
buildMetadata: buildMetadata,
buildMetadata: toMetadata,
config: parserConfig,
expectSources: (receivedSources) => {
receivedSources.forEach((s) => {
Expand All @@ -113,7 +113,6 @@ describe('Parser: "native"', () => {
id: _receivedId,
clientTime: clientTimeExp,
savingTime: savingTimeExp,
type: typeExp,
...expectedM
} = expectedMetadata as any;

Expand Down
4 changes: 2 additions & 2 deletions platforms/tktrex/backend/bin/parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,12 @@ import nconf from 'nconf';
import path from 'path';
import {
addDom,
buildMetadata,
getLastHTMLs,
getMetadata,
parserConfig,
updateMetadataAndMarkHTML,
} from '../lib/parser';
import { toMetadata } from '@tktrex/shared/parser/metadata';

nconf.argv().env().file({ file: 'config/settings.json' });

Expand Down Expand Up @@ -85,7 +85,7 @@ const run = async (): Promise<void> => {
getContributions: getLastHTMLs(db),
saveResults: updateMetadataAndMarkHTML(db),
getEntryId: (e) => e.html.id,
buildMetadata,
buildMetadata: toMetadata,
getEntryDate: (e) => e.html.savingTime,
getEntryNatureType: (e) => e.html.type,
config: {
Expand Down
125 changes: 1 addition & 124 deletions platforms/tktrex/backend/lib/parser.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import {
BuildMetadataFn,
ContributionAndDOMFn,
GetContributionsFn,
GetMetadataFn,
Expand All @@ -8,14 +7,12 @@ import {
} from '@shared/providers/parser.provider';
import { sanitizeHTML } from '@shared/utils/html.utils';
import { TKMetadata } from '@tktrex/shared/models/Metadata';
import { TKParsers } from '@tktrex/shared/parser/parsers';
import { TKParserConfig } from '@tktrex/shared/parser/config';
import { HTMLSource } from '@tktrex/shared/parser/source';
import { isValid } from 'date-fns';
import D from 'debug';
import { JSDOM } from 'jsdom';
import _ from 'lodash';
import nconf from 'nconf';
import { JSDOM } from 'jsdom';

const debug = D('lib:parserchain');

Expand All @@ -41,126 +38,6 @@ export const addDom: ContributionAndDOMFn<HTMLSource> = (e) => ({
jsdom: new JSDOM(sanitizeHTML(e.html.html)).window.document,
});

/**
 * Flattens a parsed entry into the metadata record that gets stored.
 *
 * `entry` carries the original `.source` (html, impression, timeline), the
 * parser `.findings` and the `.failures`. The metadata is aggregated by unit
 * and not unrolled in any way.
 *
 * The spread order inside each `case` is significant: later spreads override
 * earlier keys with the same name.
 *
 * @param entry - the parsed entry (source + findings) to convert
 * @returns the metadata object, or `null` when the entry has no `nature`
 *   finding and therefore cannot be classified
 */
export const buildMetadata: BuildMetadataFn<
  HTMLSource,
  TKMetadata,
  TKParsers
> = (entry) => {
  // without a nature there is no type to switch on: nothing to build
  if (!entry?.findings?.nature) return null;

  let metadata: any = {
    clientTime: entry.source.html.clientTime,
  };

  switch (entry.findings.nature.type) {
    case 'foryou': {
      const {
        nature,
        author,
        description,
        hashtags,
        metrics,
        music,
        downloader,
      } = entry.findings;
      metadata = {
        ...metadata,
        ...nature,
        nature,
        ...description,
        author,
        metrics,
        music,
        hashtags,
        ...downloader,
      };
      break;
    }
    case 'search': {
      const { nature, downloader, search } = entry.findings;
      metadata = {
        ...metadata,
        ...nature,
        // shallow copy: the query is normalised on this object below, and
        // writing to the original would mutate `entry.findings.nature`
        nature: { ...nature },
        ...downloader,
        ...search,
      };
      // keep the lower-cased query both at top level and inside `nature`
      metadata.query = _.toLower(metadata.query);
      metadata.nature.query = metadata.query;
      break;
    }
    case 'profile': {
      const { nature, profile, downloader } = entry.findings;
      metadata = {
        ...metadata,
        nature,
        ...nature,
        ...downloader,
        ...profile,
      };
      break;
    }
    case 'video':
    case 'native': {
      const {
        nature,
        description,
        music,
        hashtags,
        metrics,
        stitch,
        author,
        downloader,
        native,
      } = entry.findings;
      // the initial `metadata` is not spread here; `clientTime` is assigned
      // again in the "fixed fields" section below
      metadata = {
        ...nature,
        nature,
        ...description,
        music,
        hashtags,
        metrics,
        stitch,
        author,
        ...downloader,
        ...native,
      };
      break;
    }
    default: {
      // unknown nature type: keep every finding and flatten the nature on top
      metadata = {
        ...metadata,
        ...entry.findings,
        ...entry.findings.nature,
      };
    }
  }

  /* fixed fields */
  // dates accepted by `isValid` are serialised as ISO strings, any other
  // value is passed through untouched
  metadata.savingTime = isValid(entry.source.html.savingTime)
    ? entry.source.html.savingTime.toISOString()
    : entry.source.html.savingTime;
  metadata.clientTime = isValid(entry.source.html.clientTime)
    ? entry.source.html.clientTime.toISOString()
    : entry.source.html.clientTime;
  metadata.id = entry.source.html.id;
  metadata.publicKey = entry.source.html.publicKey;
  metadata.timelineId = entry.source.html.timelineId;
  metadata.order = entry.source.html.n?.[0];

  /* optional fields */
  // copied only when present: geoip must have exactly two elements, the
  // other two must be non-empty
  if (entry.source.html.geoip?.length === 2)
    metadata.geoip = entry.source.html.geoip;
  if (entry.source.html.researchTag?.length)
    metadata.researchTag = entry.source.html.researchTag;
  if (entry.source.html.experimentId?.length)
    metadata.experimentId = entry.source.html.experimentId;

  return metadata;
};

export const getLastHTMLs =
(db: ParserProviderContextDB): GetContributionsFn<HTMLSource> =>
async (filter, skip, amount) => {
Expand Down
1 change: 1 addition & 0 deletions platforms/tktrex/shared/src/parser/metadata.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ export const toMetadata: BuildMetadataFn<HTMLSource, TKMetadata, TKParsers> = (

let metadata: any = {
clientTime: entry.source.html.clientTime,
thumbnails: [],
};

switch (entry.findings.nature.type) {
Expand Down

0 comments on commit ff65a76

Please sign in to comment.