From a07c2cc4ac851ba1907fdfd8a3e7a857857651c9 Mon Sep 17 00:00:00 2001 From: Kasper Fyhn Date: Fri, 6 Dec 2024 11:23:42 +0100 Subject: [PATCH] Showing documents with triplets marked up --- .../relationextraction/data_classes.py | 2 + visualizer/package-lock.json | 138 ++++++++++++++++++ visualizer/package.json | 2 + visualizer/src/docs/DocService.ts | 38 ++++- visualizer/src/graph/GraphViewer.tsx | 4 +- visualizer/src/inspector/DocInfo.tsx | 121 ++++++++++++++- visualizer/src/inspector/EdgeInfo.tsx | 33 +---- visualizer/src/inspector/NodeInfo.tsx | 2 +- visualizer/src/inspector/StatsInfo.tsx | 9 +- visualizer/src/inspector/docinfo.css | 29 ++++ 10 files changed, 328 insertions(+), 50 deletions(-) create mode 100644 visualizer/src/inspector/docinfo.css diff --git a/src/conspiracies/docprocessing/relationextraction/data_classes.py b/src/conspiracies/docprocessing/relationextraction/data_classes.py index 5370451..4ac5276 100644 --- a/src/conspiracies/docprocessing/relationextraction/data_classes.py +++ b/src/conspiracies/docprocessing/relationextraction/data_classes.py @@ -264,7 +264,9 @@ def span_to_json(span: Union[Span, Doc]) -> Dict[str, Any]: span = span[:] return { "text": span.text, + "start_char": span.start_char, "start": span.start, + "end_char": span.end_char, "end": span.end, } diff --git a/visualizer/package-lock.json b/visualizer/package-lock.json index 16425f6..d3726cc 100644 --- a/visualizer/package-lock.json +++ b/visualizer/package-lock.json @@ -14,10 +14,12 @@ "@types/jest": "^27.5.2", "@types/node": "^16.18.96", "@types/react-dom": "^18.2.24", + "draft-js": "^0.11.7", "multi-range-slider-react": "^2.0.7", "react": "^18.2.0", "react-dom": "^18.2.0", "react-graph-vis": "^1.0.7", + "react-highlight-within-textarea": "^3.2.2", "react-router-dom": "^6.22.3", "react-scripts": "5.0.1", "react-vis-graph-wrapper": "^0.1.3", @@ -6516,6 +6518,14 @@ "integrity": "sha512-+R08/oI0nl3vfPcqftZRpytksBXDzOUveBq/NBVx0sUp1axwzPQrKinNx5yd5sxPu8j1wIy8AfnVQ+5eFdha6Q==", "dev": true }, + "node_modules/cross-fetch": { + "version": "3.1.8", + "resolved": "https://registry.npmjs.org/cross-fetch/-/cross-fetch-3.1.8.tgz", + "integrity": "sha512-cvA+JwZoU0Xq+h6WkMvAUqPEYy92Obet6UdKLfW60qn99ftItKjB5T+BkyWOFWe2pUyfQ+IJHmpOTznqk1M6Kg==", + "dependencies": { + "node-fetch": "^2.6.12" + } + }, "node_modules/cross-spawn": { "version": "7.0.3", "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz", @@ -7417,6 +7427,20 @@ "resolved": "https://registry.npmjs.org/dotenv-expand/-/dotenv-expand-5.1.0.tgz", "integrity": "sha512-YXQl1DSa4/PQyRfgrv6aoNjhasp/p4qs9FjJ4q4cQk+8m4r6k4ZSiEyytKG8f8W9gi8WsQtIObNmKd+tMzNTmA==" }, + "node_modules/draft-js": { + "version": "0.11.7", + "resolved": "https://registry.npmjs.org/draft-js/-/draft-js-0.11.7.tgz", + "integrity": "sha512-ne7yFfN4sEL82QPQEn80xnADR8/Q6ALVworbC5UOSzOvjffmYfFsr3xSZtxbIirti14R7Y33EZC5rivpLgIbsg==", + "dependencies": { + "fbjs": "^2.0.0", + "immutable": "~3.7.4", + "object-assign": "^4.1.1" + }, + "peerDependencies": { + "react": ">=0.14.0", + "react-dom": ">=0.14.0" + } + }, "node_modules/duplexer": { "version": "0.1.2", "resolved": "https://registry.npmjs.org/duplexer/-/duplexer-0.1.2.tgz", @@ -8609,6 +8633,34 @@ "bser": "2.1.1" } }, + "node_modules/fbjs": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/fbjs/-/fbjs-2.0.0.tgz", + "integrity": "sha512-8XA8ny9ifxrAWlyhAbexXcs3rRMtxWcs3M0lctLfB49jRDHiaxj+Mo0XxbwE7nKZYzgCFoq64FS+WFd4IycPPQ==", + "dependencies": { + "core-js": "^3.6.4", + "cross-fetch": "^3.0.4", + "fbjs-css-vars": "^1.0.0", + "loose-envify": "^1.0.0", + "object-assign": "^4.1.0", + "promise": "^7.1.1", + "setimmediate": "^1.0.5", + "ua-parser-js": "^0.7.18" + } + }, + "node_modules/fbjs-css-vars": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/fbjs-css-vars/-/fbjs-css-vars-1.0.2.tgz", + "integrity": "sha512-b2XGFAFdWZWg0phtAWLHCk836A1Xann+I+Dgd3Gk64MHKZO44FfoD1KxyvbSh0qZsIoXQGGlVztIY+oitJPpRQ==" + }, + "node_modules/fbjs/node_modules/promise": { + "version": "7.3.1", + "resolved": "https://registry.npmjs.org/promise/-/promise-7.3.1.tgz", + "integrity": "sha512-nolQXZ/4L+bP/UGlkfaIujX9BKxGwmQ9OT4mOt5yvy8iK1h3wqTEJCijzGANTCCl9nWjY41juyAn2K3Q1hLLTg==", + "dependencies": { + "asap": "~2.0.3" + } + }, "node_modules/fd-slicer": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/fd-slicer/-/fd-slicer-1.1.0.tgz", @@ -9791,6 +9843,14 @@ "url": "https://opencollective.com/immer" } }, + "node_modules/immutable": { + "version": "3.7.6", + "resolved": "https://registry.npmjs.org/immutable/-/immutable-3.7.6.tgz", + "integrity": "sha512-AizQPcaofEtO11RZhPPHBOJRdo/20MKQF9mBLnVkBoyHi1/zXK8fzVdnEpSV9gxqtnh6Qomfp3F0xT5qP/vThw==", + "engines": { + "node": ">=0.8.0" + } + }, "node_modules/import-fresh": { "version": "3.3.0", "resolved": "https://registry.npmjs.org/import-fresh/-/import-fresh-3.3.0.tgz", @@ -12160,6 +12220,44 @@ "tslib": "^2.0.3" } }, + "node_modules/node-fetch": { + "version": "2.7.0", + "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz", + "integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==", + "dependencies": { + "whatwg-url": "^5.0.0" + }, + "engines": { + "node": "4.x || >=6.0.0" + }, + "peerDependencies": { + "encoding": "^0.1.0" + }, + "peerDependenciesMeta": { + "encoding": { + "optional": true + } + } + }, + "node_modules/node-fetch/node_modules/tr46": { + "version": "0.0.3", + "resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz", + "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==" + }, + "node_modules/node-fetch/node_modules/webidl-conversions": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz", + "integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==" + }, + "node_modules/node-fetch/node_modules/whatwg-url": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz", + "integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==", + "dependencies": { + "tr46": "~0.0.3", + "webidl-conversions": "^3.0.0" + } + }, "node_modules/node-forge": { "version": "1.3.1", "resolved": "https://registry.npmjs.org/node-forge/-/node-forge-1.3.1.tgz", @@ -14550,6 +14648,16 @@ "integrity": "sha512-FULf7fayPdpASncVy4DLh3xydlXEJJpvIELjYjNeQWYUZ9pclcpvCZSr2gkmN2FrrGcI7G/cJsIEwk5/8vfXpg==", "deprecated": "Please upgrade to version 7 or higher. Older versions may use Math.random() in certain circumstances, which is known to be problematic. See https://v8.dev/blog/math-random for details." }, + "node_modules/react-highlight-within-textarea": { + "version": "3.2.2", + "resolved": "https://registry.npmjs.org/react-highlight-within-textarea/-/react-highlight-within-textarea-3.2.2.tgz", + "integrity": "sha512-pS+tPi6//dM8V154/0SfSqkx+0i6lKpSKazLZa7+RQjNQg0wKeCZBVkOGtxAhsVJy5KWpfIfdcpE8JpZ2Giz/g==", + "peerDependencies": { + "draft-js": ">=0.11.7", + "react": ">=0.14.0", + "react-dom": ">=0.14.0" + } + }, "node_modules/react-is": { "version": "17.0.2", "resolved": "https://registry.npmjs.org/react-is/-/react-is-17.0.2.tgz", @@ -15793,6 +15901,11 @@ "node": ">= 0.4" } }, + "node_modules/setimmediate": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/setimmediate/-/setimmediate-1.0.5.tgz", + "integrity": "sha512-MATJdZp8sLqDl/68LfQmbP8zKPLQNV6BIZoIgrscFDQ+RsvK/BxeDQOgyxKKoh0y/8h3BqVFnCqQ/gd+reiIXA==" + }, "node_modules/setprototypeof": { "version": "1.2.0", "resolved": "https://registry.npmjs.org/setprototypeof/-/setprototypeof-1.2.0.tgz", @@ -17165,6 +17278,31 @@ "node": ">=4.2.0" } }, + "node_modules/ua-parser-js": { + "version": "0.7.39", + "resolved": "https://registry.npmjs.org/ua-parser-js/-/ua-parser-js-0.7.39.tgz", + "integrity": "sha512-IZ6acm6RhQHNibSt7+c09hhvsKy9WUr4DVbeq9U8o71qxyYtJpQeDxQnMrVqnIFMLcQjHO0I9wgfO2vIahht4w==", + "funding": [ + { + "type": "opencollective", + "url": "https://opencollective.com/ua-parser-js" + }, + { + "type": "paypal", + "url": "https://paypal.me/faisalman" + }, + { + "type": "github", + "url": "https://github.com/sponsors/faisalman" + } + ], + "bin": { + "ua-parser-js": "script/cli.js" + }, + "engines": { + "node": "*" + } + }, "node_modules/unbox-primitive": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/unbox-primitive/-/unbox-primitive-1.0.2.tgz", diff --git a/visualizer/package.json b/visualizer/package.json index c64ff2e..745725b 100644 --- a/visualizer/package.json +++ b/visualizer/package.json @@ -9,10 +9,12 @@ "@types/jest": "^27.5.2", "@types/node": "^16.18.96", "@types/react-dom": "^18.2.24", + "draft-js": "^0.11.7", "multi-range-slider-react": "^2.0.7", "react": "^18.2.0", "react-dom": "^18.2.0", "react-graph-vis": "^1.0.7", + "react-highlight-within-textarea": "^3.2.2", "react-router-dom": "^6.22.3", "react-scripts": "5.0.1", "react-vis-graph-wrapper": "^0.1.3", diff --git a/visualizer/src/docs/DocService.ts b/visualizer/src/docs/DocService.ts index 1d3731a..a017fe1 100644 --- a/visualizer/src/docs/DocService.ts +++ b/visualizer/src/docs/DocService.ts @@ -12,10 +12,25 @@ export abstract class DocService { } } +export interface TripletField { + text: string; + start_char: number; + start: number; + end_char: number; + end: number; +} + +export interface Triplet { + subject: TripletField; + predicate: TripletField; + object: TripletField; +} + export interface Doc { id: string; text: string; timestamp: string; + semantic_triplets: Triplet[]; } export class SampleDocService extends DocService { @@ -25,16 +40,19 @@ export class SampleDocService extends DocService { id: "1", text: "sample text 1", timestamp: "", + semantic_triplets: [], }, { id: "2", text: "sample text 1", timestamp: "", + semantic_triplets: [], }, { id: "3", text: "sample text 1", timestamp: "", + semantic_triplets: [], }, ].map((d) => [d.id, d]), ); @@ -58,14 +76,18 @@ export class FileDocService extends DocService { constructor(docData: Doc[]) { super(); this.docData = new Map( - docData.map((d) => [ - d.id, - { - id: d.id, - text: d.text, - timestamp: d.timestamp, - }, - ]), + docData + .filter((d) => d.semantic_triplets !== undefined) + .map((d) => [ + d.id, + { + id: d.id, + text: d.text, + timestamp: d.timestamp, + semantic_triplets: d.semantic_triplets, + }, + ]), ); + console.log(this.docData.size); } } diff --git a/visualizer/src/graph/GraphViewer.tsx b/visualizer/src/graph/GraphViewer.tsx index 043ee87..333d4d0 100644 --- a/visualizer/src/graph/GraphViewer.tsx +++ b/visualizer/src/graph/GraphViewer.tsx @@ -10,7 +10,7 @@ import { useServiceContext } from "../service/ServiceContextProvider"; export interface GraphViewerProps {} export const GraphViewer: React.FC = () => { - const { getGraphService, getDocService } = useServiceContext(); + const { getGraphService } = useServiceContext(); const top50 = getGraphService() @@ -45,7 +45,7 @@ export const GraphViewer: React.FC = () => { ? getGraphService().getSubGraph(subgraphNodes) : getGraphService().getGraph(); return filter(graphFilter, baseGraphData); - }, [graphFilter, subgraphNodes]); + }, [getGraphService, graphFilter, subgraphNodes]); const graphDataMaps = useMemo(() => { return { diff --git a/visualizer/src/inspector/DocInfo.tsx b/visualizer/src/inspector/DocInfo.tsx index 401414c..98b325d 100644 --- a/visualizer/src/inspector/DocInfo.tsx +++ b/visualizer/src/inspector/DocInfo.tsx @@ -1,18 +1,127 @@ -import React from "react"; -import { Doc } from "../docs/DocService"; +import React, { PropsWithChildren } from "react"; +import { Doc, Triplet } from "../docs/DocService"; +import HighlightWithinTextarea from "react-highlight-within-textarea"; +import "./docinfo.css"; + +const BlueHighlight: React.FC = (props) => { + return ( + + {props.children} + + ); +}; + +const GreenHighlight: React.FC = (props) => { + return ( + + {props.children} + + ); +}; + +const RedHighlight: React.FC = (props) => { + return ( + {props.children} + ); +}; + +interface HighlightedTextProps { + text: string; + triplets: Triplet[]; + highlightLabels: string[]; +} + +const HighlightedText: React.FC = ({ + text, + triplets, + highlightLabels, +}) => { + const subjects = []; + const highlightSubjects = []; + const predicates = []; + const highlightPredicates = []; + const objects = []; + const highlightObjects = []; + + for (let triplet of triplets) { + const subject = triplet.subject; + const subjectSpan = [subject.start_char, subject.end_char]; + if (highlightLabels.indexOf(subject.text) > -1) { + highlightSubjects.push(subjectSpan); + } else { + subjects.push(subjectSpan); + } + const predicate = triplet.predicate; + const predicateSpan = [predicate.start_char, predicate.end_char]; + if (highlightLabels.indexOf(predicate.text) > -1) { + highlightPredicates.push(predicateSpan); + } else { + predicates.push(predicateSpan); + } + const object = triplet.object; + const objectSpan = [object.start_char, object.end_char]; + if (highlightLabels.indexOf(object.text) > -1) { + highlightObjects.push(objectSpan); + } else { + objects.push(objectSpan); + } + } + + return ( + + ); +}; export interface DocInfoProps { document: Doc; + highlightLabels: string[]; } -export const DocInfo: React.FC = ({ document }) => { +export const DocInfo: React.FC = ({ + document, + highlightLabels, +}) => { return (
-

{document.id}

- {document.timestamp} -

{document.text}

+

+ {document.id} {document.timestamp} +

+
); }; diff --git a/visualizer/src/inspector/EdgeInfo.tsx b/visualizer/src/inspector/EdgeInfo.tsx index bd06bdb..f73425c 100644 --- a/visualizer/src/inspector/EdgeInfo.tsx +++ b/visualizer/src/inspector/EdgeInfo.tsx @@ -1,5 +1,6 @@ import { EdgeGroup } from "../graph/GraphService"; import React from "react"; +import { StatsInfo } from "./StatsInfo"; export interface EdgeInfoProps { edges: EdgeGroup; @@ -12,37 +13,7 @@ export const EdgeInfo: React.FC = ({ edges }: EdgeInfoProps) => { {edges.group!.map((e, i) => (
{e.label} -
-

Frequency: {e.stats.frequency}

- {/*

Norm. frequency: {e.stats.norm_frequency?.toPrecision(3)}

*/} - {e.stats.first_occurrence && ( -

Earliest date: {e.stats.first_occurrence}

- )} - {e.stats.last_occurrence && ( -

Latest date: {e.stats.last_occurrence}

- )} - {e.stats.alt_labels && ( -
- Alternative Labels: -
    - {e.stats.alt_labels.map((l) => ( -
  • {l}
  • - ))} -
-
- )} - {e.stats.docs && ( -
- Documents -
    - {e.stats.docs.map((d) => ( -
  • {d}
  • - ))} -
-
- )} -
- {i < edges.group!.length - 1 &&
} +
))} diff --git a/visualizer/src/inspector/NodeInfo.tsx b/visualizer/src/inspector/NodeInfo.tsx index 631fc12..3785a5e 100644 --- a/visualizer/src/inspector/NodeInfo.tsx +++ b/visualizer/src/inspector/NodeInfo.tsx @@ -15,7 +15,7 @@ export const NodeInfo: React.FC = ({
{node.label}
- +
); }; diff --git a/visualizer/src/inspector/StatsInfo.tsx b/visualizer/src/inspector/StatsInfo.tsx index e9b75e9..8676b8d 100644 --- a/visualizer/src/inspector/StatsInfo.tsx +++ b/visualizer/src/inspector/StatsInfo.tsx @@ -4,10 +4,11 @@ import { Stats } from "../graph/GraphService"; import { useServiceContext } from "../service/ServiceContextProvider"; export interface StatsInfoProps { + label: string; stats: Stats; } -export const StatsInfo: React.FC = ({ stats }) => { +export const StatsInfo: React.FC = ({ label, stats }) => { const { getDocService } = useServiceContext(); return ( @@ -42,7 +43,11 @@ export const StatsInfo: React.FC = ({ stats }) => { {getDocService() .getDocs(stats.docs) .map((d) => ( - + ))} )} diff --git a/visualizer/src/inspector/docinfo.css b/visualizer/src/inspector/docinfo.css new file mode 100644 index 0000000..f3ba462 --- /dev/null +++ b/visualizer/src/inspector/docinfo.css @@ -0,0 +1,29 @@ +.highlight-subject { + background: cyan; + opacity: 1; +} + +.subject { + background: cyan; + opacity: 0.3; +} + +.highlight-predicate { + background: lightgreen; + opacity: 1; +} + +.predicate { + background: lightgreen; + opacity: 0.3; +} + +.highlight-object { + background: yellow; + opacity: 1; +} + +.object { + background: yellow; + opacity: 0.3; +}