diff --git a/skyvern-frontend/src/routes/workflows/editor/nodes/PDFParserNode/PDFParserNode.tsx b/skyvern-frontend/src/routes/workflows/editor/nodes/PDFParserNode/PDFParserNode.tsx new file mode 100644 index 0000000000..a8c693674d --- /dev/null +++ b/skyvern-frontend/src/routes/workflows/editor/nodes/PDFParserNode/PDFParserNode.tsx @@ -0,0 +1,138 @@ +import { HelpTooltip } from "@/components/HelpTooltip"; +import { Input } from "@/components/ui/input"; +import { Label } from "@/components/ui/label"; +import { useDeleteNodeCallback } from "@/routes/workflows/hooks/useDeleteNodeCallback"; +import { useNodeLabelChangeHandler } from "@/routes/workflows/hooks/useLabelChangeHandler"; +import { WorkflowBlockTypes } from "@/routes/workflows/types/workflowTypes"; +import { Handle, NodeProps, Position, useReactFlow } from "@xyflow/react"; +import { useState } from "react"; +import { helpTooltips } from "../../helpContent"; +import { EditableNodeTitle } from "../components/EditableNodeTitle"; +import { NodeActionMenu } from "../NodeActionMenu"; +import { WorkflowBlockIcon } from "../WorkflowBlockIcon"; +import { type PDFParserNode } from "./types"; +import { Checkbox } from "@/components/ui/checkbox"; +import { dataSchemaExampleForFileExtraction } from "../types"; +import { CodeEditor } from "@/routes/workflows/components/CodeEditor"; + +function PDFParserNode({ id, data }: NodeProps) { + const { updateNodeData } = useReactFlow(); + const deleteNodeCallback = useDeleteNodeCallback(); + const [inputs, setInputs] = useState({ + fileUrl: data.fileUrl, + dataSchema: data.jsonSchema, + }); + const [label, setLabel] = useNodeLabelChangeHandler({ + id, + initialValue: data.label, + }); + + function handleChange(key: string, value: unknown) { + if (!data.editable) { + return; + } + setInputs({ ...inputs, [key]: value }); + updateNodeData(id, { [key]: value }); + } + + return ( +
+ + +
+
+
+
+ +
+
+ + PDF Parser Block +
+
+ { + deleteNodeCallback(id); + }} + /> +
+
+
+
+ + +
+ { + if (!data.editable) { + return; + } + setInputs({ ...inputs, fileUrl: event.target.value }); + updateNodeData(id, { fileUrl: event.target.value }); + }} + className="nopan text-xs" + /> +
+
+
+
+ + +
+ { + handleChange( + "dataSchema", + checked + ? JSON.stringify( + dataSchemaExampleForFileExtraction, + null, + 2, + ) + : "null", + ); + }} + /> +
+ {inputs.dataSchema !== "null" && ( +
+ { + handleChange("dataSchema", value); + }} + className="nowheel nopan" + fontSize={8} + /> +
+ )} +
+
+
+
+ ); +} + +export { PDFParserNode }; diff --git a/skyvern-frontend/src/routes/workflows/editor/nodes/PDFParserNode/types.ts b/skyvern-frontend/src/routes/workflows/editor/nodes/PDFParserNode/types.ts new file mode 100644 index 0000000000..d9ad6142f1 --- /dev/null +++ b/skyvern-frontend/src/routes/workflows/editor/nodes/PDFParserNode/types.ts @@ -0,0 +1,17 @@ +import type { Node } from "@xyflow/react"; +import { NodeBaseData } from "../types"; + +export type PDFParserNodeData = NodeBaseData & { + fileUrl: string; + jsonSchema: string; +}; + +export type PDFParserNode = Node; + +export const pdfParserNodeDefaultData: PDFParserNodeData = { + editable: true, + label: "", + fileUrl: "", + continueOnFailure: false, + jsonSchema: "null", +} as const; diff --git a/skyvern-frontend/src/routes/workflows/editor/nodes/WorkflowBlockIcon.tsx b/skyvern-frontend/src/routes/workflows/editor/nodes/WorkflowBlockIcon.tsx index f024bbff94..71a635cfa1 100644 --- a/skyvern-frontend/src/routes/workflows/editor/nodes/WorkflowBlockIcon.tsx +++ b/skyvern-frontend/src/routes/workflows/editor/nodes/WorkflowBlockIcon.tsx @@ -67,6 +67,9 @@ function WorkflowBlockIcon({ workflowBlockType, className }: Props) { case "wait": { return ; } + case "pdf_parser": { + return ; + } } } diff --git a/skyvern-frontend/src/routes/workflows/editor/nodes/index.ts b/skyvern-frontend/src/routes/workflows/editor/nodes/index.ts index ea590ac57f..8dbb587e4d 100644 --- a/skyvern-frontend/src/routes/workflows/editor/nodes/index.ts +++ b/skyvern-frontend/src/routes/workflows/editor/nodes/index.ts @@ -33,6 +33,8 @@ import { WaitNode } from "./WaitNode/types"; import { WaitNode as WaitNodeComponent } from "./WaitNode/WaitNode"; import { FileDownloadNode } from "./FileDownloadNode/types"; import { FileDownloadNode as FileDownloadNodeComponent } from "./FileDownloadNode/FileDownloadNode"; +import { PDFParserNode } from "./PDFParserNode/types"; +import { PDFParserNode as PDFParserNodeComponent } from "./PDFParserNode/PDFParserNode"; export type UtilityNode = StartNode | NodeAdderNode; @@ -51,7 +53,8 @@ export type WorkflowBlockNode = | ExtractionNode | LoginNode | WaitNode - | FileDownloadNode; + | FileDownloadNode + | PDFParserNode; export function isUtilityNode(node: AppNode): node is UtilityNode { return node.type === "nodeAdder" || node.type === "start"; @@ -81,4 +84,5 @@ export const nodeTypes = { login: memo(LoginNodeComponent), wait: memo(WaitNodeComponent), fileDownload: memo(FileDownloadNodeComponent), + pdfParser: memo(PDFParserNodeComponent), } as const; diff --git a/skyvern-frontend/src/routes/workflows/editor/nodes/types.ts b/skyvern-frontend/src/routes/workflows/editor/nodes/types.ts index 2409c5ae3d..bac41c2133 100644 --- a/skyvern-frontend/src/routes/workflows/editor/nodes/types.ts +++ b/skyvern-frontend/src/routes/workflows/editor/nodes/types.ts @@ -17,6 +17,16 @@ export const dataSchemaExampleValue = { }, } as const; +export const dataSchemaExampleForFileExtraction = { + type: "object", + properties: { + extracted_information: { + type: "object", + description: "All of the information extracted from the file", + }, + }, +}; + export const workflowBlockTitle: { [blockType in WorkflowBlockType]: string; } = { @@ -35,4 +45,5 @@ export const workflowBlockTitle: { upload_to_s3: "Upload", validation: "Validation", wait: "Wait", + pdf_parser: "PDF Parser", }; diff --git a/skyvern-frontend/src/routes/workflows/editor/panels/WorkflowNodeLibraryPanel.tsx b/skyvern-frontend/src/routes/workflows/editor/panels/WorkflowNodeLibraryPanel.tsx index ceb521b39c..f587f1e325 100644 --- a/skyvern-frontend/src/routes/workflows/editor/panels/WorkflowNodeLibraryPanel.tsx +++ b/skyvern-frontend/src/routes/workflows/editor/panels/WorkflowNodeLibraryPanel.tsx @@ -121,6 +121,17 @@ const nodeLibraryItems: Array<{ title: "File Parser Block", description: "Downloads and parses a file", }, + { + nodeType: "pdfParser", + icon: ( + + ), + title: "PDF Parser Block", + description: "Downloads and parses a PDF file with an optional data schema", + }, // disabled // { // nodeType: "download", diff --git a/skyvern-frontend/src/routes/workflows/editor/workflowEditorUtils.ts b/skyvern-frontend/src/routes/workflows/editor/workflowEditorUtils.ts index d18d5fcb1a..2e41d1b4ba 100644 --- a/skyvern-frontend/src/routes/workflows/editor/workflowEditorUtils.ts +++ b/skyvern-frontend/src/routes/workflows/editor/workflowEditorUtils.ts @@ -32,6 +32,7 @@ import { LoginBlockYAML, WaitBlockYAML, FileDownloadBlockYAML, + PDFParserBlockYAML, } from "../types/workflowYamlTypes"; import { EMAIL_BLOCK_SENDER, @@ -84,6 +85,7 @@ import { loginNodeDefaultData } from "./nodes/LoginNode/types"; import { waitNodeDefaultData } from "./nodes/WaitNode/types"; import { fileDownloadNodeDefaultData } from "./nodes/FileDownloadNode/types"; import { ProxyLocation } from "@/api/types"; +import { pdfParserNodeDefaultData } from "./nodes/PDFParserNode/types"; export const NEW_NODE_LABEL_PREFIX = "block_"; @@ -394,6 +396,19 @@ function convertToNode( }; } + case "pdf_parser": { + return { + ...identifiers, + ...common, + type: "pdfParser", + data: { + ...commonData, + fileUrl: block.file_url, + jsonSchema: JSON.stringify(block.json_schema, null, 2), + }, + }; + } + case "download_to_s3": { return { ...identifiers, @@ -788,6 +803,17 @@ function createNode( }, }; } + case "pdfParser": { + return { + ...identifiers, + ...common, + type: "pdfParser", + data: { + ...pdfParserNodeDefaultData, + label, + }, + }; + } } } @@ -1020,6 +1046,14 @@ function getWorkflowBlock(node: WorkflowBlockNode): BlockYAML { parameter_keys: node.data.parameterKeys, }; } + case "pdfParser": { + return { + ...base, + block_type: "pdf_parser", + file_url: node.data.fileUrl, + json_schema: JSONParseSafe(node.data.jsonSchema), + }; + } default: { throw new Error("Invalid node type for getWorkflowBlock"); } @@ -1642,6 +1676,15 @@ function convertBlocksToBlockYAML( }; return blockYaml; } + case "pdf_parser": { + const blockYaml: PDFParserBlockYAML = { + ...base, + block_type: "pdf_parser", + file_url: block.file_url, + json_schema: block.json_schema, + }; + return blockYaml; + } case "send_email": { const blockYaml: SendEmailBlockYAML = { ...base, diff --git a/skyvern-frontend/src/routes/workflows/types/workflowTypes.ts b/skyvern-frontend/src/routes/workflows/types/workflowTypes.ts index de3f05a953..1fd92d95e4 100644 --- a/skyvern-frontend/src/routes/workflows/types/workflowTypes.ts +++ b/skyvern-frontend/src/routes/workflows/types/workflowTypes.ts @@ -154,7 +154,8 @@ export type WorkflowBlock = | ExtractionBlock | LoginBlock | WaitBlock - | FileDownloadBlock; + | FileDownloadBlock + | PDFParserBlock; export const WorkflowBlockTypes = { Task: "task", @@ -172,6 +173,7 @@ export const WorkflowBlockTypes = { Login: "login", Wait: "wait", FileDownload: "file_download", + PDFParser: "pdf_parser", } as const; export function isTaskVariantBlock(item: { @@ -369,6 +371,12 @@ export type FileDownloadBlock = WorkflowBlockBase & { cache_actions: boolean; }; +export type PDFParserBlock = WorkflowBlockBase & { + block_type: "pdf_parser"; + file_url: string; + json_schema: Record | null; +}; + export type WorkflowDefinition = { parameters: Array; blocks: Array; diff --git a/skyvern-frontend/src/routes/workflows/types/workflowYamlTypes.ts b/skyvern-frontend/src/routes/workflows/types/workflowYamlTypes.ts index 801026c2d4..896c5bf58c 100644 --- a/skyvern-frontend/src/routes/workflows/types/workflowYamlTypes.ts +++ b/skyvern-frontend/src/routes/workflows/types/workflowYamlTypes.ts @@ -97,7 +97,8 @@ export type BlockYAML = | ExtractionBlockYAML | LoginBlockYAML | WaitBlockYAML - | FileDownloadBlockYAML; + | FileDownloadBlockYAML + | PDFParserBlockYAML; export type BlockYAMLBase = { block_type: WorkflowBlockType; @@ -265,3 +266,9 @@ export type ForLoopBlockYAML = BlockYAMLBase & { loop_blocks: Array; loop_variable_reference: string | null; }; + +export type PDFParserBlockYAML = BlockYAMLBase & { + block_type: "pdf_parser"; + file_url: string; + json_schema: Record | null; +};