Skip to content

Commit

Permalink
Add PDF Parser node in frontend (#1604)
Browse files Browse the repository at this point in the history
  • Loading branch information
wintonzheng authored Jan 21, 2025
1 parent 1796af6 commit 619d72a
Show file tree
Hide file tree
Showing 9 changed files with 245 additions and 3 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
import { HelpTooltip } from "@/components/HelpTooltip";
import { Input } from "@/components/ui/input";
import { Label } from "@/components/ui/label";
import { useDeleteNodeCallback } from "@/routes/workflows/hooks/useDeleteNodeCallback";
import { useNodeLabelChangeHandler } from "@/routes/workflows/hooks/useLabelChangeHandler";
import { WorkflowBlockTypes } from "@/routes/workflows/types/workflowTypes";
import { Handle, NodeProps, Position, useReactFlow } from "@xyflow/react";
import { useState } from "react";
import { helpTooltips } from "../../helpContent";
import { EditableNodeTitle } from "../components/EditableNodeTitle";
import { NodeActionMenu } from "../NodeActionMenu";
import { WorkflowBlockIcon } from "../WorkflowBlockIcon";
import { type PDFParserNode } from "./types";
import { Checkbox } from "@/components/ui/checkbox";
import { dataSchemaExampleForFileExtraction } from "../types";
import { CodeEditor } from "@/routes/workflows/components/CodeEditor";

/**
 * Workflow editor node for the PDF parser block.
 *
 * Renders an editable title, a "File URL" input and an optional JSON data
 * schema editor. Local state mirrors the node's data so the inputs stay
 * controlled; every edit is also pushed into the React Flow node data via
 * `updateNodeData`.
 *
 * The local state keys intentionally match the field names of
 * `PDFParserNodeData` (`fileUrl`, `jsonSchema`), because `handleChange`
 * forwards the same key to `updateNodeData`. Using any other key would store
 * the value under a field that the rest of the editor never reads.
 *
 * @param id - React Flow node id.
 * @param data - Node data (`PDFParserNodeData`).
 */
function PDFParserNode({ id, data }: NodeProps<PDFParserNode>) {
  const { updateNodeData } = useReactFlow();
  const deleteNodeCallback = useDeleteNodeCallback();
  const [inputs, setInputs] = useState({
    fileUrl: data.fileUrl,
    jsonSchema: data.jsonSchema,
  });
  const [label, setLabel] = useNodeLabelChangeHandler({
    id,
    initialValue: data.label,
  });

  /**
   * Updates one field both in local state and in the node data.
   * Does nothing when the node is not editable.
   */
  function handleChange(key: "fileUrl" | "jsonSchema", value: unknown) {
    if (!data.editable) {
      return;
    }
    // Functional update so consecutive edits never start from a stale snapshot.
    setInputs((previous) => ({ ...previous, [key]: value }));
    updateNodeData(id, { [key]: value });
  }

  // The string "null" is the sentinel for "no data schema".
  const hasSchema = inputs.jsonSchema !== "null";

  return (
    <div>
      <Handle
        type="source"
        position={Position.Bottom}
        id="a"
        className="opacity-0"
      />
      <Handle
        type="target"
        position={Position.Top}
        id="b"
        className="opacity-0"
      />
      <div className="w-[30rem] space-y-4 rounded-lg bg-slate-elevation3 px-6 py-4">
        <div className="flex h-[2.75rem] justify-between">
          <div className="flex gap-2">
            <div className="flex h-[2.75rem] w-[2.75rem] items-center justify-center rounded border border-slate-600">
              <WorkflowBlockIcon
                workflowBlockType={WorkflowBlockTypes.PDFParser}
                className="size-6"
              />
            </div>
            <div className="flex flex-col gap-1">
              <EditableNodeTitle
                value={label}
                editable={data.editable}
                onChange={setLabel}
                titleClassName="text-base"
                inputClassName="text-base"
              />
              <span className="text-xs text-slate-400">PDF Parser Block</span>
            </div>
          </div>
          <NodeActionMenu
            onDelete={() => {
              deleteNodeCallback(id);
            }}
          />
        </div>
        <div className="space-y-4">
          <div className="space-y-2">
            <div className="flex gap-2">
              <Label className="text-xs text-slate-300">File URL</Label>
              <HelpTooltip content={helpTooltips["fileParser"]["fileUrl"]} />
            </div>
            <Input
              value={inputs.fileUrl}
              onChange={(event) => {
                handleChange("fileUrl", event.target.value);
              }}
              className="nopan text-xs"
            />
          </div>
          <div className="space-y-2">
            <div className="flex gap-4">
              <div className="flex gap-2">
                <Label className="text-xs text-slate-300">Data Schema</Label>
                <HelpTooltip content={helpTooltips["task"]["dataSchema"]} />
              </div>
              <Checkbox
                checked={hasSchema}
                onCheckedChange={(checked) => {
                  // Checking seeds the editor with the example schema;
                  // unchecking resets the field to the "null" sentinel.
                  handleChange(
                    "jsonSchema",
                    checked
                      ? JSON.stringify(
                          dataSchemaExampleForFileExtraction,
                          null,
                          2,
                        )
                      : "null",
                  );
                }}
              />
            </div>
            {hasSchema && (
              <div>
                <CodeEditor
                  language="json"
                  value={inputs.jsonSchema}
                  onChange={(value) => {
                    handleChange("jsonSchema", value);
                  }}
                  className="nowheel nopan"
                  fontSize={8}
                />
              </div>
            )}
          </div>
        </div>
      </div>
    </div>
  );
}

export { PDFParserNode };
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import type { Node } from "@xyflow/react";
import { NodeBaseData } from "../types";

/**
 * Data stored on a PDF parser node in the workflow editor, in addition to
 * the fields shared by all nodes (`NodeBaseData`).
 */
export type PDFParserNodeData = NodeBaseData & {
// URL of the file to parse (mapped to/from the block's `file_url`).
fileUrl: string;
// JSON schema held as serialized JSON text. The string "null" means that no
// schema is set; it is parsed back into a value when the block is built.
jsonSchema: string;
};

/** React Flow node whose `type` is "pdfParser" and whose data is `PDFParserNodeData`. */
export type PDFParserNode = Node<PDFParserNodeData, "pdfParser">;

/**
 * Initial data for a newly created PDF parser node: editable, empty label and
 * file URL, and no data schema (the "null" sentinel string).
 */
export const pdfParserNodeDefaultData: PDFParserNodeData = {
editable: true,
label: "",
fileUrl: "",
continueOnFailure: false,
jsonSchema: "null",
} as const;
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,9 @@ function WorkflowBlockIcon({ workflowBlockType, className }: Props) {
case "wait": {
return <StopwatchIcon className={className} />;
}
case "pdf_parser": {
return <CursorTextIcon className={className} />;
}
}
}

Expand Down
6 changes: 5 additions & 1 deletion skyvern-frontend/src/routes/workflows/editor/nodes/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ import { WaitNode } from "./WaitNode/types";
import { WaitNode as WaitNodeComponent } from "./WaitNode/WaitNode";
import { FileDownloadNode } from "./FileDownloadNode/types";
import { FileDownloadNode as FileDownloadNodeComponent } from "./FileDownloadNode/FileDownloadNode";
import { PDFParserNode } from "./PDFParserNode/types";
import { PDFParserNode as PDFParserNodeComponent } from "./PDFParserNode/PDFParserNode";

export type UtilityNode = StartNode | NodeAdderNode;

Expand All @@ -51,7 +53,8 @@ export type WorkflowBlockNode =
| ExtractionNode
| LoginNode
| WaitNode
| FileDownloadNode;
| FileDownloadNode
| PDFParserNode;

export function isUtilityNode(node: AppNode): node is UtilityNode {
return node.type === "nodeAdder" || node.type === "start";
Expand Down Expand Up @@ -81,4 +84,5 @@ export const nodeTypes = {
login: memo(LoginNodeComponent),
wait: memo(WaitNodeComponent),
fileDownload: memo(FileDownloadNodeComponent),
pdfParser: memo(PDFParserNodeComponent),
} as const;
11 changes: 11 additions & 0 deletions skyvern-frontend/src/routes/workflows/editor/nodes/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,16 @@ export const dataSchemaExampleValue = {
},
} as const;

/**
 * Example JSON schema for file extraction: an object with a single
 * `extracted_information` object property. The PDF parser node serializes
 * this value as the starting schema when its data schema checkbox is checked.
 */
export const dataSchemaExampleForFileExtraction = {
type: "object",
properties: {
extracted_information: {
type: "object",
description: "All of the information extracted from the file",
},
},
};

export const workflowBlockTitle: {
[blockType in WorkflowBlockType]: string;
} = {
Expand All @@ -35,4 +45,5 @@ export const workflowBlockTitle: {
upload_to_s3: "Upload",
validation: "Validation",
wait: "Wait",
pdf_parser: "PDF Parser",
};
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,17 @@ const nodeLibraryItems: Array<{
title: "File Parser Block",
description: "Downloads and parses a file",
},
{
nodeType: "pdfParser",
icon: (
<WorkflowBlockIcon
workflowBlockType={WorkflowBlockTypes.PDFParser}
className="size-6"
/>
),
title: "PDF Parser Block",
description: "Downloads and parses a PDF file with an optional data schema",
},
// disabled
// {
// nodeType: "download",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import {
LoginBlockYAML,
WaitBlockYAML,
FileDownloadBlockYAML,
PDFParserBlockYAML,
} from "../types/workflowYamlTypes";
import {
EMAIL_BLOCK_SENDER,
Expand Down Expand Up @@ -84,6 +85,7 @@ import { loginNodeDefaultData } from "./nodes/LoginNode/types";
import { waitNodeDefaultData } from "./nodes/WaitNode/types";
import { fileDownloadNodeDefaultData } from "./nodes/FileDownloadNode/types";
import { ProxyLocation } from "@/api/types";
import { pdfParserNodeDefaultData } from "./nodes/PDFParserNode/types";

export const NEW_NODE_LABEL_PREFIX = "block_";

Expand Down Expand Up @@ -394,6 +396,19 @@ function convertToNode(
};
}

case "pdf_parser": {
return {
...identifiers,
...common,
type: "pdfParser",
data: {
...commonData,
fileUrl: block.file_url,
jsonSchema: JSON.stringify(block.json_schema, null, 2),
},
};
}

case "download_to_s3": {
return {
...identifiers,
Expand Down Expand Up @@ -788,6 +803,17 @@ function createNode(
},
};
}
case "pdfParser": {
return {
...identifiers,
...common,
type: "pdfParser",
data: {
...pdfParserNodeDefaultData,
label,
},
};
}
}
}

Expand Down Expand Up @@ -1020,6 +1046,14 @@ function getWorkflowBlock(node: WorkflowBlockNode): BlockYAML {
parameter_keys: node.data.parameterKeys,
};
}
case "pdfParser": {
return {
...base,
block_type: "pdf_parser",
file_url: node.data.fileUrl,
json_schema: JSONParseSafe(node.data.jsonSchema),
};
}
default: {
throw new Error("Invalid node type for getWorkflowBlock");
}
Expand Down Expand Up @@ -1642,6 +1676,15 @@ function convertBlocksToBlockYAML(
};
return blockYaml;
}
case "pdf_parser": {
const blockYaml: PDFParserBlockYAML = {
...base,
block_type: "pdf_parser",
file_url: block.file_url,
json_schema: block.json_schema,
};
return blockYaml;
}
case "send_email": {
const blockYaml: SendEmailBlockYAML = {
...base,
Expand Down
10 changes: 9 additions & 1 deletion skyvern-frontend/src/routes/workflows/types/workflowTypes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,8 @@ export type WorkflowBlock =
| ExtractionBlock
| LoginBlock
| WaitBlock
| FileDownloadBlock;
| FileDownloadBlock
| PDFParserBlock;

export const WorkflowBlockTypes = {
Task: "task",
Expand All @@ -172,6 +173,7 @@ export const WorkflowBlockTypes = {
Login: "login",
Wait: "wait",
FileDownload: "file_download",
PDFParser: "pdf_parser",
} as const;

export function isTaskVariantBlock(item: {
Expand Down Expand Up @@ -369,6 +371,12 @@ export type FileDownloadBlock = WorkflowBlockBase & {
cache_actions: boolean;
};

/**
 * Workflow block with `block_type` "pdf_parser".
 */
export type PDFParserBlock = WorkflowBlockBase & {
block_type: "pdf_parser";
// URL of the file to parse.
file_url: string;
// JSON schema object, or null when no schema is set.
json_schema: Record<string, unknown> | null;
};

export type WorkflowDefinition = {
parameters: Array<Parameter>;
blocks: Array<WorkflowBlock>;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,8 @@ export type BlockYAML =
| ExtractionBlockYAML
| LoginBlockYAML
| WaitBlockYAML
| FileDownloadBlockYAML;
| FileDownloadBlockYAML
| PDFParserBlockYAML;

export type BlockYAMLBase = {
block_type: WorkflowBlockType;
Expand Down Expand Up @@ -265,3 +266,9 @@ export type ForLoopBlockYAML = BlockYAMLBase & {
loop_blocks: Array<BlockYAML>;
loop_variable_reference: string | null;
};

/**
 * YAML representation of a "pdf_parser" block; mirrors the `file_url` and
 * `json_schema` fields of `PDFParserBlock`.
 */
export type PDFParserBlockYAML = BlockYAMLBase & {
block_type: "pdf_parser";
// URL of the file to parse.
file_url: string;
// JSON schema object, or null when no schema is set.
json_schema: Record<string, unknown> | null;
};

0 comments on commit 619d72a

Please sign in to comment.