diff --git a/package-lock.json b/package-lock.json index 9e03ec2..e445d38 100644 --- a/package-lock.json +++ b/package-lock.json @@ -20,6 +20,7 @@ "lodash.isequal": "^4.5.0", "lucide-react": "^0.447.0", "openai": "^4.65.0", + "parse5": "^7.1.2", "react": "^18.3.1", "react-dom": "^18.3.1", "react-markdown": "^9.0.1", @@ -3984,6 +3985,17 @@ "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", "dev": true }, + "node_modules/entities": { + "version": "4.5.0", + "resolved": "https://registry.npmjs.org/entities/-/entities-4.5.0.tgz", + "integrity": "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==", + "engines": { + "node": ">=0.12" + }, + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, "node_modules/error-ex": { "version": "1.3.2", "resolved": "https://registry.npmjs.org/error-ex/-/error-ex-1.3.2.tgz", @@ -7948,6 +7960,17 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/parse5": { + "version": "7.1.2", + "resolved": "https://registry.npmjs.org/parse5/-/parse5-7.1.2.tgz", + "integrity": "sha512-Czj1WaSVpaoj0wbhMzLmWD69anp2WH7FXMB9n1Sy8/ZFF9jolSQVMu1Ij5WIyGmcBmhk7EOndpO4mIpihVqAXw==", + "dependencies": { + "entities": "^4.4.0" + }, + "funding": { + "url": "https://github.com/inikulin/parse5?sponsor=1" + } + }, "node_modules/path-exists": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-4.0.0.tgz", diff --git a/package.json b/package.json index bdf0829..d2ec86b 100644 --- a/package.json +++ b/package.json @@ -50,6 +50,7 @@ "lodash.isequal": "^4.5.0", "lucide-react": "^0.447.0", "openai": "^4.65.0", + "parse5": "^7.1.2", "react": "^18.3.1", "react-dom": "^18.3.1", "react-markdown": "^9.0.1", diff --git a/src/components/ReactMarkdown.tsx b/src/components/ReactMarkdown.tsx index 628b624..8350c45 100644 --- a/src/components/ReactMarkdown.tsx +++ b/src/components/ReactMarkdown.tsx @@ -1,56 +1,11 @@ import Markdown from 'react-markdown' -import MarkdownCodeComponent from './MarkdownCodeComponent' - -function parsesmtcmpBlocks(input: string): ( - | { type: 'string'; content: string } - | { - type: 'smtcmpBlock' - content: string - language?: string - filename?: string - } -)[] { - const regex = /]*)>\s*([\s\S]*?)\s*(?:<\/smtcmpBlock>|$)/g - const matches = input.matchAll(regex) - const result: ( - | { type: 'string'; content: string } - | { - type: 'smtcmpBlock' - content: string - language?: string - filename?: string - } - )[] = [] - - let lastIndex = 0 - for (const match of matches) { - if (match.index > lastIndex) { - result.push({ - type: 'string', - content: input.slice(lastIndex, match.index), - }) - } - const [, attributes, content] = match - const language = attributes.match(/language="([^"]+)"/)?.[1] - const filename = attributes.match(/filename="([^"]+)"/)?.[1] - result.push({ - type: 'smtcmpBlock', - content, - language, - filename, - }) - lastIndex = match.index + match[0].length - } - if (lastIndex < input.length) { - result.push({ - type: 'string', - content: input.slice(lastIndex), - }) - } +import { + ParsedSmtcmpBlock, + parsesmtcmpBlocks, +} from '../utils/parse-smtcmp-block' - return result -} +import MarkdownCodeComponent from './MarkdownCodeComponent' export default function ReactMarkdown({ onApply, @@ -61,8 +16,7 @@ export default function ReactMarkdown({ children: string isApplying: boolean }) { - const blocks = parsesmtcmpBlocks(children) - + const blocks: ParsedSmtcmpBlock[] = parsesmtcmpBlocks(children) return ( <> {blocks.map((block, index) => diff --git a/src/utils/apply.ts b/src/utils/apply.ts index 1d1f7a7..08d10b9 100644 --- a/src/utils/apply.ts +++ b/src/utils/apply.ts @@ -10,14 +10,14 @@ const systemPrompt = `You are an intelligent assistant helping a user apply chan You will receive: 1. The content of the target markdown file. -2. A conversation history between the user and the assistant. This conversation may contain multiple markdown blocks suggesting changes to the file. Markdown blocks are indicated by the tag. For example: - +2. A conversation history between the user and the assistant. This conversation may contain multiple markdown blocks suggesting changes to the file. Markdown blocks are indicated by the tag. For example: + {{ edit_1 }} {{ edit_2 }} - + 3. A single, specific markdown block extracted from the conversation history. This block contains the exact changes that should be applied to the target file. Please rewrite the entire markdown file with ONLY the changes from the specified markdown block applied. DO NOT apply changes suggested by other parts of the conversation. Preserve all parts of the original file that are not related to the changes. Output only the file content, without any additional words or explanations.` @@ -72,9 +72,9 @@ ${chatMessages ## Changes to Apply Here is the markdown block that indicates where content changes should be applied. - + ${blockToApply} - + Now rewrite the entire file with the changes applied. Immediately start your response with \`\`\`${currentFile.path}` } diff --git a/src/utils/parse-smtcmp-block.test.ts b/src/utils/parse-smtcmp-block.test.ts new file mode 100644 index 0000000..6b40152 --- /dev/null +++ b/src/utils/parse-smtcmp-block.test.ts @@ -0,0 +1,158 @@ +import { ParsedSmtcmpBlock, parsesmtcmpBlocks } from './parse-smtcmp-block' + +describe('parsesmtcmpBlocks', () => { + it('should parse a string with smtcmp_block elements', () => { + const input = `Some text before + +# Example Markdown + +This is a sample markdown content for testing purposes. + +## Features + +- Lists +- **Bold text** +- *Italic text* +- [Links](https://example.com) + +### Code Block +\`\`\`python +print("Hello, world!") +\`\`\` + +Some text after` + + const expected: ParsedSmtcmpBlock[] = [ + { type: 'string', content: 'Some text before\n' }, + { + type: 'smtcmp_block', + content: ` +# Example Markdown + +This is a sample markdown content for testing purposes. + +## Features + +- Lists +- **Bold text** +- *Italic text* +- [Links](https://example.com) + +### Code Block +\`\`\`python +print("Hello, world!") +\`\`\` +`, + language: 'markdown', + filename: 'example.md', + }, + { type: 'string', content: '\nSome text after' }, + ] + + const result = parsesmtcmpBlocks(input) + expect(result).toEqual(expected) + }) + + it('should handle empty smtcmp_block elements', () => { + const input = ` + + ` + + const expected: ParsedSmtcmpBlock[] = [ + { type: 'string', content: '\n ' }, + { + type: 'smtcmp_block', + content: '', + language: 'python', + filename: undefined, + }, + { type: 'string', content: '\n ' }, + ] + + const result = parsesmtcmpBlocks(input) + expect(result).toEqual(expected) + }) + + it('should handle input without smtcmp_block elements', () => { + const input = 'Just a regular string without any smtcmp_block elements.' + + const expected: ParsedSmtcmpBlock[] = [{ type: 'string', content: input }] + + const result = parsesmtcmpBlocks(input) + expect(result).toEqual(expected) + }) + + it('should handle multiple smtcmp_block elements', () => { + const input = `Start + +def greet(name): + print(f"Hello, {name}!") + +Middle + +# Using tildes for code blocks + +Did you know that you can use tildes for code blocks? + +~~~python +print("Hello, world!") +~~~ + +End` + + const expected: ParsedSmtcmpBlock[] = [ + { type: 'string', content: 'Start\n' }, + { + type: 'smtcmp_block', + content: ` +def greet(name): + print(f"Hello, {name}!") +`, + language: 'python', + filename: 'script.py', + }, + { type: 'string', content: '\nMiddle\n' }, + { + type: 'smtcmp_block', + content: ` +# Using tildes for code blocks + +Did you know that you can use tildes for code blocks? + +~~~python +print("Hello, world!") +~~~ +`, + language: 'markdown', + filename: 'example.md', + }, + { type: 'string', content: '\nEnd' }, + ] + + const result = parsesmtcmpBlocks(input) + expect(result).toEqual(expected) + }) + + it('should handle unfinished smtcmp_block with only opening tag', () => { + const input = `Start + +# Unfinished smtcmp_block + +Some text after without closing tag` + const expected: ParsedSmtcmpBlock[] = [ + { type: 'string', content: 'Start\n' }, + { + type: 'smtcmp_block', + content: ` +# Unfinished smtcmp_block + +Some text after without closing tag`, + language: 'markdown', + filename: undefined, + }, + ] + + const result = parsesmtcmpBlocks(input) + expect(result).toEqual(expected) + }) +}) diff --git a/src/utils/parse-smtcmp-block.ts b/src/utils/parse-smtcmp-block.ts new file mode 100644 index 0000000..8b895ff --- /dev/null +++ b/src/utils/parse-smtcmp-block.ts @@ -0,0 +1,72 @@ +import { parseFragment } from 'parse5' + +export type ParsedSmtcmpBlock = + | { type: 'string'; content: string } + | { + type: 'smtcmp_block' + content: string + language?: string + filename?: string + } + +export function parsesmtcmpBlocks(input: string): ParsedSmtcmpBlock[] { + const parsedResult: ParsedSmtcmpBlock[] = [] + const fragment = parseFragment(input, { + sourceCodeLocationInfo: true, + }) + let lastEndOffset = 0 + for (const node of fragment.childNodes) { + if (node.nodeName === 'smtcmp_block') { + if (!node.sourceCodeLocation) { + throw new Error('sourceCodeLocation is undefined') + } + const startOffset = node.sourceCodeLocation.startOffset + const endOffset = node.sourceCodeLocation.endOffset + if (startOffset > lastEndOffset) { + parsedResult.push({ + type: 'string', + content: input.slice(lastEndOffset, startOffset), + }) + } + + const language = node.attrs.find( + (attr) => attr.name === 'language', + )?.value + const filename = node.attrs.find( + (attr) => attr.name === 'filename', + )?.value + + const children = node.childNodes + if (children.length === 0) { + parsedResult.push({ + type: 'smtcmp_block', + content: '', + language, + filename, + }) + } else { + const innerContentStartOffset = + children[0].sourceCodeLocation?.startOffset + const innerContentEndOffset = + children[children.length - 1].sourceCodeLocation?.endOffset + if (!innerContentStartOffset || !innerContentEndOffset) { + throw new Error('sourceCodeLocation is undefined') + } + parsedResult.push({ + type: 'smtcmp_block', + content: input.slice(innerContentStartOffset, innerContentEndOffset), + language, + filename, + }) + } + lastEndOffset = endOffset + } + } + if (lastEndOffset < input.length) { + parsedResult.push({ + type: 'string', + content: input.slice(lastEndOffset), + }) + } + return parsedResult +} diff --git a/src/utils/prompt.ts b/src/utils/prompt.ts index e058667..a641ab2 100644 --- a/src/utils/prompt.ts +++ b/src/utils/prompt.ts @@ -67,14 +67,14 @@ export const parseRequestMessages = async ( 1. Please keep your response as concise as possible. Avoid being verbose. -2. When the user is asking for edits to their markdown, please provide a simplified version of the markdown block emphasizing only the changes. Use comments to show where unchanged content has been skipped. Wrap the markdown block with tags. Add filename and language attributes to the tags. For example: - +2. When the user is asking for edits to their markdown, please provide a simplified version of the markdown block emphasizing only the changes. Use comments to show where unchanged content has been skipped. Wrap the markdown block with tags. Add filename and language attributes to the tags. For example: + {{ edit_1 }} {{ edit_2 }} - + The user has full access to the file, so they prefer seeing only the changes in the markdown. Often this will mean that the start/end of the file will be skipped, but that's okay! Rewrite the entire file only if specifically requested. Always provide a brief explanation of the updates, except when the user specifically asks for just the content. 3. Do not lie or make up facts. @@ -83,18 +83,18 @@ The user has full access to the file, so they prefer seeing only the changes in 5. Format your response in markdown. -6. When writing out new markdown blocks, also wrap them with tags. For example: - +6. When writing out new markdown blocks, also wrap them with tags. For example: + {{ content }} - + -7. When providing markdown blocks for an existing file, add the filename and language attributes to the tags. Restate the relevant section or heading, so the user knows which part of the file you are editing. For example: - +7. When providing markdown blocks for an existing file, add the filename and language attributes to the tags. Restate the relevant section or heading, so the user knows which part of the file you are editing. For example: + ## Section Title ... {{ content }} ... -`, +`, } const currentFile = lastUserMessage.mentionables.find(