Skip to content

Commit

Permalink
Merge pull request #2915 from quantified-uncertainty/llm-compress
Browse files Browse the repository at this point in the history
Fixing LLM generation, using JSON
  • Loading branch information
OAGr authored Jan 5, 2024
2 parents 3a1c31d + 87d7535 commit 56e8f50
Show file tree
Hide file tree
Showing 8 changed files with 148 additions and 53 deletions.
5 changes: 3 additions & 2 deletions packages/website/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
"dev": "next dev",
"lint": "prettier --check . && next lint",
"format": "prettier --write .",
"gen:docsForLLM": "node scripts/compileDocsForLLM.js",
"gen:modulePages": "node scripts/generateModulePages.mjs",
"gen:docsForLLM": "tsx scripts/compileDocsForLLM.mts",
"gen:modulePages": "tsx scripts/generateModulePages.mts",
"gen": "pnpm run gen:modulePages && pnpm run gen:docsForLLM",
"build": "pnpm run gen && next build"
},
Expand Down Expand Up @@ -40,6 +40,7 @@
"postcss": "^8.4.32",
"prettier": "^3.1.1",
"tailwindcss": "^3.4.0",
"tsx": "^4.7.0",
"typescript": "^5.3.3"
}
}
Original file line number Diff line number Diff line change
@@ -1,11 +1,48 @@
#!/usr/bin/env node
const { glob } = require("glob");
const fs = require("fs");
import fs from "fs";
import { glob } from "glob";

const readFile = (fileName) => {
import { FnDocumentation } from "@quri/squiggle-lang";

import { modulePages } from "../templates.mjs";
import { generateModuleContent } from "./generateModuleContent.mjs";

/**
 * Synchronously loads a file from disk and returns its contents as text.
 *
 * @param fileName - Path of the file to read.
 * @returns The file contents decoded as UTF-8.
 */
const readFile = (fileName: string) =>
  fs.readFileSync(fileName, { encoding: "utf-8" });

/**
 * Serializes a function's documentation as pretty-printed JSON
 * (2-space indentation).
 *
 * Only the fields picked below are emitted, in this order; any other
 * property on the documentation object is left out of the result.
 *
 * @param documentation - Documentation record for a single function.
 * @returns The JSON text for that record.
 */
function moduleItemToJson(documentation: FnDocumentation) {
  const {
    name,
    description,
    nameSpace,
    requiresNamespace,
    examples,
    signatures,
    shorthand,
    isUnit,
  } = documentation;

  const picked = {
    name,
    description,
    nameSpace,
    requiresNamespace,
    examples,
    signatures,
    shorthand,
    isUnit,
  };

  return JSON.stringify(picked, null, 2);
}

/**
 * Renders every entry of `modulePages` with `generateModuleContent`, using
 * `moduleItemToJson` to format each function, and joins the rendered pages
 * with two blank lines between them.
 *
 * @returns The concatenated content of all module pages.
 */
const allDocumentationItems = () => {
  const renderedPages = modulePages.map((page) =>
    generateModuleContent(page, moduleItemToJson)
  );
  return renderedPages.join("\n\n\n");
};

const documentationBundlePage = async () => {
const targetFilename = "./public/llms/documentationBundle.txt";

Expand All @@ -19,20 +56,22 @@ This file is auto-generated from the documentation files in the Squiggle reposit
return readFile(grammarFiles[0]);
};

/**
 * Reads every guide page (`.md` / `.mdx`) directly under
 * `./src/pages/docs/Guides` and concatenates their contents, separated by
 * two blank lines.
 *
 * @returns The combined text of all guide pages.
 */
const getGuideContent = async () => {
  // The directory is written as a plain path segment on purpose: a brace
  // group with a single item (`{Guides}`) is not expanded by glob's brace
  // expansion and would be matched literally, finding no files.
  const documentationFiles = await glob("./src/pages/docs/Guides/*.{md,mdx}");
  // glob does not guarantee an ordering; sort so the generated bundle is
  // stable from run to run.
  return documentationFiles.sort().map(readFile).join("\n\n\n");
};

console.log("Compiling documentation bundle page...");
const grammarContent = await getGrammarContent();
const documentationContent = await getDocumentationContent();
const guideContent = await getGuideContent();
const apiContent = allDocumentationItems();
const content =
header +
`## Peggy Grammar \n\n ${grammarContent} \n\n --- \n\n ` +
documentationContent;
guideContent +
apiContent;
fs.writeFile(targetFilename, content, (err) => {
if (err) {
console.error(err);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,35 +1,36 @@
#!/usr/bin/env node
import fs from "fs";

import {
FnDocumentation,
getAllFunctionNamesWithNamespace,
getFunctionDocumentation,
} from "@quri/squiggle-lang";

import { modulePages } from "../templates.mjs";

const targetFilename = (name) => `./src/pages/docs/Api/${name}.mdx`;
import { ModulePage, ModulePageSection } from "../templates.mjs";

//We need to escape the curly braces in the markdown for .jsx files.
function escapedStr(str) {
/**
 * Prefixes every `{` and `}` in the given text with a backslash.
 *
 * @param str - Text to escape.
 * @returns The text with all curly braces backslash-escaped.
 */
function escapedStr(str: string) {
  // "$&" re-inserts the matched brace right after the added backslash.
  return str.replace(/[{}]/g, "\\$&");
}

function toMarkdown(documentation) {
function toMarkdown(documentation: FnDocumentation) {
const fullName = documentation.nameSpace + "." + documentation.name;
return `### ${documentation.name}
${escapedStr(documentation.description || "")}
<FnDocumentationFromName functionName="${fullName}" showNameAndDescription={false} size="small" />
`;
${escapedStr(documentation.description || "")}
<FnDocumentationFromName functionName="${fullName}" showNameAndDescription={false} size="small" />
`;
}

const generateModulePage = async ({ name, description, intro, sections }) => {
export function generateModuleContent(
{ name, description, intro, sections }: ModulePage,
itemFn = toMarkdown
) {
// const itemFn = toJSON;
const namespaceNames = getAllFunctionNamesWithNamespace(name);
let fnDocumentationItems = namespaceNames
.map(getFunctionDocumentation)
.filter(({ isUnit }) => !isUnit);
.filter((fn): fn is FnDocumentation => Boolean(fn && !fn.isUnit));

const processSection = (section) => {
const processSection = (section: ModulePageSection) => {
const sectionFnDocumentationItems = fnDocumentationItems.filter(
({ displaySection }) => displaySection === section.name
);
Expand All @@ -42,15 +43,15 @@ const generateModulePage = async ({ name, description, intro, sections }) => {
const sectionHeader = section.name && `## ${section.name}\n\n`;
const sectionDescription =
section.description && `${section.description}\n\n`;
const sectionItems = sectionFnDocumentationItems.map(toMarkdown).join("\n");
const sectionItems = sectionFnDocumentationItems.map(itemFn).join("\n");
return `${sectionHeader || ""}${sectionDescription || ""}${sectionItems}`;
};

let functionSection;
if (sections?.length > 0) {
if (sections && sections.length > 0) {
functionSection = sections.map(processSection).join("\n\n");
} else {
functionSection = fnDocumentationItems.map(toMarkdown).join("\n\n");
functionSection = fnDocumentationItems.map(itemFn).join("\n\n");
}

const content = `---
Expand All @@ -62,17 +63,5 @@ import { SquiggleEditor, FnDocumentationFromName } from "@quri/squiggle-componen
${intro}
${functionSection}`;

fs.writeFile(targetFilename(name), content, (err) => {
if (err) {
console.error(err);
return;
}
console.log(`Content written to ${targetFilename(name)}`);
});
};

//Remember to add any new Modules to .gitignore
for (const modulePage of modulePages) {
await generateModulePage(modulePage);
return content;
}
46 changes: 46 additions & 0 deletions packages/website/scripts/generateModulePages.mts
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#!/usr/bin/env node
import fs from "fs";

import { FnDocumentation } from "@quri/squiggle-lang";

import { ModulePage, modulePages } from "../templates.mjs";
import { generateModuleContent } from "./generateModuleContent.mjs";

const targetFilename = (name: string) => `./src/pages/docs/Api/${name}.mdx`;

// Curly braces must be escaped in the generated markdown for .jsx files,
// so each `{` and `}` is emitted with a leading backslash.
function escapedStr(str: string) {
  return str.replace(/[{}]/g, (brace) => `\\${brace}`);
}

/**
 * Renders one function's documentation as an MDX fragment: a level-3 heading
 * with the function name, the brace-escaped description on its own line, and
 * a `<FnDocumentationFromName>` element referencing the fully qualified name.
 *
 * @param documentation - Documentation record for a single function.
 * @returns The MDX text for that function, ending with a newline.
 */
function toMarkdown(documentation: FnDocumentation) {
  const fullName = documentation.nameSpace + "." + documentation.name;
  const description = escapedStr(documentation.description || "");
  // The heading must end with a newline; otherwise the description is
  // appended to the heading text itself.
  return `### ${documentation.name}
${description}
<FnDocumentationFromName functionName="${fullName}" showNameAndDescription={false} size="small" />
`;
}

/**
 * Generates the content for one module page and writes it to the page's
 * `.mdx` file under `./src/pages/docs/Api/`.
 *
 * The returned promise settles only after the write has finished. A write
 * failure is logged with `console.error` and does not reject the promise.
 *
 * @param page - Module page definition (name, description, intro, sections).
 * @param itemFn - Formatter applied to each documented function; defaults to
 *   `toMarkdown`.
 */
const generateModulePage = async (
  { name, description, intro, sections }: ModulePage,
  itemFn = toMarkdown
) => {
  const content = generateModuleContent(
    { name, description, intro, sections },
    itemFn
  );
  const target = targetFilename(name);

  try {
    // Await the promise-based API so callers that `await` this function
    // actually wait for the file to be written.
    await fs.promises.writeFile(target, content);
  } catch (err) {
    console.error(err);
    return;
  }
  console.log(`Content written to ${target}`);
};

// Remember to add any new Modules to .gitignore
// Script entry point: run the generator for each configured module page,
// in the order they appear in `modulePages`.
for (const page of modulePages) {
  await generateModulePage(page, toMarkdown);
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,13 @@
export const modulePages = [
/**
 * A titled group of functions within a module page. When a page declares
 * sections, functions whose `displaySection` equals `name` are listed under
 * a `## name` heading, followed by the optional `description`.
 */
export type ModulePageSection = { name: string; description?: string };

/**
 * Definition of one generated API documentation page.
 */
export type ModulePage = {
// Namespace whose functions are documented; also used as the file name of
// the generated `.mdx` page.
name: string;
// NOTE(review): presumably rendered in the page front matter — the code
// that consumes it is not visible here; confirm.
description: string;
// Optional grouping of functions. When absent or empty, all functions of
// the namespace are listed without section headings.
sections?: ModulePageSection[];
// Text emitted before the function listing.
intro: string;
};

export const modulePages: ModulePage[] = [
{
name: "Tag",
description:
Expand Down
10 changes: 9 additions & 1 deletion packages/website/tsconfig.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,14 @@
"jsx": "preserve",
"allowJs": true
},
"include": ["next-env.d.ts", "**/*.ts", "**/*.tsx"],
"include": [
"next-env.d.ts",
"**/*.ts",
"**/*.tsx",
"scripts/generateModulePages.mts",
"scripts/generateModuleContent.mts",
"scripts/compileDocsForLLM.mts",
"templates.mts"
],
"exclude": ["node_modules"]
}
26 changes: 13 additions & 13 deletions packages/website/turbo.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,19 @@
"!.next/cache/**",
"public/llms/documentationBundle.txt",
"src/pages/docs/Ecosystem/LLMPrompt.md",
"/src/pages/docs/Api/Date.mdx",
"/src/pages/docs/Api/Duration.mdx",
"/src/pages/docs/Api/Tag.mdx",
"/src/pages/docs/Api/Calculator.mdx",
"/src/pages/docs/Api/Input.mdx",
"/src/pages/docs/Api/Dict.mdx",
"/src/pages/docs/Api/SampleSet.mdx",
"/src/pages/docs/Api/PointSet.mdx",
"/src/pages/docs/Api/Sym.mdx",
"/src/pages/docs/Api/Scale.mdx",
"/src/pages/docs/Api/List.mdx",
"/src/pages/docs/Api/Danger.mdx",
"/src/pages/docs/Api/Dist.mdx",
"src/pages/docs/Api/Date.mdx",
"src/pages/docs/Api/Duration.mdx",
"src/pages/docs/Api/Tag.mdx",
"src/pages/docs/Api/Calculator.mdx",
"src/pages/docs/Api/Input.mdx",
"src/pages/docs/Api/Dict.mdx",
"src/pages/docs/Api/SampleSet.mdx",
"src/pages/docs/Api/PointSet.mdx",
"src/pages/docs/Api/Sym.mdx",
"src/pages/docs/Api/Scale.mdx",
"src/pages/docs/Api/List.mdx",
"src/pages/docs/Api/Danger.mdx",
"src/pages/docs/Api/Dist.mdx",
"src/pages/docs/Ecosystem/LLMPrompt.md"
]
},
Expand Down
3 changes: 3 additions & 0 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 comment on commit 56e8f50

@vercel
Copy link

@vercel vercel bot commented on 56e8f50 Jan 5, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.