From 5601270376efa9b5077c53a21231f578ffa98bb7 Mon Sep 17 00:00:00 2001 From: acmepjz Date: Sat, 13 Jul 2024 03:49:23 +0800 Subject: [PATCH 1/2] chore: update toolchain, add BibtexQuery dependency --- lake-manifest.json | 12 +++++++++++- lakefile.lean | 3 +++ lean-toolchain | 2 +- 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/lake-manifest.json b/lake-manifest.json index d7f8ac3f..7c5f3f2c 100644 --- a/lake-manifest.json +++ b/lake-manifest.json @@ -11,11 +11,21 @@ "inputRev": "main", "inherited": false, "configFile": "lakefile.lean"}, + {"url": "https://github.com/dupuisf/BibtexQuery", + "type": "git", + "subDir": null, + "scope": "", + "rev": "bd8747df9ee72fca365efa5bd3bd0d8dcd083b9f", + "name": "BibtexQuery", + "manifestFile": "lake-manifest.json", + "inputRev": "master", + "inherited": false, + "configFile": "lakefile.lean"}, {"url": "https://github.com/fgdorais/lean4-unicode-basic", "type": "git", "subDir": null, "scope": "", - "rev": "c74a052aebee847780e165611099854de050adf7", + "rev": "f93115d0209de6db335725dee900d379f40c0317", "name": "UnicodeBasic", "manifestFile": "lake-manifest.json", "inputRev": "main", diff --git a/lakefile.lean b/lakefile.lean index 2dbdbffa..48366a9f 100644 --- a/lakefile.lean +++ b/lakefile.lean @@ -14,6 +14,9 @@ lean_exe «doc-gen4» { require MD4Lean from git "https://github.com/acmepjz/md4lean" @ "main" +require BibtexQuery from git + "https://github.com/dupuisf/BibtexQuery" @ "master" + require «UnicodeBasic» from git "https://github.com/fgdorais/lean4-unicode-basic" @ "main" diff --git a/lean-toolchain b/lean-toolchain index d69d1ed2..6a4259fa 100644 --- a/lean-toolchain +++ b/lean-toolchain @@ -1 +1 @@ -leanprover/lean4:v4.10.0-rc1 +leanprover/lean4:v4.10.0-rc2 From de2fa60d8b35c9bc85a1eb6a9589706ef57805b9 Mon Sep 17 00:00:00 2001 From: acmepjz Date: Sat, 13 Jul 2024 03:54:39 +0800 Subject: [PATCH 2/2] feat: add bibliography support --- DocGen4/Output.lean | 51 +++++++++++--- DocGen4/Output/Base.lean | 78 +++++++++++++++++++-- DocGen4/Output/Bibtex.lean | 42 ++++++++++++ DocGen4/Output/DocString.lean | 119 +++++++++++++++++++++++++-------- DocGen4/Output/Inductive.lean | 2 +- DocGen4/Output/Module.lean | 7 +- DocGen4/Output/Navbar.lean | 27 +++++--- DocGen4/Output/References.lean | 97 +++++++++++++++++++++++++++ DocGen4/Output/Structure.lean | 2 +- Main.lean | 29 ++++++++ lakefile.lean | 66 +++++++++++++----- 11 files changed, 442 insertions(+), 78 deletions(-) create mode 100644 DocGen4/Output/Bibtex.lean create mode 100644 DocGen4/Output/References.lean diff --git a/DocGen4/Output.lean b/DocGen4/Output.lean index 7ed9bfed..0411a465 100644 --- a/DocGen4/Output.lean +++ b/DocGen4/Output.lean @@ -10,6 +10,8 @@ import DocGen4.Output.Index import DocGen4.Output.Module import DocGen4.Output.NotFound import DocGen4.Output.Find +import DocGen4.Output.References +import DocGen4.Output.Bibtex import DocGen4.Output.SourceLinker import DocGen4.Output.Search import DocGen4.Output.ToJson @@ -20,6 +22,21 @@ namespace DocGen4 open Lean IO System Output Process +def collectBackrefs : IO (Array BackrefItem) := do + let mut backrefs : Array BackrefItem := #[] + for entry in ← System.FilePath.readDir declarationsBasePath do + if entry.fileName.startsWith "backrefs-" && entry.fileName.endsWith ".json" then + let fileContent ← FS.readFile entry.path + match Json.parse fileContent with + | .error err => + throw <| IO.userError s!"failed to parse file '{entry.path}' as json: {err}" + | .ok jsonContent => + match fromJson? jsonContent with + | .error err => + throw <| IO.userError s!"failed to parse file '{entry.path}': {err}" + | .ok (arr : Array BackrefItem) => backrefs := backrefs ++ arr + return backrefs + def htmlOutputSetup (config : SiteBaseContext) : IO Unit := do let findBasePath := basePath / "find" @@ -35,6 +52,7 @@ def htmlOutputSetup (config : SiteBaseContext) : IO Unit := do let foundationalTypesHtml := ReaderT.run foundationalTypes config |>.toString let navbarHtml := ReaderT.run navbar config |>.toString let searchHtml := ReaderT.run search config |>.toString + let referencesHtml := ReaderT.run (references (← collectBackrefs)) config |>.toString let docGenStatic := #[ ("style.css", styleCss), ("favicon.svg", faviconSvg), @@ -52,7 +70,8 @@ def htmlOutputSetup (config : SiteBaseContext) : IO Unit := do ("index.html", indexHtml), ("foundational_types.html", foundationalTypesHtml), ("404.html", notFoundHtml), - ("navbar.html", navbarHtml) + ("navbar.html", navbarHtml), + ("references.html", referencesHtml) ] for (fileName, content) in docGenStatic do FS.writeFile (basePath / fileName) content @@ -72,14 +91,15 @@ def htmlOutputDeclarationDatas (result : AnalyzerResult) : HtmlT IO Unit := do def htmlOutputResults (baseConfig : SiteBaseContext) (result : AnalyzerResult) (sourceUrl? : Option String) : IO Unit := do let config : SiteContext := { - result := result, + result := result sourceLinker := SourceLinker.sourceLinker sourceUrl? + refsMap := .ofList (baseConfig.refs.map fun x => (x.citekey, x)).toList } FS.createDirAll basePath FS.createDirAll declarationsBasePath - discard <| htmlOutputDeclarationDatas result |>.run config baseConfig + discard <| htmlOutputDeclarationDatas result |>.run {} config baseConfig for (modName, module) in result.moduleInfo.toArray do let fileDir := moduleNameToDirectory basePath modName @@ -90,16 +110,29 @@ def htmlOutputResults (baseConfig : SiteBaseContext) (result : AnalyzerResult) ( depthToRoot := modName.components.dropLast.length currentName := some modName } - let moduleHtml := moduleToHtml module |>.run config baseConfig + let (moduleHtml, cfg) := moduleToHtml module |>.run {} config baseConfig + if not cfg.errors.isEmpty then + throw <| IO.userError s!"There are errors when generating '{filePath}': {cfg.errors}" FS.createDirAll fileDir FS.writeFile filePath moduleHtml.toString + FS.writeFile (declarationsBasePath / s!"backrefs-{module.name}.json") (toString (toJson cfg.backrefs)) def getSimpleBaseContext (hierarchy : Hierarchy) : IO SiteBaseContext := do - return { - depthToRoot := 0, - currentName := none, - hierarchy - } + let contents ← FS.readFile (declarationsBasePath / "references.json") <|> (pure "[]") + match Json.parse contents with + | .error err => + throw <| IO.userError s!"Failed to parse 'references.json': {err}" + | .ok jsonContent => + match fromJson? jsonContent with + | .error err => + throw <| IO.userError s!"Failed to parse 'references.json': {err}" + | .ok (refs : Array BibItem) => + return { + depthToRoot := 0 + currentName := none + hierarchy := hierarchy + refs := refs + } def htmlOutputIndex (baseConfig : SiteBaseContext) : IO Unit := do htmlOutputSetup baseConfig diff --git a/DocGen4/Output/Base.lean b/DocGen4/Output/Base.lean index 9f2e01c0..5d6a1e7d 100644 --- a/DocGen4/Output/Base.lean +++ b/DocGen4/Output/Base.lean @@ -17,6 +17,32 @@ def basePath := lakeBuildDir / "doc" def srcBasePath := basePath / "src" def declarationsBasePath := lakeBuildDir / "doc-data" +/-- The structure representing a processed bibitem. -/ +structure BibItem where + /-- The cite key as in the bib file. -/ + citekey : String + /-- The tag generated by bib processor, e.g. `[Doe12]`. + Should be plain text and should not be escaped. -/ + tag : String + /-- The HTML generated by bib processor, e.g. + `John Doe. Test. 2012.`-/ + html : String + /-- The plain text form of `html` field. Should not be escaped. -/ + plaintext : String + deriving FromJson, ToJson + +/-- The structure representing a backref item. -/ +structure BackrefItem where + /-- The cite key as in the bib file. -/ + citekey : String + /-- The name of the module. -/ + modName : Name + /-- The name of the function, as a string. It is empty if the backref is in modstring. -/ + funName : String + /-- The index of the backref in that module, starting from zero. -/ + index : Nat + deriving FromJson, ToJson, Inhabited + /-- The context used in the `BaseHtmlM` monad for HTML templating. -/ @@ -35,9 +61,13 @@ structure SiteBaseContext where pages that don't have a module name. -/ currentName : Option Name + /-- + The list of references, as an array. + -/ + refs : Array BibItem /-- -The context used in the `HtmlM` monad for HTML templating. +The read-only context used in the `HtmlM` monad for HTML templating. -/ structure SiteContext where /-- @@ -48,27 +78,61 @@ structure SiteContext where A function to link declaration names to their source URLs, usually Github ones. -/ sourceLinker : Name → Option DeclarationRange → String + /-- + The references as a map. + -/ + refsMap : HashMap String BibItem + +/-- +The writable state used in the `HtmlM` monad for HTML templating. +-/ +structure SiteState where + /-- + The list of back references, as an array. + -/ + backrefs : Array BackrefItem := #[] + /-- + The errors occurred during the process. + -/ + errors : String := "" def setCurrentName (name : Name) (ctx : SiteBaseContext) := {ctx with currentName := some name} abbrev BaseHtmlT := ReaderT SiteBaseContext abbrev BaseHtmlM := BaseHtmlT Id -abbrev HtmlT (m) := ReaderT SiteContext (BaseHtmlT m) +abbrev HtmlT (m) := StateT SiteState <| ReaderT SiteContext <| BaseHtmlT m abbrev HtmlM := HtmlT Id -def HtmlT.run (x : HtmlT m α) (ctx : SiteContext) (baseCtx : SiteBaseContext) : m α := - ReaderT.run x ctx |>.run baseCtx +def HtmlT.run (x : HtmlT m α) (state : SiteState) (ctx : SiteContext) + (baseCtx : SiteBaseContext) : m (α × SiteState) := + StateT.run x state |>.run ctx |>.run baseCtx -def HtmlM.run (x : HtmlM α) (ctx : SiteContext) (baseCtx : SiteBaseContext) : α := - ReaderT.run x ctx |>.run baseCtx |>.run +def HtmlM.run (x : HtmlM α) (state : SiteState) (ctx : SiteContext) + (baseCtx : SiteBaseContext) : α × SiteState := + StateT.run x state |>.run ctx |>.run baseCtx |>.run instance [Monad m] : MonadLift HtmlM (HtmlT m) where - monadLift x := do return x.run (← readThe SiteContext) (← readThe SiteBaseContext) + monadLift x := do return (x.run (← getThe SiteState) (← readThe SiteContext) (← readThe SiteBaseContext)).1 instance [Monad m] : MonadLift BaseHtmlM (BaseHtmlT m) where monadLift x := do return x.run (← readThe SiteBaseContext) +/-- Add a backref of the given `citekey` and `funName` to current document, and returns it. -/ +def addBackref (citekey funName : String) : HtmlM BackrefItem := do + let newBackref : BackrefItem := { + citekey := citekey + modName := (← readThe SiteBaseContext).currentName.get! + funName := funName + index := (← get).backrefs.size + } + modify fun cfg => { cfg with backrefs := cfg.backrefs.push newBackref } + pure newBackref + +/-- Add an error message to errors of current document. -/ +def addError (err : String) : HtmlM Unit := do + modify fun cfg => { cfg with errors := cfg.errors ++ err ++ "\n" } + /-- Obtains the root URL as a relative one to the current depth. -/ diff --git a/DocGen4/Output/Bibtex.lean b/DocGen4/Output/Bibtex.lean new file mode 100644 index 00000000..d40cd4d9 --- /dev/null +++ b/DocGen4/Output/Bibtex.lean @@ -0,0 +1,42 @@ +import DocGen4.Output.References +import BibtexQuery.Parser +import BibtexQuery.Format + +/-! + +# bib file processor using `BibtexQuery` + +This file contains functions for bib file processor using +pure Lean library `BibtexQuery`. + +The main function is `DocGen4.Bibtex.process`. + +-/ + +open Lean Xml DocGen4 Output BibtexQuery + +namespace DocGen4.Bibtex + +/-- Process the contents of bib file. -/ +def process' (contents : String) : Except String (Array BibItem) := do + match BibtexQuery.Parser.bibtexFile contents.iter with + | .success _ arr => + let arr ← arr.toArray.filterMapM ProcessedEntry.ofEntry + return arr |> sortEntry |> deduplicateTag |>.map fun x => + let html := Formatter.format x + { + citekey := x.name + tag := x.tag + html := html.map cToStringEscaped |>.toList |> String.join + plaintext := html.map cToPlaintext |>.toList |> String.join + } + | .error it err => + throw s!"failed to parse bib file at pos {it.2}: {err}" + +/-- Process the contents of bib file. -/ +def process (contents : String) : IO (Array BibItem) := do + match process' contents with + | .ok ret => return ret + | .error err => throw <| IO.userError err + +end DocGen4.Bibtex diff --git a/DocGen4/Output/DocString.lean b/DocGen4/Output/DocString.lean index df9a918a..ec7150f8 100644 --- a/DocGen4/Output/DocString.lean +++ b/DocGen4/Output/DocString.lean @@ -119,8 +119,28 @@ def extendLink (s : String) : HtmlM String := do return s else return ((← getRoot) ++ s) +/-- Apply function `modifyElement` to an array of `Lean.Xml.Content`s. -/ +def modifyContents (contents : Array Content) (funName : String) + (modifyElement : Element → String → HtmlM Element) : + HtmlM (Array Content) := do + let modifyContent (c : Content) : HtmlM Content := do + match c with + | Content.Element e => + pure (.Element (← modifyElement e funName)) + | _ => + pure c + contents.mapM modifyContent + +/-- Apply function `modifyElement` to an array of `Lean.Xml.Element`s. -/ +def modifyElements (elements : Array Element) (funName : String) + (modifyElement : Element → String → HtmlM Element) : + HtmlM (Array Element) := do + elements.mapM (modifyElement · funName) + /-- Add attributes for heading. -/ -def addHeadingAttributes (el : Element) (modifyElement : Element → HtmlM Element) : HtmlM Element := do +def addHeadingAttributes (el : Element) (funName : String) + (modifyElement : Element → String → HtmlM Element) : + HtmlM Element := do match el with | Element.Element name attrs contents => do let id := xmlGetHeadingId el @@ -131,22 +151,42 @@ def addHeadingAttributes (el : Element) (modifyElement : Element → HtmlM Eleme let newAttrs := attrs |>.insert "id" id |>.insert "class" "markdown-heading" - let newContents := (← - contents.mapM (fun c => match c with - | Content.Element e => return Content.Element (← modifyElement e) - | _ => pure c)) + let newContents ← modifyContents contents funName modifyElement + return ⟨ name, newAttrs, newContents |>.push (Content.Character " ") - |>.push (Content.Element anchor) - return ⟨ name, newAttrs, newContents⟩ + |>.push (Content.Element anchor) ⟩ + +/-- Find a bibitem if `href` starts with `thePrefix`. -/ +def findBibitem? (href : String) (thePrefix : String := "") : HtmlM (Option BibItem) := do + if href.startsWith thePrefix then + pure <| (← read).refsMap.find? (href.drop thePrefix.length) + else + pure .none /-- Extend anchor links. -/ -def extendAnchor (el : Element) : HtmlM Element := do +def extendAnchor (el : Element) (funName : String) : HtmlM Element := do match el with | Element.Element name attrs contents => - let newAttrs ← match attrs.find? "href" with - | some href => pure (attrs.insert "href" (← extendLink href)) - | none => pure attrs - return ⟨ name, newAttrs, contents⟩ + match attrs.find? "href" with + | some href => + let bibitem ← findBibitem? href "references.html#ref_" + let attrs := attrs.insert "href" (← extendLink href) + match bibitem with + | .some bibitem => + let newBackref ← addBackref bibitem.citekey funName + let changeName : Bool := + if let #[.Character s] := contents then + s == bibitem.citekey + else + false + let newContents : Array Content := + if changeName then #[.Character bibitem.tag] else contents + let attrs := attrs.insert "title" bibitem.plaintext + |>.insert "id" s!"_backref_{newBackref.index}" + return ⟨ name, attrs, newContents ⟩ + | .none => + return ⟨ name, attrs, contents ⟩ + | none => return ⟨ name, attrs, contents ⟩ /-- Automatically add intra documentation link for inline code span. -/ def autoLink (el : Element) : HtmlM Element := do @@ -187,15 +227,15 @@ def autoLink (el : Element) : HtmlM Element := do cats.any (Unicode.isInGeneralCategory c) /-- Core function of modifying the cmark rendered docstring html. -/ -partial def modifyElement (element : Element) : HtmlM Element := +partial def modifyElement (element : Element) (funName : String) : HtmlM Element := match element with | el@(Element.Element name attrs contents) => do -- add id and class to if name = "h1" ∨ name = "h2" ∨ name = "h3" ∨ name = "h4" ∨ name = "h5" ∨ name = "h6" then - addHeadingAttributes el modifyElement + addHeadingAttributes el funName modifyElement -- extend relative href for else if name = "a" then - extendAnchor el + extendAnchor el funName -- auto link for inline else if name = "code" ∧ -- don't linkify code blocks explicitly tagged with a language other than lean @@ -203,22 +243,43 @@ partial def modifyElement (element : Element) : HtmlM Element := autoLink el -- recursively modify else - let newContents ← contents.mapM fun c => match c with - | Content.Element e => return Content.Element (← modifyElement e) - | _ => pure c - return ⟨ name, attrs, newContents ⟩ + return ⟨ name, attrs, ← modifyContents contents funName modifyElement ⟩ + +/-- Find all references in a markdown text. -/ +partial def findAllReferences (refsMap : HashMap String BibItem) (s : String) (i : String.Pos := 0) + (ret : HashSet String := .empty) : HashSet String := + let lps := s.posOfAux '[' s.endPos i + if lps < s.endPos then + let lpe := s.posOfAux ']' s.endPos lps + if lpe < s.endPos then + let citekey := Substring.toString ⟨s, ⟨lps.1 + 1⟩, lpe⟩ + match refsMap.find? citekey with + | .some _ => findAllReferences refsMap s lpe (ret.insert citekey) + | .none => findAllReferences refsMap s lpe ret + else + ret + else + ret /-- Convert docstring to Html. -/ -def docStringToHtml (s : String) : HtmlM (Array Html) := do - let rendered := match MD4Lean.renderHtml s with - | .some res => res - | _ => "" -- TODO: should print some error message - match manyDocument rendered.mkIterator with - | Parsec.ParseResult.success _ res => - -- TODO: use `toString` instead of `eToStringEscaped` - -- once is fixed - res.mapM fun x => do return Html.raw <| eToStringEscaped (← modifyElement x) - | _ => return #[Html.raw rendered] +def docStringToHtml (docString : String) (funName : String) : HtmlM (Array Html) := do + let refsMarkdown := "\n\n" ++ (String.join <| + (findAllReferences (← read).refsMap docString).toList.map fun s => + s!"[{s}]: references.html#ref_{s}\n") + match MD4Lean.renderHtml (docString ++ refsMarkdown) with + | .some rendered => + match manyDocument rendered.mkIterator with + | Parsec.ParseResult.success _ res => + let newRes ← modifyElements res funName modifyElement + -- TODO: use `toString` instead of `eToStringEscaped` + -- once is fixed + return (newRes.map fun x => Html.raw (eToStringEscaped x)) + | _ => + addError <| "Error: failed to postprocess HTML:\n" ++ rendered + return #[.raw "Error: failed to postprocess: ", .raw rendered] + | .none => + addError <| "Error: failed to parse markdown:\n" ++ docString + return #[.raw "Error: failed to parse markdown: ", .text docString] end Output end DocGen4 diff --git a/DocGen4/Output/Inductive.lean b/DocGen4/Output/Inductive.lean index 3d6eaf3b..ead0e067 100644 --- a/DocGen4/Output/Inductive.lean +++ b/DocGen4/Output/Inductive.lean @@ -19,7 +19,7 @@ def ctorToHtml (c : Process.NameInfo) : HtmlM Html := do let shortName := c.name.componentsRev.head!.toString let name := c.name.toString if let some doc := c.doc then - let renderedDoc ← docStringToHtml doc + let renderedDoc ← docStringToHtml doc name pure
  • {shortName} : [← infoFormatToHtml c.type] diff --git a/DocGen4/Output/Module.lean b/DocGen4/Output/Module.lean index 1da970e5..a9b822ee 100644 --- a/DocGen4/Output/Module.lean +++ b/DocGen4/Output/Module.lean @@ -96,7 +96,7 @@ def docInfoToHtml (module : Name) (doc : DocInfo) : HtmlM Html := do | _ => pure #[] -- rendered doc stirng let docStringHtml ← match doc.getDocString with - | some s => docStringToHtml s + | some s => docStringToHtml s doc.getName.toString | none => pure #[] -- extra information like equations and instances let extraInfoHtml ← match doc with @@ -135,7 +135,7 @@ as HTML. def modDocToHtml (mdoc : ModuleDoc) : HtmlM Html := do pure
    - [← docStringToHtml mdoc.doc] + [← docStringToHtml mdoc.doc ""]
    /-- @@ -199,6 +199,9 @@ def moduleToHtml (module : Process.Module) : HtmlM Html := withTheReader SiteBas let relevantMembers := module.members.filter Process.ModuleMember.shouldRender let memberDocs ← relevantMembers.mapM (moduleMemberToHtml module.name) let memberNames := filterDocInfo relevantMembers |>.map DocInfo.getName + letI : MonadLift BaseHtmlM HtmlM := { + monadLift := fun x => do return x.run (← readThe SiteBaseContext) + } templateLiftExtends (baseHtmlGenerator module.name.toString) <| pure #[ ← internalNav memberNames module.name, Html.element "main" false #[] memberDocs diff --git a/DocGen4/Output/Navbar.lean b/DocGen4/Output/Navbar.lean index 609af3e8..276f98de 100644 --- a/DocGen4/Output/Navbar.lean +++ b/DocGen4/Output/Navbar.lean @@ -55,6 +55,21 @@ def moduleList : BaseHtmlM Html := do The main entry point to rendering the navbar on the left hand side. -/ def navbar : BaseHtmlM Html := do + /- + TODO: Add these in later + + + + + + -/ + let mut staticPages : Array Html := #[ + , + + ] + let config ← read + if not config.refs.isEmpty then + staticPages := staticPages.push pure @@ -69,17 +84,7 @@ def navbar : BaseHtmlM Html := do