From 80b96e2bf90cd3ae975f23669fc35e8d3b4a7d73 Mon Sep 17 00:00:00 2001 From: acmepjz Date: Sat, 22 Jun 2024 22:45:43 +0800 Subject: [PATCH 01/23] feat: add bibliography support --- DocGen4/Output.lean | 34 ++++-- DocGen4/Output/Base.lean | 34 ++++++ DocGen4/Output/Class.lean | 5 +- DocGen4/Output/ClassInductive.lean | 5 +- DocGen4/Output/DocString.lean | 164 ++++++++++++++++++++--------- DocGen4/Output/Inductive.lean | 29 +++-- DocGen4/Output/Module.lean | 57 ++++++---- DocGen4/Output/Navbar.lean | 27 +++-- DocGen4/Output/Pybtex.lean | 152 ++++++++++++++++++++++++++ DocGen4/Output/References.lean | 77 ++++++++++++++ DocGen4/Output/Structure.lean | 31 ++++-- DocGen4/Output/ToHtmlFormat.lean | 31 ++++++ Main.lean | 27 ++++- README.md | 2 + lakefile.lean | 57 ++++++---- 15 files changed, 601 insertions(+), 131 deletions(-) create mode 100644 DocGen4/Output/Pybtex.lean create mode 100644 DocGen4/Output/References.lean diff --git a/DocGen4/Output.lean b/DocGen4/Output.lean index 52c605b2..180b16e6 100644 --- a/DocGen4/Output.lean +++ b/DocGen4/Output.lean @@ -10,6 +10,8 @@ import DocGen4.Output.Index import DocGen4.Output.Module import DocGen4.Output.NotFound import DocGen4.Output.Find +import DocGen4.Output.References +import DocGen4.Output.Pybtex import DocGen4.Output.SourceLinker import DocGen4.Output.Search import DocGen4.Output.ToJson @@ -20,6 +22,16 @@ namespace DocGen4 open Lean IO System Output Process +def collectBackrefs : IO (Array BackrefItem) := do + let mut backrefs : Array BackrefItem := #[] + for entry in ← System.FilePath.readDir declarationsBasePath do + if entry.fileName.startsWith "backrefs-" && entry.fileName.endsWith ".json" then + let fileContent ← FS.readFile entry.path + let .ok jsonContent := Json.parse fileContent | unreachable! + let .ok (arr : Array BackrefItem) := fromJson? jsonContent | unreachable! + backrefs := backrefs ++ arr + return backrefs + def htmlOutputSetup (config : SiteBaseContext) : IO Unit := do let findBasePath := basePath / "find" @@ -35,7 +47,8 @@ def htmlOutputSetup (config : SiteBaseContext) : IO Unit := do let foundationalTypesHtml := ReaderT.run foundationalTypes config |>.toString let navbarHtml := ReaderT.run navbar config |>.toString let searchHtml := ReaderT.run search config |>.toString - let docGenStatic := #[ + let referencesHtml := ReaderT.run (references (← collectBackrefs)) config |>.toString + let mut docGenStatic := #[ ("style.css", styleCss), ("favicon.svg", faviconSvg), ("declaration-data.js", declarationDataCenterJs), @@ -52,7 +65,8 @@ def htmlOutputSetup (config : SiteBaseContext) : IO Unit := do ("index.html", indexHtml), ("foundational_types.html", foundationalTypesHtml), ("404.html", notFoundHtml), - ("navbar.html", navbarHtml) + ("navbar.html", navbarHtml), + ("references.html", referencesHtml) ] for (fileName, content) in docGenStatic do FS.writeFile (basePath / fileName) content @@ -72,8 +86,9 @@ def htmlOutputDeclarationDatas (result : AnalyzerResult) : HtmlT IO Unit := do def htmlOutputResults (baseConfig : SiteBaseContext) (result : AnalyzerResult) (sourceUrl? : Option String) : IO Unit := do let config : SiteContext := { - result := result, + result := result sourceLinker := SourceLinker.sourceLinker sourceUrl? + refsMap := .ofList (baseConfig.refs.map fun x => (x.citekey, x)).toList } FS.createDirAll basePath @@ -90,15 +105,20 @@ def htmlOutputResults (baseConfig : SiteBaseContext) (result : AnalyzerResult) ( depthToRoot := modName.components.dropLast.length currentName := some modName } - let moduleHtml := moduleToHtml module |>.run config baseConfig + let (moduleHtml, backrefs) := moduleToHtml module #[] |>.run config baseConfig FS.createDirAll fileDir FS.writeFile filePath moduleHtml.toString + FS.writeFile (declarationsBasePath / s!"backrefs-{module.name}.json") (toString (toJson backrefs)) def getSimpleBaseContext (hierarchy : Hierarchy) : IO SiteBaseContext := do + let contents ← FS.readFile (declarationsBasePath / "references.json") <|> (pure "[]") + let .ok jsonContent := Json.parse contents | unreachable! + let .ok (refs : Array BibItem) := fromJson? jsonContent | unreachable! return { - depthToRoot := 0, - currentName := none, - hierarchy + depthToRoot := 0 + currentName := none + hierarchy := hierarchy + refs := refs } def htmlOutputIndex (baseConfig : SiteBaseContext) : IO Unit := do diff --git a/DocGen4/Output/Base.lean b/DocGen4/Output/Base.lean index 1ea53c9a..07c0bef4 100644 --- a/DocGen4/Output/Base.lean +++ b/DocGen4/Output/Base.lean @@ -15,6 +15,32 @@ def basePath := FilePath.mk "." / ".lake" / "build" / "doc" def srcBasePath := basePath / "src" def declarationsBasePath := basePath / "declarations" +/-- The structure representing a processed bibitem. -/ +structure BibItem where + /-- The cite key as in the bib file. -/ + citekey : String + /-- The tag generated by bib processor, e.g. `[Doe12]`. + Should be plain text and should not be escaped. -/ + tag : String + /-- The HTML generated by bib processor, e.g. + `John Doe. Test. 2012.`-/ + html : String + /-- The plain text form of `html` field. Should not be escaped. -/ + plaintext : String + deriving FromJson, ToJson + +/-- The structure representing a backref item. -/ +structure BackrefItem where + /-- The cite key as in the bib file. -/ + citekey : String + /-- The name of the module. -/ + modName : Name + /-- The name of the function, as a string. It is empty if the backref is in modstring. -/ + funName : String + /-- The index of the backref in that module, starting from zero. -/ + index : Nat + deriving FromJson, ToJson + /-- The context used in the `BaseHtmlM` monad for HTML templating. -/ @@ -33,6 +59,10 @@ structure SiteBaseContext where pages that don't have a module name. -/ currentName : Option Name + /-- + The list of references, as an array. + -/ + refs : Array BibItem /-- The context used in the `HtmlM` monad for HTML templating. @@ -46,6 +76,10 @@ structure SiteContext where A function to link declaration names to their source URLs, usually Github ones. -/ sourceLinker : Name → Option DeclarationRange → String + /-- + The references as a map. + -/ + refsMap : HashMap String BibItem def setCurrentName (name : Name) (ctx : SiteBaseContext) := {ctx with currentName := some name} diff --git a/DocGen4/Output/Class.lean b/DocGen4/Output/Class.lean index 521afc3c..c4d00985 100644 --- a/DocGen4/Output/Class.lean +++ b/DocGen4/Output/Class.lean @@ -15,8 +15,9 @@ def classInstancesToHtml (className : Name) : HtmlM Html := do -def classToHtml (i : Process.ClassInfo) : HtmlM (Array Html) := do - structureToHtml i +def classToHtml (i : Process.ClassInfo) (backrefs : Array BackrefItem) : + HtmlM (Array Html × Array BackrefItem) := do + structureToHtml i backrefs end Output end DocGen4 diff --git a/DocGen4/Output/ClassInductive.lean b/DocGen4/Output/ClassInductive.lean index 746b0054..486b6097 100644 --- a/DocGen4/Output/ClassInductive.lean +++ b/DocGen4/Output/ClassInductive.lean @@ -7,8 +7,9 @@ import DocGen4.Process namespace DocGen4 namespace Output -def classInductiveToHtml (i : Process.ClassInductiveInfo) : HtmlM (Array Html) := do - inductiveToHtml i +def classInductiveToHtml (i : Process.ClassInductiveInfo) (backrefs : Array BackrefItem) : + HtmlM (Array Html × Array BackrefItem) := do + inductiveToHtml i backrefs end Output end DocGen4 diff --git a/DocGen4/Output/DocString.lean b/DocGen4/Output/DocString.lean index 26d6c38e..645d7ce5 100644 --- a/DocGen4/Output/DocString.lean +++ b/DocGen4/Output/DocString.lean @@ -99,7 +99,7 @@ def nameToLink? (s : String) : HtmlM (Option String) := do /-- Extend links with following rules: - 1. if the link starts with `##`, a name search is used and will panic if not found + 1. if the link starts with `##`, a name search is used, and will use `find` if not found 2. if the link starts with `#`, it's treated as id link, no modification 3. if the link starts with `http`, it's an absolute one, no modification 4. otherwise it's a relative link, extend it with base url @@ -110,7 +110,7 @@ def extendLink (s : String) : HtmlM String := do if let some link ← nameToLink? (s.drop 2) then return link else - panic! s!"Cannot find {s.drop 2}, only full name and abbrev in current module is supported" + return s!"{← getRoot}find/?pattern={s.drop 2}#doc" -- for id else if s.startsWith "#" then return s @@ -119,8 +119,43 @@ def extendLink (s : String) : HtmlM String := do return s else return ((← getRoot) ++ s) +/-- Apply function `modifyElement` to an array of `Lean.Xml.Content`s. -/ +def modifyContents (contents : Array Content) (funName : String) (backrefs : Array BackrefItem) + (modifyElement : Element → String → Array BackrefItem → HtmlM (Element × Array BackrefItem)) : + HtmlM (Array Content × Array BackrefItem) := do + let mut newContents : Array Content := #[] + let mut newBackrefs := backrefs + let mut i : Nat := 0 + while h : i < contents.size do + let c := contents.get ⟨i, h⟩ + match c with + | Content.Element e => + let (e', b') ← modifyElement e funName newBackrefs + newContents := newContents.push (Content.Element e') + newBackrefs := b' + | _ => + newContents := newContents.push c + i := i + 1 + return ⟨ newContents, newBackrefs ⟩ + +/-- Apply function `modifyElement` to an array of `Lean.Xml.Element`s. -/ +def modifyElements (elements : Array Element) (funName : String) (backrefs : Array BackrefItem) + (modifyElement : Element → String → Array BackrefItem → HtmlM (Element × Array BackrefItem)) : + HtmlM (Array Element × Array BackrefItem) := do + let mut newElements : Array Element := #[] + let mut newBackrefs := backrefs + let mut i : Nat := 0 + while h : i < elements.size do + let (c, b') ← modifyElement (elements.get ⟨i, h⟩) funName newBackrefs + newElements := newElements.push c + newBackrefs := b' + i := i + 1 + return ⟨ newElements, newBackrefs ⟩ + /-- Add attributes for heading. -/ -def addHeadingAttributes (el : Element) (modifyElement : Element → HtmlM Element) : HtmlM Element := do +def addHeadingAttributes (el : Element) (funName : String) (backrefs : Array BackrefItem) + (modifyElement : Element → String → Array BackrefItem → HtmlM (Element × Array BackrefItem)) : + HtmlM (Element × Array BackrefItem) := do match el with | Element.Element name attrs contents => do let id := xmlGetHeadingId el @@ -131,22 +166,49 @@ def addHeadingAttributes (el : Element) (modifyElement : Element → HtmlM Eleme let newAttrs := attrs |>.insert "id" id |>.insert "class" "markdown-heading" - let newContents := (← - contents.mapM (fun c => match c with - | Content.Element e => return Content.Element (← modifyElement e) - | _ => pure c)) + let (newContents, newBackrefs) ← modifyContents contents funName backrefs modifyElement + return ⟨ ⟨ name, newAttrs, newContents |>.push (Content.Character " ") - |>.push (Content.Element anchor) - return ⟨ name, newAttrs, newContents⟩ + |>.push (Content.Element anchor) ⟩, newBackrefs ⟩ /-- Extend anchor links. -/ -def extendAnchor (el : Element) : HtmlM Element := do +def extendAnchor (el : Element) (funName : String) (backrefs : Array BackrefItem) : + HtmlM (Element × Array BackrefItem) := do match el with | Element.Element name attrs contents => - let newAttrs ← match attrs.find? "href" with - | some href => pure (attrs.insert "href" (← extendLink href)) - | none => pure attrs - return ⟨ name, newAttrs, contents⟩ + match attrs.find? "href" with + | some href => + let refsMap := (← read).refsMap + let bibitem : Option BibItem := + if href.startsWith "references.html#ref_" then + refsMap.find? (href.drop "references.html#ref_".length) + else + .none + let attrs := attrs.insert "href" (← extendLink href) + match bibitem with + | .some bibitem => + let newBackref : BackrefItem := { + citekey := bibitem.citekey + modName := (← readThe SiteBaseContext).currentName.get! + funName := funName + index := backrefs.size + } + let newBackrefs := backrefs.push newBackref + let changeName : Bool := + if contents.size = 1 then + match contents.get! 0 with + | .Character s => s == bibitem.citekey + | _ => false + else + false + let newContents : Array Content := + if changeName then #[.Character bibitem.tag] else contents + let attrs := attrs.insert "title" bibitem.plaintext + |>.insert "id" s!"_backref_{newBackref.index}" + return ⟨ ⟨ name, attrs, newContents ⟩, newBackrefs ⟩ + | .none => + return ⟨ ⟨ name, attrs, contents ⟩, backrefs ⟩ + | none => return ⟨ ⟨ name, attrs, contents ⟩, backrefs ⟩ /-- Automatically add intra documentation link for inline code span. -/ def autoLink (el : Element) : HtmlM Element := do @@ -187,56 +249,60 @@ def autoLink (el : Element) : HtmlM Element := do cats.any (Unicode.isInGeneralCategory c) /-- Core function of modifying the cmark rendered docstring html. -/ -partial def modifyElement (element : Element) : HtmlM Element := +partial def modifyElement (element : Element) (funName : String) (backrefs : Array BackrefItem) : + HtmlM (Element × Array BackrefItem) := match element with | el@(Element.Element name attrs contents) => do -- add id and class to if name = "h1" ∨ name = "h2" ∨ name = "h3" ∨ name = "h4" ∨ name = "h5" ∨ name = "h6" then - addHeadingAttributes el modifyElement + addHeadingAttributes el funName backrefs modifyElement -- extend relative href for else if name = "a" then - extendAnchor el + extendAnchor el funName backrefs -- auto link for inline else if name = "code" ∧ -- don't linkify code blocks explicitly tagged with a language other than lean (((attrs.find? "class").getD "").splitOn.all (fun s => s == "language-lean" || !s.startsWith "language-")) then - autoLink el + return ⟨ ← autoLink el, backrefs ⟩ -- recursively modify else - let newContents ← contents.mapM fun c => match c with - | Content.Element e => return Content.Element (← modifyElement e) - | _ => pure c - return ⟨ name, attrs, newContents ⟩ - --- TODO: remove the following 3 functions --- once is fixed - -private def _root_.Lean.Xml.Attributes.toStringEscaped (as : Attributes) : String := - as.fold (fun s n v => s ++ s!" {n}=\"{Html.escape v}\"") "" - -mutual + let (newContents, newBackrefs) ← modifyContents contents funName backrefs modifyElement + return ⟨ ⟨ name, attrs, newContents ⟩, newBackrefs ⟩ -private partial def _root_.Lean.Xml.eToStringEscaped : Element → String -| Element.Element n a c => s!"<{n}{a.toStringEscaped}>{c.map cToStringEscaped |>.foldl (· ++ ·) ""}" - -private partial def _root_.Lean.Xml.cToStringEscaped : Content → String -| Content.Element e => eToStringEscaped e -| Content.Comment c => s!"" -| Content.Character c => Html.escape c - -end +/-- Find all references in a markdown text. -/ +partial def findAllReferences (refsMap : HashMap String BibItem) (s : String) (i : String.Pos := 0) + (ret : HashSet String := .empty) : HashSet String := + let lps := s.posOfAux '[' s.endPos i + if lps < s.endPos then + let lpe := s.posOfAux ']' s.endPos lps + if lpe < s.endPos then + let citekey := Substring.toString ⟨s, ⟨lps.1 + 1⟩, lpe⟩ + match refsMap.find? citekey with + | .some _ => findAllReferences refsMap s lpe (ret.insert citekey) + | .none => findAllReferences refsMap s lpe ret + else + ret + else + ret /-- Convert docstring to Html. -/ -def docStringToHtml (s : String) : HtmlM (Array Html) := do - let rendered := match MD4Lean.renderHtml s with - | .some res => res - | _ => "" -- TODO: should print some error message - match manyDocument rendered.mkIterator with - | Parsec.ParseResult.success _ res => - -- TODO: use `toString` instead of `eToStringEscaped` - -- once is fixed - res.mapM fun x => do return Html.raw <| eToStringEscaped (← modifyElement x) - | _ => return #[Html.raw rendered] +def docStringToHtml (s : String) (funName : String) (backrefs : Array BackrefItem) : + HtmlM (Array Html × Array BackrefItem) := do + let refsMarkdown := "\n\n" ++ (String.join <| + (findAllReferences (← read).refsMap s).toList.map fun s => + s!"[{s}]: references.html#ref_{s}\n") + match MD4Lean.renderHtml (s ++ refsMarkdown) with + | .some rendered => + match manyDocument rendered.mkIterator with + | Parsec.ParseResult.success _ res => + let (newRes, newBackrefs) ← modifyElements res funName backrefs modifyElement + -- TODO: use `toString` instead of `eToStringEscaped` + -- once is fixed + return (newRes.map fun x => Html.raw (eToStringEscaped x), newBackrefs) + | _ => + return (#[.raw "Error: failed to postprocess: ", .raw rendered], backrefs) + | .none => + return (#[.raw "Error: failed to parse markdown: ", .text s], backrefs) end Output end DocGen4 diff --git a/DocGen4/Output/Inductive.lean b/DocGen4/Output/Inductive.lean index 3d6eaf3b..52e8555e 100644 --- a/DocGen4/Output/Inductive.lean +++ b/DocGen4/Output/Inductive.lean @@ -15,25 +15,36 @@ def instancesForToHtml (typeName : Name) : BaseHtmlM Html := do
    -def ctorToHtml (c : Process.NameInfo) : HtmlM Html := do +def ctorToHtml (c : Process.NameInfo) (backrefs : Array BackrefItem) : + HtmlM (Html × Array BackrefItem) := do let shortName := c.name.componentsRev.head!.toString let name := c.name.toString if let some doc := c.doc then - let renderedDoc ← docStringToHtml doc + let (renderedDoc, newBackrefs) ← docStringToHtml doc name backrefs pure -
  • + (
  • {shortName} : [← infoFormatToHtml c.type]
    [renderedDoc]
    -
  • + , newBackrefs) else pure -
  • + (
  • {shortName} : [← infoFormatToHtml c.type] -
  • + , backrefs) -def inductiveToHtml (i : Process.InductiveInfo) : HtmlM (Array Html) := do - let constructorsHtml :=
      [← i.ctors.toArray.mapM ctorToHtml]
    - return #[constructorsHtml] +def inductiveToHtml (i : Process.InductiveInfo) (backrefs : Array BackrefItem) : + HtmlM (Array Html × Array BackrefItem) := do + let arr := i.ctors.toArray + let mut newArr : Array Html := #[] + let mut newBackrefs := backrefs + let mut idx : Nat := 0 + while h : LT.lt idx arr.size do + let (c, b') ← ctorToHtml (arr.get ⟨idx, h⟩) newBackrefs + newArr := newArr.push c + newBackrefs := b' + idx := idx + 1 + let constructorsHtml :=
      [newArr]
    + return ⟨ #[constructorsHtml], newBackrefs ⟩ end Output end DocGen4 diff --git a/DocGen4/Output/Module.lean b/DocGen4/Output/Module.lean index 1da970e5..22753e05 100644 --- a/DocGen4/Output/Module.lean +++ b/DocGen4/Output/Module.lean @@ -86,18 +86,19 @@ def docInfoHeader (doc : DocInfo) : HtmlM Html := do /-- The main entry point for rendering a single declaration inside a given module. -/ -def docInfoToHtml (module : Name) (doc : DocInfo) : HtmlM Html := do +def docInfoToHtml (module : Name) (doc : DocInfo) (backrefs : Array BackrefItem) : + HtmlM (Html × Array BackrefItem) := do -- basic info like headers, types, structure fields, etc. - let docInfoHtml ← match doc with - | DocInfo.inductiveInfo i => inductiveToHtml i - | DocInfo.structureInfo i => structureToHtml i - | DocInfo.classInfo i => classToHtml i - | DocInfo.classInductiveInfo i => classInductiveToHtml i - | _ => pure #[] + let (docInfoHtml, backrefs) ← match doc with + | DocInfo.inductiveInfo i => inductiveToHtml i backrefs + | DocInfo.structureInfo i => structureToHtml i backrefs + | DocInfo.classInfo i => classToHtml i backrefs + | DocInfo.classInductiveInfo i => classInductiveToHtml i backrefs + | _ => pure (#[], backrefs) -- rendered doc stirng - let docStringHtml ← match doc.getDocString with - | some s => docStringToHtml s - | none => pure #[] + let (docStringHtml, backrefs) ← match doc.getDocString with + | some s => docStringToHtml s doc.getName.toString backrefs + | none => pure (#[], backrefs) -- extra information like equations and instances let extraInfoHtml ← match doc with | DocInfo.classInfo i => pure #[← classInstancesToHtml i.name] @@ -115,7 +116,7 @@ def docInfoToHtml (module : Name) (doc : DocInfo) : HtmlM Html := do else #[] pure -
    + (
    -
    +
    , backrefs) /-- Rendering a module doc string, that is the ones with an ! after the opener as HTML. -/ -def modDocToHtml (mdoc : ModuleDoc) : HtmlM Html := do +def modDocToHtml (mdoc : ModuleDoc) (backrefs : Array BackrefItem) : + HtmlM (Html × Array BackrefItem) := do + let (h, newBackrefs) ← docStringToHtml mdoc.doc "" backrefs pure -
    - [← docStringToHtml mdoc.doc] -
    + (
    + [h] +
    , newBackrefs) /-- Render a module member, that is either a module doc string or a declaration as HTML. -/ -def moduleMemberToHtml (module : Name) (member : ModuleMember) : HtmlM Html := do +def moduleMemberToHtml (module : Name) (member : ModuleMember) (backrefs : Array BackrefItem) : + HtmlM (Html × Array BackrefItem) := do match member with - | ModuleMember.docInfo d => docInfoToHtml module d - | ModuleMember.modDoc d => modDocToHtml d + | ModuleMember.docInfo d => docInfoToHtml module d backrefs + | ModuleMember.modDoc d => modDocToHtml d backrefs def declarationToNavLink (module : Name) : Html :=