From 8ba8e579ef2d8410b0c2cb02309a84793d810065 Mon Sep 17 00:00:00 2001
From: Thibaut Lienart <tlienart@me.com>
Date: Mon, 23 Sep 2019 16:15:25 +0200
Subject: [PATCH] Footnote (#230)

* initial footnote capacity

* footnote and fixes
- added footnote capacity
- more tests resulting from judoctemplates updating
- more fixes as a result too

* version bump (patch)
---
 Project.toml                  |  3 +-
 src/JuDoc.jl                  |  3 +-
 src/converter/fixer.jl        |  7 ++-
 src/converter/js_prerender.jl |  7 ++-
 src/converter/md.jl           | 11 ++--
 src/converter/md_blocks.jl    | 62 +++++++++++++++++++++--
 src/converter/md_utils.jl     | 21 --------
 src/jd_vars.jl                | 45 +++++++++++++++--
 src/misc_html.jl              | 12 ++++-
 src/parser/md_chars.jl        | 27 ----------
 src/parser/md_tokens.jl       | 24 ++++++---
 src/parser/md_validate.jl     | 94 +++++++++++++++++++++++++++++++++++
 src/parser/ocblocks.jl        | 20 ++++++++
 src/parser/tokens.jl          | 18 ++++++-
 test/converter/markdown3.jl   | 38 +++++++-------
 test/parser/footnotes.jl      | 34 +++++++++++++
 test/runtests.jl              |  1 +
 test/test_utils.jl            |  7 +++
 18 files changed, 339 insertions(+), 95 deletions(-)
 delete mode 100644 src/parser/md_chars.jl
 create mode 100644 src/parser/md_validate.jl
 create mode 100644 test/parser/footnotes.jl

diff --git a/Project.toml b/Project.toml
index 5768b9ffe..0dc04958e 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "JuDoc"
 uuid = "4ca9428c-4c75-11e9-2efb-bf5cb6c1e8f8"
 authors = ["Thibaut Lienart <tlienart@me.com>"]
-version = "0.3.1"
+version = "0.3.2"
 
 [deps]
 Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
@@ -10,6 +10,7 @@ DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
 JuDocTemplates = "6793090a-55ae-11e9-0511-73b91164f4ea"
 LiveServer = "16fef848-5104-11e9-1b77-fb7a48bbb589"
 Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a"
+OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
 Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
 
 [compat]
diff --git a/src/JuDoc.jl b/src/JuDoc.jl
index 0a0c888d7..8f618c69f 100644
--- a/src/JuDoc.jl
+++ b/src/JuDoc.jl
@@ -6,6 +6,7 @@ using Markdown
 using Markdown: htmlesc
 using Dates # see jd_vars
 using DelimitedFiles: readdlm
+using OrderedCollections
 
 import LiveServer
 
@@ -74,7 +75,7 @@ include("parser/lx_tokens.jl")
 include("parser/lx_blocks.jl")
 # > markdown
 include("parser/md_tokens.jl")
-include("parser/md_chars.jl")
+include("parser/md_validate.jl")
 # > html
 include("parser/html_tokens.jl")
 include("parser/html_blocks.jl")
diff --git a/src/converter/fixer.jl b/src/converter/fixer.jl
index bb61c727d..630838b72 100644
--- a/src/converter/fixer.jl
+++ b/src/converter/fixer.jl
@@ -3,9 +3,9 @@ $(SIGNATURES)
 
 Direct inline-style links are properly processed by Julia's Markdown processor but not:
 
-* `[link title](https://www.google.com "Google's Homepage")`
 * `[link title][some reference]` and later `[some reference]: http://www.reddit.com`
 * `[link title]` and later `[link title]: https://www.mozilla.org`
+* (we don't either) `[link title](https://www.google.com "Google's Homepage")`
 """
 function find_and_fix_md_links(hs::String)::String
     # 1. find all occurences of -- [...]: link
@@ -17,7 +17,10 @@ function find_and_fix_md_links(hs::String)::String
     m_link_defs = collect(eachmatch(r"&#91;((?:(?!&#93;).)*?)&#93;:\s+((?:(?!\<\/p\>)\S)+)", hs))
 
     def_names = [def.captures[1] for def in m_link_defs]
-    def_links = [def.captures[2] for def in m_link_defs]
+
+    # here's a trick, we do NOT use the link caught here; rather we check the dictionary
+    # PAGE_LINK_DEFS as otherwise the link may have been altered by JuDoc (e.g. if has underscores)
+    def_links = [PAGE_LINK_DEFS[def.captures[1]] for def in m_link_defs]
 
     # here we're looking for [id] or [stuff][id] or ![stuff][id] but not [id]:
     m_link_refs = collect(eachmatch(r"(&#33;)?&#91;(.*?)&#93;(?!:)(?:&#91;(.*?)&#93;)?", hs))
diff --git a/src/converter/js_prerender.jl b/src/converter/js_prerender.jl
index 516609099..063fc833c 100644
--- a/src/converter/js_prerender.jl
+++ b/src/converter/js_prerender.jl
@@ -98,7 +98,12 @@ function js2html(hs::String, jsbuffer::IOBuffer, matches::Vector{RegexMatch},
     for i ∈ 1:2:length(matches)-1
         mo, mc = matches[i:i+1]
         write(htmls, subs(hs, head, mo.offset - 1))
-        write(htmls, parts[c])
+        pp = strip(parts[c])
+        if startswith(pp, "<pre><code class=\"julia-repl")
+            pp = replace(pp, r"shell&gt;"=>"<span class=hljs-metas>shell&gt;</span>")
+            pp = replace(pp, r"(\(.*?\)) pkg&gt;"=>s"<span class=hljs-metap>\1 pkg&gt;</span>")
+        end
+        write(htmls, pp)
         head = mc.offset + length(mc.match)
         c += 1
     end
diff --git a/src/converter/md.jl b/src/converter/md.jl
index 5dd763997..2b45c138b 100644
--- a/src/converter/md.jl
+++ b/src/converter/md.jl
@@ -23,6 +23,8 @@ function convert_md(mds::String, pre_lxdefs::Vector{LxDef}=Vector{LxDef}();
         def_PAGE_HEADERS!()     # all the headers
         def_PAGE_EQREFS!()      # page-specific equation dict (hrefs)
         def_PAGE_BIBREFS!()     # page-specific reference dict (hrefs)
+        def_PAGE_FNREFS!()      # page-specific footnote dict
+        def_PAGE_LINK_DEFS!()   # page-specific link definition candidates [..]: (...)
     end
 
     #
@@ -31,9 +33,10 @@ function convert_md(mds::String, pre_lxdefs::Vector{LxDef}=Vector{LxDef}();
     #
 
     #> 1. Tokenize
-    tokens = find_tokens(mds, MD_TOKENS, MD_1C_TOKENS)
+    tokens  = find_tokens(mds, MD_TOKENS, MD_1C_TOKENS)
+    fn_refs = validate_footnotes!(tokens)
 
-    #> 1'. Find indented blocks
+    #> 1b. Find indented blocks
     tokens = find_indented_blocks(tokens, mds)
 
     #> 2. Open-Close blocks (OCBlocks)
@@ -45,6 +48,8 @@ function convert_md(mds::String, pre_lxdefs::Vector{LxDef}=Vector{LxDef}();
     filter!(τ -> τ.name ∉ L_RETURNS, tokens)
     #>> d. filter out "fake headers" (opening ### that are not at the start of a line)
     filter!(β -> validate_header_block(β), blocks)
+    #>> e. keep track of literal content of possible link definitions to use
+    validate_and_store_link_defs!(blocks)
 
     #> 3. LaTeX commands
     #>> a. find "newcommands", update active blocks/braces
@@ -70,7 +75,7 @@ function convert_md(mds::String, pre_lxdefs::Vector{LxDef}=Vector{LxDef}();
     #
 
     #> 1. Merge all the blocks that will need further processing before insertion
-    blocks2insert = merge_blocks(lxcoms, deactivate_divs(blocks), sp_chars)
+    blocks2insert = merge_blocks(lxcoms, deactivate_divs(blocks), fn_refs, sp_chars)
 
     #> 2. Form intermediate markdown + html
     inter_md, mblocks = form_inter_md(mds, blocks2insert, lxdefs)
diff --git a/src/converter/md_blocks.jl b/src/converter/md_blocks.jl
index bdf79ca07..8c1356e06 100644
--- a/src/converter/md_blocks.jl
+++ b/src/converter/md_blocks.jl
@@ -7,15 +7,16 @@ Helper function for `convert_inter_html` that processes an extracted block given
 function convert_block(β::AbstractBlock, lxcontext::LxContext)::AS
     # case for special characters / html entities
     β isa HTML_SPCH     && return ifelse(isempty(β.r), β.ss, β.r)
-
     # Return relevant interpolated string based on case
     βn = β.name
     βn ∈  MD_HEADER        && return convert_header(β)
     βn == :CODE_INLINE     && return html_code_inline(content(β) |> htmlesc)
     βn == :CODE_BLOCK_LANG && return convert_code_block(β.ss)
     βn == :CODE_BLOCK_IND  && return convert_indented_code_block(β.ss)
-    βn == :CODE_BLOCK      && return html_code(strip(content(β) |> htmlesc), "{{fill lang}}")
+    βn == :CODE_BLOCK      && return html_code(strip(content(β)), "{{fill lang}}")
     βn == :ESCAPE          && return chop(β.ss, head=3, tail=3)
+    βn == :FOOTNOTE_REF    && return convert_footnote_ref(β)
+    βn == :FOOTNOTE_DEF    && return convert_footnote_def(β, lxcontext)
 
     # Math block --> needs to call further processing to resolve possible latex
     βn ∈ MATH_BLOCKS_NAMES && return convert_math_block(β, lxcontext.lxdefs)
@@ -185,5 +186,60 @@ function convert_indented_code_block(ss::SubString)::String
     # 1. decrease indentation of all lines (either frontal \n\t or \n⎵⎵⎵⎵)
     code = replace(ss, r"\n(?:\t| {4})" => "\n")
     # 2. return; lang is a LOCAL_PAGE_VARS that is julia by default and can be set
-    return html_code(strip(code) |> htmlesc, "{{fill lang}}")
+    return html_code(strip(code), "{{fill lang}}")
+end
+
+"""
+$(SIGNATURES)
+
+Helper function to convert a footnote reference `[^id]` into a html `<sup>` element with id
+`fnref:id` wrapping a link (class `fnref`) to `#fndef:id`. The displayed number is the
+position of `id` in `PAGE_FNREFS`, i.e. the order of first appearance on the page; an id that
+has not been seen before is appended to `PAGE_FNREFS`.
+"""
+function convert_footnote_ref(β::Token)::String
+    # β.ss is [^id]; extract id
+    id = string(match(r"\[\^(.*?)\]", β.ss).captures[1])
+    # look up the id among the refs already seen (`nothing` if this is its first occurrence)
+    pos = findfirst(isequal(id), PAGE_FNREFS)
+    if pos === nothing
+        # first time this footnote is referenced: register it, it takes the next number
+        push!(PAGE_FNREFS, id)
+        pos = length(PAGE_FNREFS)
+    end
+    # the sup carries the anchor `fnref:id` that `convert_footnote_def` links back to
+    link = html_ahref("#fndef:$id", "[$pos]"; class="fnref")
+    return html_sup("fnref:$id", link)
+end
+
+"""
+$(SIGNATURES)
+
+Helper function to convert a footnote definition `[^id]: ...` into a html table (class `fndef`,
+id `fndef:id`) with a backref link to `#fnref:id` and the converted content of the definition.
+Returns an empty string if `id` is not in `PAGE_FNREFS` (the footnote was not referenced before).
+"""
+function convert_footnote_def(β::OCBlock, lxcontext::LxContext)::String
+    # otok(β) is [^id]:
+    id = match(r"\[\^(.*?)\]:", otok(β).ss).captures[1]
+    # number given to this footnote by `convert_footnote_ref`: its position in PAGE_FNREFS
+    pos = findfirst(isequal(id), PAGE_FNREFS)
+    if pos === nothing
+        # this was never referenced before, so probably best not to show it
+        return ""
+    end
+    # the content may itself contain markup, so it goes through `convert_md` (recursive
+    # call) with the latex definitions of the current context
+    ct, _ = convert_md(content(β) * EOS, lxcontext.lxdefs;
+                       isrecursive=true, has_mddefs=false)
+    # backref cell links back to the anchor `fnref:id` set by `convert_footnote_ref`
+    backref = html_ahref("#fnref:$id", "[$pos]")
+    return """
+    <table class="fndef" id="fndef:$id">
+        <tr>
+            <td class=\"fndef-backref\">$(backref)</td>
+            <td class=\"fndef-content\">$(ct)</td>
+        </tr>
+    </table>
+    """
+end
diff --git a/src/converter/md_utils.jl b/src/converter/md_utils.jl
index dc8152234..945b9c527 100644
--- a/src/converter/md_utils.jl
+++ b/src/converter/md_utils.jl
@@ -48,27 +48,6 @@ function deactivate_divs(blocks::Vector{OCBlock})::Vector{OCBlock}
 end
 
 
-"""
-$(SIGNATURES)
-
-Given a candidate header block, check that the opening `#` is at the start of a line, otherwise
-ignore the block.
-"""
-function validate_header_block(β::OCBlock)::Bool
-    # skip non-header blocks
-    β.name ∈ MD_HEADER || return true
-    # if it's a header block, have a look at the opening token
-    τ = otok(β)
-    # check if it overlaps with the first character
-    from(τ) == 1 && return true
-    # otherwise check if the previous character is a linereturn
-    s = str(β.ss) # does not allocate
-    prevc = s[prevind(str(β.ss), from(τ))]
-    prevc == '\n' && return true
-    return false
-end
-
-
 """
 $(SIGNATURES)
 
diff --git a/src/jd_vars.jl b/src/jd_vars.jl
index d826dbbbc..6564dee05 100644
--- a/src/jd_vars.jl
+++ b/src/jd_vars.jl
@@ -49,12 +49,14 @@ is processed.
     LOCAL_PAGE_VARS["hasmath"]  = Pair(true,    (Bool,))
     LOCAL_PAGE_VARS["hascode"]  = Pair(false,   (Bool,))
     LOCAL_PAGE_VARS["date"]     = Pair(Date(1), (String, Date, Nothing))
-    LOCAL_PAGE_VARS["jd_ctime"] = Pair(Date(1), (Date,))   # time of creation
-    LOCAL_PAGE_VARS["jd_mtime"] = Pair(Date(1), (Date,))   # time of last modification
-    LOCAL_PAGE_VARS["jd_rpath"] = Pair("",      (String,)) # local path to file src/[...]/blah.md
     LOCAL_PAGE_VARS["lang"]     = Pair("julia", (String,)) # default lang for indented code
     LOCAL_PAGE_VARS["reflinks"] = Pair(true,    (Bool,))   # whether there are reflinks or not
 
+    # page vars used by judoc, should not be accessed or defined
+    LOCAL_PAGE_VARS["jd_ctime"]  = Pair(Date(1), (Date,))   # time of creation
+    LOCAL_PAGE_VARS["jd_mtime"]  = Pair(Date(1), (Date,))   # time of last modification
+    LOCAL_PAGE_VARS["jd_rpath"]  = Pair("",      (String,)) # local path to file src/[...]/blah.md
+
     # If there are GLOBAL vars that are defined, they take precedence
     local_keys   = keys(LOCAL_PAGE_VARS)
     for k in keys(GLOBAL_PAGE_VARS)
@@ -73,9 +75,10 @@ the title, the refstring version of the title, the occurence number and the leve
 """
 const PAGE_HEADERS = Dict{Int,Tuple{AS,AS,Int,Int}}()
 
-
 """
 $(SIGNATURES)
+
+Empties `PAGE_HEADERS`.
 """
 @inline function def_PAGE_HEADERS!()::Nothing
     empty!(PAGE_HEADERS)
@@ -83,6 +86,40 @@ $(SIGNATURES)
 end
 
 
+"""
+PAGE_FNREFS
+
+Keep track of the names of the footnotes seen so far; the order is kept as it's a list.
+"""
+const PAGE_FNREFS = String[]
+
+"""
+$(SIGNATURES)
+
+Empties `PAGE_FNREFS`.
+"""
+@inline function def_PAGE_FNREFS!()::Nothing
+    empty!(PAGE_FNREFS)
+    return nothing
+end
+
+"""
+PAGE_LINK_DEFS
+
+Keep track of the link definition candidates `[id]: link` of the page (as `id => link`).
+"""
+const PAGE_LINK_DEFS = LittleDict{String,String}()
+
+"""
+$(SIGNATURES)
+
+Empties `PAGE_LINK_DEFS`.
+"""
+@inline function def_PAGE_LINK_DEFS!()::Nothing
+    empty!(PAGE_LINK_DEFS)
+    return nothing
+end
+
 """
 GLOBAL_LXDEFS
 
diff --git a/src/misc_html.jl b/src/misc_html.jl
index 89b87121a..b75f190d6 100644
--- a/src/misc_html.jl
+++ b/src/misc_html.jl
@@ -12,6 +12,13 @@ Convenience function for a list item
 """
 html_li(in::AS) = "<li>$(in)</li>"
 
+"""
+$SIGNATURES
+
+Convenience function for a `<sup>` element with attribute `id` wrapping the content `in`.
+"""
+html_sup(id::String, in::AS) =  "<sup id=\"$id\">$in</sup>"
+
 """
 $(SIGNATURES)
 
@@ -25,9 +32,10 @@ $(SIGNATURES)
 Convenience function to introduce a hyper reference.
 """
 function html_ahref(link::AS, name::Union{Int,AS};
-                    title::AS="")
-    a  = "<a href=\"$link\""
+                    title::AS="", class::AS="")
+    a  = "<a href=\"$(htmlesc(link))\""
     a *= attr(:title, title)
+    a *= attr(:class, class)
     a *= ">$name</a>"
     a
 end
diff --git a/src/parser/md_chars.jl b/src/parser/md_chars.jl
deleted file mode 100644
index 8004e23e9..000000000
--- a/src/parser/md_chars.jl
+++ /dev/null
@@ -1,27 +0,0 @@
-"""
-$SIGNATURES
-
-Take a list of token and return those corresponding to special characters or html entities wrapped
-in `HTML_SPCH` types (will be left alone by the markdown conversion and be inserted as is in the
-HTML).
-"""
-function find_special_chars(tokens::Vector{Token})
-    spch = Vector{HTML_SPCH}()
-    isempty(tokens) && return spch
-    for τ in tokens
-        τ.name == :CHAR_BACKSPACE   && push!(spch, HTML_SPCH(τ.ss, "&#92;"))
-        τ.name == :CHAR_BACKTICK    && push!(spch, HTML_SPCH(τ.ss, "&#96;"))
-        τ.name == :CHAR_LINEBREAK   && push!(spch, HTML_SPCH(τ.ss, "<br/>"))
-        τ.name == :CHAR_HTML_ENTITY && verify_html_entity(τ.ss) && push!(spch, HTML_SPCH(τ.ss))
-    end
-    return spch
-end
-
-"""
-$SIGNATURES
-
-Verify that a given string corresponds to a well formed html entity.
-"""
-function verify_html_entity(ss::AS)
-    match(r"&(?:[a-z0-9]+|#[0-9]{1,6}|#x[0-9a-f]{1,6});", ss) !== nothing
-end
diff --git a/src/parser/md_tokens.jl b/src/parser/md_tokens.jl
index c7b4dfbd9..1de6b4a64 100644
--- a/src/parser/md_tokens.jl
+++ b/src/parser/md_tokens.jl
@@ -36,6 +36,10 @@ const MD_TOKENS = Dict{Char, Vector{TokenFinder}}(
              ],
     '~'  => [ isexactly("~~~")  => :ESCAPE,           # ~~~  ... ~~~
              ],
+    '['  => [ incrlook(is_footnote) => :FOOTNOTE_REF,    # [^...](:)? defs will be separated after
+             ],
+    ']'  => [ isexactly("]: ") => :LINK_DEF,
+             ],
     '\\' => [ isexactly("\\{")        => :INACTIVE,         # See note [^1]
               isexactly("\\}")        => :INACTIVE,         # See note [^1]
               isexactly("\\\$")       => :INACTIVE,         # See note [^1]
@@ -43,6 +47,8 @@ const MD_TOKENS = Dict{Char, Vector{TokenFinder}}(
               isexactly("\\]")        => :MATH_C_CLOSE,     #    ... \]
               isexactly("\\begin{align}")    => :MATH_ALIGN_OPEN,
               isexactly("\\end{align}")      => :MATH_ALIGN_CLOSE,
+              isexactly("\\begin{equation}") => :MATH_D_OPEN,
+              isexactly("\\end{equation}")   => :MATH_D_CLOSE,
               isexactly("\\begin{eqnarray}") => :MATH_EQA_OPEN,
               isexactly("\\end{eqnarray}")   => :MATH_EQA_CLOSE,
               isexactly("\\newcommand")      => :LX_NEWCOMMAND,
@@ -126,13 +132,15 @@ content which is needed to find latex definitions (see parser/markdown/find_bloc
 const MD_OCB = [
     # name                    opening token   closing token(s)     nestable
     # ---------------------------------------------------------------------
-    OCProto(:COMMENT,         :COMMENT_OPEN, (:COMMENT_CLOSE,),    false),
-    OCProto(:CODE_BLOCK_LANG, :CODE_LANG,    (:CODE_TRIPLE,),      false),
-    OCProto(:CODE_BLOCK,      :CODE_TRIPLE,  (:CODE_TRIPLE,),      false),
-    OCProto(:CODE_BLOCK_IND,  :LR_INDENT,    (:LINE_RETURN,),      false),
-    OCProto(:CODE_INLINE,     :CODE_DOUBLE,  (:CODE_DOUBLE,),      false),
-    OCProto(:CODE_INLINE,     :CODE_SINGLE,  (:CODE_SINGLE,),      false),
-    OCProto(:ESCAPE,          :ESCAPE,       (:ESCAPE,),           false),
+    OCProto(:COMMENT,         :COMMENT_OPEN, (:COMMENT_CLOSE,), false),
+    OCProto(:CODE_BLOCK_LANG, :CODE_LANG,    (:CODE_TRIPLE,),   false),
+    OCProto(:CODE_BLOCK,      :CODE_TRIPLE,  (:CODE_TRIPLE,),   false),
+    OCProto(:CODE_BLOCK_IND,  :LR_INDENT,    (:LINE_RETURN,),   false),
+    OCProto(:CODE_INLINE,     :CODE_DOUBLE,  (:CODE_DOUBLE,),   false),
+    OCProto(:CODE_INLINE,     :CODE_SINGLE,  (:CODE_SINGLE,),   false),
+    OCProto(:ESCAPE,          :ESCAPE,       (:ESCAPE,),        false),
+    OCProto(:FOOTNOTE_DEF,    :FOOTNOTE_DEF, (:LINE_RETURN,),   false),
+    OCProto(:LINK_DEF,        :LINK_DEF,     (:LINE_RETURN,),   false),
     # ------------------------------------------------------------------
     OCProto(:H1,              :H1_OPEN,      (L_RETURNS..., :EOS), false), # see [^3]
     OCProto(:H2,              :H2_OPEN,      (L_RETURNS..., :EOS), false),
@@ -161,7 +169,6 @@ All header symbols.
 const MD_HEADER = (:H1, :H2, :H3, :H4, :H5, :H6)
 
 
-
 """
 MD_OCB_ESC
 
@@ -182,6 +189,7 @@ const MD_OCB_MATH = [
     OCProto(:MATH_A,     :MATH_A,          (:MATH_A,),           false),
     OCProto(:MATH_B,     :MATH_B,          (:MATH_B,),           false),
     OCProto(:MATH_C,     :MATH_C_OPEN,     (:MATH_C_CLOSE,),     false),
+    OCProto(:MATH_C,     :MATH_D_OPEN,     (:MATH_D_CLOSE,),     false),
     OCProto(:MATH_I,     :MATH_I_OPEN,     (:MATH_I_CLOSE,),     false),
     OCProto(:MATH_ALIGN, :MATH_ALIGN_OPEN, (:MATH_ALIGN_CLOSE,), false),
     OCProto(:MATH_EQA,   :MATH_EQA_OPEN,   (:MATH_EQA_CLOSE,),   false),
diff --git a/src/parser/md_validate.jl b/src/parser/md_validate.jl
new file mode 100644
index 000000000..f379e9cb2
--- /dev/null
+++ b/src/parser/md_validate.jl
@@ -0,0 +1,94 @@
+"""
+$SIGNATURES
+
+Validate candidate footnote tokens: defs (`\\[\\^[a-zA-Z0-9]+\\]:`) are kept in `tokens` as
+`:FOOTNOTE_DEF`, refs (`\\[\\^[a-zA-Z0-9]+\\]`) are removed and returned, others are removed.
+"""
+function validate_footnotes!(tokens::Vector{Token})
+    fn_refs = Vector{Token}()
+    rm      = Int[]
+    for (i, τ) in enumerate(tokens)
+        if τ.name == :FOOTNOTE_REF
+            # candidate footnote: either a ref `[^1]` or a def `[^1]:`
+            m = match(r"^\[\^[a-zA-Z0-9]+\](:)?$", τ.ss)
+            if m !== nothing
+                if m.captures[1] !== nothing
+                    # trailing `:` was captured, it's a def: rename the token in place
+                    tokens[i] = Token(:FOOTNOTE_DEF, τ.ss)
+                else
+                    # it's a ref, take and delete
+                    push!(fn_refs, τ)
+                    push!(rm, i)
+                end
+            else
+                # candidate does not match the expected pattern: delete
+                push!(rm, i)
+            end
+        end
+    end
+    deleteat!(tokens, rm) # indices in rm were pushed in increasing order
+    return fn_refs
+end
+
+"""
+$SIGNATURES
+
+Check that `ss` contains a well formed html entity: named (`&Omega;`), decimal or hex (`&#x3A9;`).
+"""
+function validate_html_entity(ss::AS)
+    return occursin(r"&(?:[a-zA-Z0-9]+|#[0-9]{1,6}|#[xX][0-9a-fA-F]{1,6});", ss)
+end
+
+"""
+$(SIGNATURES)
+
+Return `true` for non-header blocks and for header blocks whose opening token is at the very
+start of the string or right after a `\\n`; return `false` otherwise ("fake" header).
+"""
+function validate_header_block(β::OCBlock)::Bool
+    # anything that is not a header is left alone
+    β.name ∈ MD_HEADER || return true
+    # position of the opening token of the header
+    start = from(otok(β))
+    # opening token overlaps with the first character: valid
+    start == 1 && return true
+    # otherwise the header is valid only if the character just before the opening
+    # token is a line return
+    parent = str(β.ss) # does not allocate
+    before = parent[prevind(parent, start)]
+    return before == '\n'
+end
+
+
+"""
+$(SIGNATURES)
+
+Record link defs (`[id]: lk`) in `PAGE_LINK_DEFS`; remove all `:LINK_DEF` blocks from `blocks`.
+"""
+function validate_and_store_link_defs!(blocks::Vector{OCBlock})::Nothing
+    isempty(blocks) && return
+    rm = Int[]
+    parent = str(blocks[1]) # NOTE(review): assumes all blocks share the same parent string
+    for (i, β) in enumerate(blocks)
+        if β.name == :LINK_DEF
+            # incremental backward look until we find a `[` or a `\n`; if `\n` comes first, discard
+            ini  = prevind(parent, from(β)) # presumably the last char of the id (before `]: `)
+            k    = ini
+            char = '\n' # default when the loop does not run: nothing gets stored
+            while k ≥ 1
+                char = parent[k]
+                char ∈ ('[','\n') && break
+                k = prevind(parent, k)
+            end
+            if char == '['
+                # we have a [id]: lk; add it to PAGE_LINK_DEFS
+                id = string(subs(parent, nextind(parent, k), ini))
+                lk = β |> content |> strip |> string
+                PAGE_LINK_DEFS[id] = lk
+            end
+            push!(rm, i) # the block is removed whether or not it was stored
+        end
+    end
+    deleteat!(blocks, rm)
+    return nothing
+end
diff --git a/src/parser/ocblocks.jl b/src/parser/ocblocks.jl
index a24744836..a472c3fc9 100644
--- a/src/parser/ocblocks.jl
+++ b/src/parser/ocblocks.jl
@@ -208,3 +208,23 @@ function form_super_block!(blocks::Vector{OCBlock}, idx::Vector{Int},
     empty!(curseq)
     return
 end
+
+
+"""
+$SIGNATURES
+
+Take a list of tokens and return those corresponding to special characters or html entities
+wrapped in `HTML_SPCH` types (will be left alone by the markdown conversion and be inserted as
+is in the HTML).
+"""
+function find_special_chars(tokens::Vector{Token})
+    spch = Vector{HTML_SPCH}()
+    isempty(tokens) && return spch
+    for τ in tokens
+        τ.name == :CHAR_BACKSPACE   && push!(spch, HTML_SPCH(τ.ss, "&#92;")) # &#92; = backslash
+        τ.name == :CHAR_BACKTICK    && push!(spch, HTML_SPCH(τ.ss, "&#96;")) # &#96; = backtick
+        τ.name == :CHAR_LINEBREAK   && push!(spch, HTML_SPCH(τ.ss, "<br/>"))
+        τ.name == :CHAR_HTML_ENTITY && validate_html_entity(τ.ss) && push!(spch, HTML_SPCH(τ.ss))
+    end
+    return spch
+end
diff --git a/src/parser/tokens.jl b/src/parser/tokens.jl
index d45b5d73c..56d86c4c1 100644
--- a/src/parser/tokens.jl
+++ b/src/parser/tokens.jl
@@ -178,6 +178,12 @@ Check whether `c` is a letter or is in a vector of character `ac`.
 """
 α(c::Char, ac::NTuple{K,Char}=()) where {K} = isletter(c) || (c ∈ ac)
 
+"""
+$(SIGNATURES)
+
+Check whether `c` is a letter, an ASCII digit (`0`-`9`), or in the tuple of characters `ac`.
+"""
+αη(c::Char, ac::NTuple{K,Char}=()) where {K} = α(c, ac) || isdigit(c)
 
 """
 $(SIGNATURES)
@@ -217,8 +223,18 @@ In combination with `incrlook`, checks to see if we have something that looks li
 Note that there can be fake matches, so this will need to be validated later on; if validated
 it will be treated as HTML; otherwise it will be shown as markdown. Triggerin char is a `&`.
 """
-is_html_entity(i::Int, c::Char) = α(c, ('#',';','0','1','2','3','4','5','6','7','8','9','0'))
+is_html_entity(i::Int, c::Char) = αη(c, ('#',';'))
 
+"""
+$(SIGNATURES)
+
+For `incrlook`: check if char `c` at step `i` can be part of `\\[\\^[a-zA-Z0-9]+\\]:`.
+"""
+function is_footnote(i::Int, c::Char)
+    i == 1 && return c == '^'           # first step: must be the caret
+    i == 2 && return αη(c)              # then at least one alphanumeric char for the id
+    i > 2  && return αη(c, (']', ':'))  # loose check; exact shape: see validate_footnotes!
+end
 
 """
 TokenFinder
diff --git a/test/converter/markdown3.jl b/test/converter/markdown3.jl
index b69d683d3..4e6b8dd06 100644
--- a/test/converter/markdown3.jl
+++ b/test/converter/markdown3.jl
@@ -229,6 +229,9 @@ end
 end
 
 
+
+
+
 @testset "IndCode" begin # issue 207
     st = raw"""
         A
@@ -241,19 +244,16 @@ end
         end
         """ * J.EOS
     @test isapproxstr(st |> seval, raw"""
-                        <p>
-                        A
-                        <pre><code class="language-julia">
-                        a &#61; 1&#43;1
-                        if a &gt; 1
-                            @show a
-                        end
-                        b &#61; 2
-                        @show a&#43;b
-                        </code></pre>
-                        end
-                        </p>
-                        """)
+        <p>
+            A
+            <pre><code class="language-julia">a = 1+1
+            if a > 1
+                @show a
+            end
+            b = 2
+            @show a+b</code></pre>
+            end
+        </p>""")
 
     st = raw"""
         A `single` and ```python blah``` and
@@ -267,14 +267,10 @@ end
     @test isapproxstr(st |> seval, raw"""
                         <p>
                         A <code>single</code> and
-                        <pre><code class="language-python">
-                        blah
-                        </code></pre>
-                        and
-                        <pre><code class="language-julia">
-                        a &#61; 1&#43;1
-                        </code></pre>
-                        then</p>
+                        <pre><code class="language-python">blah</code></pre>
+                        and<pre><code class="language-julia">a = 1+1</code></pre>
+                        then
+                        </p>
                         <ul>
                           <li><p>blah</p>
                             <ul>
diff --git a/test/parser/footnotes.jl b/test/parser/footnotes.jl
new file mode 100644
index 000000000..c74fd0252
--- /dev/null
+++ b/test/parser/footnotes.jl
@@ -0,0 +1,34 @@
+@testset "footnotes" begin
+    st = """
+        A[^1] B[^blah] C
+        """ * J.EOS
+    @test isapproxstr(st |> seval, """
+           <p>A<sup id="fnref:1"><a href="#fndef:1" class="fnref">[1]</a></sup>
+              B<sup id="fnref:blah"><a href="#fndef:blah" class="fnref">[2]</a></sup>
+              C</p>""")
+    st = """
+        A[^1] B[^blah]
+        C
+        [^1]: first footnote
+        [^blah]: second footnote
+        """ * J.EOS
+    @test isapproxstr(st |> seval, """
+            <p>
+                A
+                <sup id="fnref:1"><a href="#fndef:1" class="fnref">[1]</a></sup>
+                B<sup id="fnref:blah"><a href="#fndef:blah" class="fnref">[2]</a></sup>
+                C
+                <table class="fndef" id="fndef:1">
+                <tr>
+                    <td class="fndef-backref"><a href="#fnref:1">[1]</a></td>
+                    <td class="fndef-content">first footnote</td>
+                </tr>
+                </table>
+                <table class="fndef" id="fndef:blah">
+                    <tr>
+                        <td class="fndef-backref"><a href="#fnref:blah">[2]</a></td>
+                        <td class="fndef-content">second footnote</td>
+                    </tr>
+                </table>
+            </p>""")
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index c482b4766..0c0a29e3d 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -16,6 +16,7 @@ println("🍺")
 # PARSER folder
 println("PARSER/MD+LX")
 include("parser/markdown+latex.jl")
+include("parser/footnotes.jl")
 println("🍺")
 println("PARSER/HTML")
 include("parser/html.jl")
diff --git a/test/test_utils.jl b/test/test_utils.jl
index 41e1d51f3..0539c7cb7 100644
--- a/test/test_utils.jl
+++ b/test/test_utils.jl
@@ -36,6 +36,10 @@ end
 function explore_md_steps(mds)
     J.def_GLOBAL_PAGE_VARS!()
     J.def_GLOBAL_LXDEFS!()
+    J.def_LOCAL_PAGE_VARS!()
+    J.def_PAGE_EQREFS!()
+    J.def_PAGE_BIBREFS!()
+    J.def_PAGE_FNREFS!()
 
     steps = OrderedDict{Symbol,NamedTuple}()
 
@@ -44,6 +48,9 @@ function explore_md_steps(mds)
     tokens = J.find_indented_blocks(tokens, mds)
     steps[:tokenization] = (tokens=tokens,)
 
+    fn_refs = J.validate_footnotes!(tokens)
+    steps[:fn_validation] = (tokens=tokens, fn_refs=fn_refs)
+
     # ocblocks
     blocks, tokens = J.find_all_ocblocks(tokens, J.MD_OCB_ALL)
     steps[:ocblocks] = (blocks=blocks, tokens=tokens)