From 8ba8e579ef2d8410b0c2cb02309a84793d810065 Mon Sep 17 00:00:00 2001 From: Thibaut Lienart Date: Mon, 23 Sep 2019 16:15:25 +0200 Subject: [PATCH] Footnote (#230) * initial footnote capacity * footnote and fixes - added footnote capacity - more tests resulting from judoctemplates updating - more fixes as a result too * version bump (patch) --- Project.toml | 3 +- src/JuDoc.jl | 3 +- src/converter/fixer.jl | 7 ++- src/converter/js_prerender.jl | 7 ++- src/converter/md.jl | 11 ++-- src/converter/md_blocks.jl | 62 +++++++++++++++++++++-- src/converter/md_utils.jl | 21 -------- src/jd_vars.jl | 45 +++++++++++++++-- src/misc_html.jl | 12 ++++- src/parser/md_chars.jl | 27 ---------- src/parser/md_tokens.jl | 24 ++++++--- src/parser/md_validate.jl | 94 +++++++++++++++++++++++++++++++++++ src/parser/ocblocks.jl | 20 ++++++++ src/parser/tokens.jl | 18 ++++++- test/converter/markdown3.jl | 38 +++++++------- test/parser/footnotes.jl | 34 +++++++++++++ test/runtests.jl | 1 + test/test_utils.jl | 7 +++ 18 files changed, 339 insertions(+), 95 deletions(-) delete mode 100644 src/parser/md_chars.jl create mode 100644 src/parser/md_validate.jl create mode 100644 test/parser/footnotes.jl diff --git a/Project.toml b/Project.toml index 5768b9ffe..0dc04958e 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "JuDoc" uuid = "4ca9428c-4c75-11e9-2efb-bf5cb6c1e8f8" authors = ["Thibaut Lienart "] -version = "0.3.1" +version = "0.3.2" [deps] Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" @@ -10,6 +10,7 @@ DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" JuDocTemplates = "6793090a-55ae-11e9-0511-73b91164f4ea" LiveServer = "16fef848-5104-11e9-1b77-fb7a48bbb589" Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a" +OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" [compat] diff --git a/src/JuDoc.jl b/src/JuDoc.jl index 0a0c888d7..8f618c69f 100644 --- a/src/JuDoc.jl +++ b/src/JuDoc.jl @@ -6,6 +6,7 @@ using Markdown using Markdown: htmlesc using Dates # see jd_vars using DelimitedFiles: readdlm +using OrderedCollections import LiveServer @@ -74,7 +75,7 @@ include("parser/lx_tokens.jl") include("parser/lx_blocks.jl") # > markdown include("parser/md_tokens.jl") -include("parser/md_chars.jl") +include("parser/md_validate.jl") # > html include("parser/html_tokens.jl") include("parser/html_blocks.jl") diff --git a/src/converter/fixer.jl b/src/converter/fixer.jl index bb61c727d..630838b72 100644 --- a/src/converter/fixer.jl +++ b/src/converter/fixer.jl @@ -3,9 +3,9 @@ $(SIGNATURES) Direct inline-style links are properly processed by Julia's Markdown processor but not: -* `[link title](https://www.google.com "Google's Homepage")` * `[link title][some reference]` and later `[some reference]: http://www.reddit.com` * `[link title]` and later `[link title]: https://www.mozilla.org` +* (we don't either) `[link title](https://www.google.com "Google's Homepage")` """ function find_and_fix_md_links(hs::String)::String # 1. find all occurences of -- [...]: link @@ -17,7 +17,10 @@ function find_and_fix_md_links(hs::String)::String m_link_defs = collect(eachmatch(r"[((?:(?!]).)*?)]:\s+((?:(?!\<\/p\>)\S)+)", hs)) def_names = [def.captures[1] for def in m_link_defs] - def_links = [def.captures[2] for def in m_link_defs] + + # here's a trick, we do NOT use the link caught here; rather we check the dictionary + # PAGE_LINK_DEFS as otherwise the link may have been altered by JuDoc (e.g. 
if has underscores) + def_links = [PAGE_LINK_DEFS[def.captures[1]] for def in m_link_defs] # here we're looking for [id] or [stuff][id] or ![stuff][id] but not [id]: m_link_refs = collect(eachmatch(r"(!)?[(.*?)](?!:)(?:[(.*?)])?", hs)) diff --git a/src/converter/js_prerender.jl b/src/converter/js_prerender.jl index 516609099..063fc833c 100644 --- a/src/converter/js_prerender.jl +++ b/src/converter/js_prerender.jl @@ -98,7 +98,12 @@ function js2html(hs::String, jsbuffer::IOBuffer, matches::Vector{RegexMatch}, for i ∈ 1:2:length(matches)-1 mo, mc = matches[i:i+1] write(htmls, subs(hs, head, mo.offset - 1)) - write(htmls, parts[c]) + pp = strip(parts[c]) + if startswith(pp, "
"shell>")
+            pp = replace(pp, r"(\(.*?\)) pkg>"=>s"\1 pkg>")
+        end
+        write(htmls, pp)
         head = mc.offset + length(mc.match)
         c += 1
     end
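
For illustration (not part of the patch): the branch added above relies on Julia's regex capture groups with a SubstitutionString to rewrite the package-manager prompt inside prerendered REPL blocks. The substitution target below is a made-up bracket placeholder, purely to show the capture-group mechanics:

    julia> replace("(v1.2) pkg> add JuDoc", r"(\(.*?\)) pkg>" => s"[\1 pkg>]")
    "[(v1.2) pkg>] add JuDoc"
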
diff --git a/src/converter/md.jl b/src/converter/md.jl
index 5dd763997..2b45c138b 100644
--- a/src/converter/md.jl
+++ b/src/converter/md.jl
@@ -23,6 +23,8 @@ function convert_md(mds::String, pre_lxdefs::Vector{LxDef}=Vector{LxDef}();
         def_PAGE_HEADERS!()     # all the headers
         def_PAGE_EQREFS!()      # page-specific equation dict (hrefs)
         def_PAGE_BIBREFS!()     # page-specific reference dict (hrefs)
+        def_PAGE_FNREFS!()      # page-specific footnote list (ordered)
+        def_PAGE_LINK_DEFS!()   # page-specific link definition candidates [..]: (...)
     end
 
     #
@@ -31,9 +33,10 @@ function convert_md(mds::String, pre_lxdefs::Vector{LxDef}=Vector{LxDef}();
     #
 
     #> 1. Tokenize
-    tokens = find_tokens(mds, MD_TOKENS, MD_1C_TOKENS)
+    tokens  = find_tokens(mds, MD_TOKENS, MD_1C_TOKENS)
+    fn_refs = validate_footnotes!(tokens)
 
-    #> 1'. Find indented blocks
+    #> 1b. Find indented blocks
     tokens = find_indented_blocks(tokens, mds)
 
     #> 2. Open-Close blocks (OCBlocks)
@@ -45,6 +48,8 @@ function convert_md(mds::String, pre_lxdefs::Vector{LxDef}=Vector{LxDef}();
     filter!(τ -> τ.name ∉ L_RETURNS, tokens)
     #>> d. filter out "fake headers" (opening ### that are not at the start of a line)
     filter!(β -> validate_header_block(β), blocks)
+    #>> e. keep track of the literal content of possible link definitions for later use
+    validate_and_store_link_defs!(blocks)
 
     #> 3. LaTeX commands
     #>> a. find "newcommands", update active blocks/braces
@@ -70,7 +75,7 @@ function convert_md(mds::String, pre_lxdefs::Vector{LxDef}=Vector{LxDef}();
     #
 
     #> 1. Merge all the blocks that will need further processing before insertion
-    blocks2insert = merge_blocks(lxcoms, deactivate_divs(blocks), sp_chars)
+    blocks2insert = merge_blocks(lxcoms, deactivate_divs(blocks), fn_refs, sp_chars)
 
     #> 2. Form intermediate markdown + html
     inter_md, mblocks = form_inter_md(mds, blocks2insert, lxdefs)
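
For orientation, an illustrative sketch (not a literal excerpt) of where the new footnote and link-definition steps sit in the conversion pipeline, using only names introduced or touched by this patch:

    tokens  = find_tokens(mds, MD_TOKENS, MD_1C_TOKENS)
    fn_refs = validate_footnotes!(tokens)         # pull out [^id] refs, relabel [^id]: as :FOOTNOTE_DEF
    tokens  = find_indented_blocks(tokens, mds)
    blocks, tokens = find_all_ocblocks(tokens, MD_OCB_ALL)
    validate_and_store_link_defs!(blocks)         # store [id]: link candidates in PAGE_LINK_DEFS
    # lxcoms and sp_chars come from the LaTeX / special-char passes not shown here
    blocks2insert = merge_blocks(lxcoms, deactivate_divs(blocks), fn_refs, sp_chars)
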
diff --git a/src/converter/md_blocks.jl b/src/converter/md_blocks.jl
index bdf79ca07..8c1356e06 100644
--- a/src/converter/md_blocks.jl
+++ b/src/converter/md_blocks.jl
@@ -7,15 +7,16 @@ Helper function for `convert_inter_html` that processes an extracted block given
 function convert_block(β::AbstractBlock, lxcontext::LxContext)::AS
     # case for special characters / html entities
     β isa HTML_SPCH     && return ifelse(isempty(β.r), β.ss, β.r)
-
     # Return relevant interpolated string based on case
     βn = β.name
     βn ∈  MD_HEADER        && return convert_header(β)
     βn == :CODE_INLINE     && return html_code_inline(content(β) |> htmlesc)
     βn == :CODE_BLOCK_LANG && return convert_code_block(β.ss)
     βn == :CODE_BLOCK_IND  && return convert_indented_code_block(β.ss)
-    βn == :CODE_BLOCK      && return html_code(strip(content(β) |> htmlesc), "{{fill lang}}")
+    βn == :CODE_BLOCK      && return html_code(strip(content(β)), "{{fill lang}}")
     βn == :ESCAPE          && return chop(β.ss, head=3, tail=3)
+    βn == :FOOTNOTE_REF    && return convert_footnote_ref(β)
+    βn == :FOOTNOTE_DEF    && return convert_footnote_def(β, lxcontext)
 
     # Math block --> needs to call further processing to resolve possible latex
     βn ∈ MATH_BLOCKS_NAMES && return convert_math_block(β, lxcontext.lxdefs)
@@ -185,5 +186,60 @@ function convert_indented_code_block(ss::SubString)::String
     # 1. decrease indentation of all lines (either frontal \n\t or \n⎵⎵⎵⎵)
     code = replace(ss, r"\n(?:\t| {4})" => "\n")
     # 2. return; lang is a LOCAL_PAGE_VARS that is julia by default and can be set
-    return html_code(strip(code) |> htmlesc, "{{fill lang}}")
+    return html_code(strip(code), "{{fill lang}}")
+end
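
A standalone illustration of the de-indent step above, assuming a literal four-space indent on each line of the block:

    julia> replace("\n    a = 1+1\n    @show a", r"\n(?:\t| {4})" => "\n")
    "\na = 1+1\n@show a"
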
+
+"""
+$(SIGNATURES)
+
+Helper function to convert a `[^1]` into an html sup object with the appropriate ref and backref.
+"""
+function convert_footnote_ref(β::Token)::String
+    # β.ss is [^id]; extract id
+    id = string(match(r"\[\^(.*?)\]", β.ss).captures[1])
+    # add it to the list of refs unless it's been seen before
+    pos = 0
+    for (i, pri) in enumerate(PAGE_FNREFS)
+        if pri == id
+            pos = i
+            break
+        end
+    end
+    if pos == 0
+        push!(PAGE_FNREFS, id)
+        pos = length(PAGE_FNREFS)
+    end
+    return html_sup("fnref:$id", html_ahref("#fndef:$id", "[$pos]"; class="fnref"))
+end
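
For a first occurrence of `[^1]`, and assuming the `html_sup(id, in)` helper added in misc_html.jl wraps `in` in a sup tag carrying `id`, the return value would look roughly like:

    convert_footnote_ref(β)    # β.ss == "[^1]", first time this id is seen
    # ≈ "<sup id=\"fnref:1\"><a href=\"#fndef:1\" class=\"fnref\">[1]</a></sup>"
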
+
+"""
+$(SIGNATURES)
+
+Helper function to convert a `[^1]: ...` into an html table for the footnote definition.
+"""
+function convert_footnote_def(β::OCBlock, lxcontext::LxContext)::String
+    # otok(β) is [^id]:
+    id = match(r"\[\^(.*?)\]:", otok(β).ss).captures[1]
+    pos = 0
+    for (i, pri) in enumerate(PAGE_FNREFS)
+        if pri == id
+            pos = i
+            break
+        end
+    end
+    if pos == 0
+        # this was never referenced before, so probably best not to show it
+        return ""
+    end
+    # need to process the definition content, which could itself contain markdown
+    ct, _ = convert_md(content(β) * EOS, lxcontext.lxdefs;
+                       isrecursive=true, has_mddefs=false)
+    """
+    
+        
+            
+            
+        
+    
$(html_ahref("#fnref:$id", "[$pos]"))$(ct)
+ """ end diff --git a/src/converter/md_utils.jl b/src/converter/md_utils.jl index dc8152234..945b9c527 100644 --- a/src/converter/md_utils.jl +++ b/src/converter/md_utils.jl @@ -48,27 +48,6 @@ function deactivate_divs(blocks::Vector{OCBlock})::Vector{OCBlock} end -""" -$(SIGNATURES) - -Given a candidate header block, check that the opening `#` is at the start of a line, otherwise -ignore the block. -""" -function validate_header_block(β::OCBlock)::Bool - # skip non-header blocks - β.name ∈ MD_HEADER || return true - # if it's a header block, have a look at the opening token - τ = otok(β) - # check if it overlaps with the first character - from(τ) == 1 && return true - # otherwise check if the previous character is a linereturn - s = str(β.ss) # does not allocate - prevc = s[prevind(str(β.ss), from(τ))] - prevc == '\n' && return true - return false -end - - """ $(SIGNATURES) diff --git a/src/jd_vars.jl b/src/jd_vars.jl index d826dbbbc..6564dee05 100644 --- a/src/jd_vars.jl +++ b/src/jd_vars.jl @@ -49,12 +49,14 @@ is processed. LOCAL_PAGE_VARS["hasmath"] = Pair(true, (Bool,)) LOCAL_PAGE_VARS["hascode"] = Pair(false, (Bool,)) LOCAL_PAGE_VARS["date"] = Pair(Date(1), (String, Date, Nothing)) - LOCAL_PAGE_VARS["jd_ctime"] = Pair(Date(1), (Date,)) # time of creation - LOCAL_PAGE_VARS["jd_mtime"] = Pair(Date(1), (Date,)) # time of last modification - LOCAL_PAGE_VARS["jd_rpath"] = Pair("", (String,)) # local path to file src/[...]/blah.md LOCAL_PAGE_VARS["lang"] = Pair("julia", (String,)) # default lang for indented code LOCAL_PAGE_VARS["reflinks"] = Pair(true, (Bool,)) # whether there are reflinks or not + # page vars used by judoc, should not be accessed or defined + LOCAL_PAGE_VARS["jd_ctime"] = Pair(Date(1), (Date,)) # time of creation + LOCAL_PAGE_VARS["jd_mtime"] = Pair(Date(1), (Date,)) # time of last modification + LOCAL_PAGE_VARS["jd_rpath"] = Pair("", (String,)) # local path to file src/[...]/blah.md + # If there are GLOBAL vars that are defined, they take precedence local_keys = keys(LOCAL_PAGE_VARS) for k in keys(GLOBAL_PAGE_VARS) @@ -73,9 +75,10 @@ the title, the refstring version of the title, the occurence number and the leve """ const PAGE_HEADERS = Dict{Int,Tuple{AS,AS,Int,Int}}() - """ $(SIGNATURES) + +Empties `PAGE_HEADERS`. """ @inline function def_PAGE_HEADERS!()::Nothing empty!(PAGE_HEADERS) @@ -83,6 +86,40 @@ $(SIGNATURES) end +""" +PAGE_FNREFS + +Keep track of name of seen footnotes; the order is kept as it's a list. +""" +const PAGE_FNREFS = String[] + +""" +$(SIGNATURES) + +Empties `PAGE_FNREFS`. +""" +@inline function def_PAGE_FNREFS!()::Nothing + empty!(PAGE_FNREFS) + return nothing +end + +""" +PAGE_LINK_DEFS + +Keep track of link def candidates +""" +const PAGE_LINK_DEFS = LittleDict{String,String}() + +""" +$(SIGNATURES) + +Empties `PAGE_LINK_DEFS`. +""" +@inline function def_PAGE_LINK_DEFS!()::Nothing + empty!(PAGE_LINK_DEFS) + return nothing +end + """ GLOBAL_LXDEFS diff --git a/src/misc_html.jl b/src/misc_html.jl index 89b87121a..b75f190d6 100644 --- a/src/misc_html.jl +++ b/src/misc_html.jl @@ -12,6 +12,13 @@ Convenience function for a list item """ html_li(in::AS) = "
  • $(in)
  • " +""" +$SIGNATURES + +Convenience function for a sup item +""" +html_sup(id::String, in::AS) = "$in" + """ $(SIGNATURES) @@ -25,9 +32,10 @@ $(SIGNATURES) Convenience function to introduce a hyper reference. """ function html_ahref(link::AS, name::Union{Int,AS}; - title::AS="") - a = "")) - τ.name == :CHAR_HTML_ENTITY && verify_html_entity(τ.ss) && push!(spch, HTML_SPCH(τ.ss)) - end - return spch -end - -""" -$SIGNATURES - -Verify that a given string corresponds to a well formed html entity. -""" -function verify_html_entity(ss::AS) - match(r"&(?:[a-z0-9]+|#[0-9]{1,6}|#x[0-9a-f]{1,6});", ss) !== nothing -end diff --git a/src/parser/md_tokens.jl b/src/parser/md_tokens.jl index c7b4dfbd9..1de6b4a64 100644 --- a/src/parser/md_tokens.jl +++ b/src/parser/md_tokens.jl @@ -36,6 +36,10 @@ const MD_TOKENS = Dict{Char, Vector{TokenFinder}}( ], '~' => [ isexactly("~~~") => :ESCAPE, # ~~~ ... ~~~ ], + '[' => [ incrlook(is_footnote) => :FOOTNOTE_REF, # [^...](:)? defs will be separated after + ], + ']' => [ isexactly("]: ") => :LINK_DEF, + ], '\\' => [ isexactly("\\{") => :INACTIVE, # See note [^1] isexactly("\\}") => :INACTIVE, # See note [^1] isexactly("\\\$") => :INACTIVE, # See note [^1] @@ -43,6 +47,8 @@ const MD_TOKENS = Dict{Char, Vector{TokenFinder}}( isexactly("\\]") => :MATH_C_CLOSE, # ... \] isexactly("\\begin{align}") => :MATH_ALIGN_OPEN, isexactly("\\end{align}") => :MATH_ALIGN_CLOSE, + isexactly("\\begin{equation}") => :MATH_D_OPEN, + isexactly("\\end{equation}") => :MATH_D_CLOSE, isexactly("\\begin{eqnarray}") => :MATH_EQA_OPEN, isexactly("\\end{eqnarray}") => :MATH_EQA_CLOSE, isexactly("\\newcommand") => :LX_NEWCOMMAND, @@ -126,13 +132,15 @@ content which is needed to find latex definitions (see parser/markdown/find_bloc const MD_OCB = [ # name opening token closing token(s) nestable # --------------------------------------------------------------------- - OCProto(:COMMENT, :COMMENT_OPEN, (:COMMENT_CLOSE,), false), - OCProto(:CODE_BLOCK_LANG, :CODE_LANG, (:CODE_TRIPLE,), false), - OCProto(:CODE_BLOCK, :CODE_TRIPLE, (:CODE_TRIPLE,), false), - OCProto(:CODE_BLOCK_IND, :LR_INDENT, (:LINE_RETURN,), false), - OCProto(:CODE_INLINE, :CODE_DOUBLE, (:CODE_DOUBLE,), false), - OCProto(:CODE_INLINE, :CODE_SINGLE, (:CODE_SINGLE,), false), - OCProto(:ESCAPE, :ESCAPE, (:ESCAPE,), false), + OCProto(:COMMENT, :COMMENT_OPEN, (:COMMENT_CLOSE,), false), + OCProto(:CODE_BLOCK_LANG, :CODE_LANG, (:CODE_TRIPLE,), false), + OCProto(:CODE_BLOCK, :CODE_TRIPLE, (:CODE_TRIPLE,), false), + OCProto(:CODE_BLOCK_IND, :LR_INDENT, (:LINE_RETURN,), false), + OCProto(:CODE_INLINE, :CODE_DOUBLE, (:CODE_DOUBLE,), false), + OCProto(:CODE_INLINE, :CODE_SINGLE, (:CODE_SINGLE,), false), + OCProto(:ESCAPE, :ESCAPE, (:ESCAPE,), false), + OCProto(:FOOTNOTE_DEF, :FOOTNOTE_DEF, (:LINE_RETURN,), false), + OCProto(:LINK_DEF, :LINK_DEF, (:LINE_RETURN,), false), # ------------------------------------------------------------------ OCProto(:H1, :H1_OPEN, (L_RETURNS..., :EOS), false), # see [^3] OCProto(:H2, :H2_OPEN, (L_RETURNS..., :EOS), false), @@ -161,7 +169,6 @@ All header symbols. 
const MD_HEADER = (:H1, :H2, :H3, :H4, :H5, :H6) - """ MD_OCB_ESC @@ -182,6 +189,7 @@ const MD_OCB_MATH = [ OCProto(:MATH_A, :MATH_A, (:MATH_A,), false), OCProto(:MATH_B, :MATH_B, (:MATH_B,), false), OCProto(:MATH_C, :MATH_C_OPEN, (:MATH_C_CLOSE,), false), + OCProto(:MATH_C, :MATH_D_OPEN, (:MATH_D_CLOSE,), false), OCProto(:MATH_I, :MATH_I_OPEN, (:MATH_I_CLOSE,), false), OCProto(:MATH_ALIGN, :MATH_ALIGN_OPEN, (:MATH_ALIGN_CLOSE,), false), OCProto(:MATH_EQA, :MATH_EQA_OPEN, (:MATH_EQA_CLOSE,), false), diff --git a/src/parser/md_validate.jl b/src/parser/md_validate.jl new file mode 100644 index 000000000..f379e9cb2 --- /dev/null +++ b/src/parser/md_validate.jl @@ -0,0 +1,94 @@ +""" +$SIGNATURES + +Find footnotes refs and defs and eliminate the ones that don't verify the appropriate regex. +For a footnote ref: `\\[\\^[a-zA-Z0-0]+\\]` and `\\[\\^[a-zA-Z0-0]+\\]:` for the def. +""" +function validate_footnotes!(tokens::Vector{Token}) + fn_refs = Vector{Token}() + rm = Int[] + for (i, τ) in enumerate(tokens) + if τ.name == :FOOTNOTE_REF + # footnote ref [^1]: + m = match(r"^\[\^[a-zA-Z0-9]+\](:)?$", τ.ss) + if m !== nothing + if m.captures[1] !== nothing + # it's a def + tokens[i] = Token(:FOOTNOTE_DEF, τ.ss) + else + # it's a ref, take and delete + push!(fn_refs, τ) + push!(rm, i) + end + else + # delete + push!(rm, i) + end + end + end + deleteat!(tokens, rm) + return fn_refs +end + +""" +$SIGNATURES + +Verify that a given string corresponds to a well formed html entity. +""" +function validate_html_entity(ss::AS) + match(r"&(?:[a-z0-9]+|#[0-9]{1,6}|#x[0-9a-f]{1,6});", ss) !== nothing +end + +""" +$(SIGNATURES) + +Given a candidate header block, check that the opening `#` is at the start of a line, otherwise +ignore the block. +""" +function validate_header_block(β::OCBlock)::Bool + # skip non-header blocks + β.name ∈ MD_HEADER || return true + # if it's a header block, have a look at the opening token + τ = otok(β) + # check if it overlaps with the first character + from(τ) == 1 && return true + # otherwise check if the previous character is a linereturn + s = str(β.ss) # does not allocate + prevc = s[prevind(str(β.ss), from(τ))] + prevc == '\n' && return true + return false +end + + +""" +$(SIGNATURES) + +Keep track of link defs. +""" +function validate_and_store_link_defs!(blocks::Vector{OCBlock})::Nothing + isempty(blocks) && return + rm = Int[] + parent = str(blocks[1]) + for (i, β) in enumerate(blocks) + if β.name == :LINK_DEF + # incremental backward look until we find a `[` or a `\n` if `\n` first, discard + ini = prevind(parent, from(β)) + k = ini + char = '\n' + while k ≥ 1 + char = parent[k] + char ∈ ('[','\n') && break + k = prevind(parent, k) + end + if char == '[' + # we have a [id]: lk add it to PAGE_LINK_DEFS + id = string(subs(parent, nextind(parent, k), ini)) + lk = β |> content |> strip |> string + PAGE_LINK_DEFS[id] = lk + end + push!(rm, i) + end + end + deleteat!(blocks, rm) + return nothing +end diff --git a/src/parser/ocblocks.jl b/src/parser/ocblocks.jl index a24744836..a472c3fc9 100644 --- a/src/parser/ocblocks.jl +++ b/src/parser/ocblocks.jl @@ -208,3 +208,23 @@ function form_super_block!(blocks::Vector{OCBlock}, idx::Vector{Int}, empty!(curseq) return end + + +""" +$SIGNATURES + +Take a list of token and return those corresponding to special characters or html entities wrapped +in `HTML_SPCH` types (will be left alone by the markdown conversion and be inserted as is in the +HTML). 
+""" +function find_special_chars(tokens::Vector{Token}) + spch = Vector{HTML_SPCH}() + isempty(tokens) && return spch + for τ in tokens + τ.name == :CHAR_BACKSPACE && push!(spch, HTML_SPCH(τ.ss, "\")) + τ.name == :CHAR_BACKTICK && push!(spch, HTML_SPCH(τ.ss, "`")) + τ.name == :CHAR_LINEBREAK && push!(spch, HTML_SPCH(τ.ss, "
    ")) + τ.name == :CHAR_HTML_ENTITY && validate_html_entity(τ.ss) && push!(spch, HTML_SPCH(τ.ss)) + end + return spch +end diff --git a/src/parser/tokens.jl b/src/parser/tokens.jl index d45b5d73c..56d86c4c1 100644 --- a/src/parser/tokens.jl +++ b/src/parser/tokens.jl @@ -178,6 +178,12 @@ Check whether `c` is a letter or is in a vector of character `ac`. """ α(c::Char, ac::NTuple{K,Char}=()) where {K} = isletter(c) || (c ∈ ac) +""" +$(SIGNATURES) + +Check whether `c` is alpha numeric or in vector of character `ac` +""" +αη(c::Char, ac::NTuple{K,Char}=()) where {K} = α(c, tuple(ac..., ("$i"[1] for i in 0:9)...)) """ $(SIGNATURES) @@ -217,8 +223,18 @@ In combination with `incrlook`, checks to see if we have something that looks li Note that there can be fake matches, so this will need to be validated later on; if validated it will be treated as HTML; otherwise it will be shown as markdown. Triggerin char is a `&`. """ -is_html_entity(i::Int, c::Char) = α(c, ('#',';','0','1','2','3','4','5','6','7','8','9','0')) +is_html_entity(i::Int, c::Char) = αη(c, ('#',';')) +""" +$(SIGNATURES) + +Check if it looks like `\\[\\^[a-zA-Z0-9]+\\]:`. +""" +function is_footnote(i::Int, c::Char) + i == 1 && return c == '^' + i == 2 && return αη(c) + i > 2 && return αη(c, (']', ':')) +end """ TokenFinder diff --git a/test/converter/markdown3.jl b/test/converter/markdown3.jl index b69d683d3..4e6b8dd06 100644 --- a/test/converter/markdown3.jl +++ b/test/converter/markdown3.jl @@ -229,6 +229,9 @@ end end + + + @testset "IndCode" begin # issue 207 st = raw""" A @@ -241,19 +244,16 @@ end end """ * J.EOS @test isapproxstr(st |> seval, raw""" -

    - A -

    
    -                        a = 1+1
    -                        if a > 1
    -                            @show a
    -                        end
    -                        b = 2
    -                        @show a+b
    -                        
    - end -

    - """) +

    + A +

    a = 1+1
    +            if a > 1
    +                @show a
    +            end
    +            b = 2
    +            @show a+b
    + end +

    """) st = raw""" A `single` and ```python blah``` and @@ -267,14 +267,10 @@ end @test isapproxstr(st |> seval, raw"""

    A single and -

    
    -                        blah
    -                        
    - and -
    
    -                        a = 1+1
    -                        
    - then

    +
    blah
    + and
    a = 1+1
    + then +

    • blah

        diff --git a/test/parser/footnotes.jl b/test/parser/footnotes.jl new file mode 100644 index 000000000..c74fd0252 --- /dev/null +++ b/test/parser/footnotes.jl @@ -0,0 +1,34 @@ +@testset "footnotes" begin + st = """ + A[^1] B[^blah] C + """ * J.EOS + @test isapproxstr(st |> seval, """ +

        A[1] + B[2] + C

        """) + st = """ + A[^1] B[^blah] + C + [^1]: first footnote + [^blah]: second footnote + """ * J.EOS + @test isapproxstr(st |> seval, """ +

        + A + [1] + B[2] + C + + + + + +
        [1]first footnote
        + + + + + +
        [2]second footnote
        +

        """) +end diff --git a/test/runtests.jl b/test/runtests.jl index c482b4766..0c0a29e3d 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -16,6 +16,7 @@ println("🍺") # PARSER folder println("PARSER/MD+LX") include("parser/markdown+latex.jl") +include("parser/footnotes.jl") println("🍺") println("PARSER/HTML") include("parser/html.jl") diff --git a/test/test_utils.jl b/test/test_utils.jl index 41e1d51f3..0539c7cb7 100644 --- a/test/test_utils.jl +++ b/test/test_utils.jl @@ -36,6 +36,10 @@ end function explore_md_steps(mds) J.def_GLOBAL_PAGE_VARS!() J.def_GLOBAL_LXDEFS!() + J.def_LOCAL_PAGE_VARS!() + J.def_PAGE_EQREFS!() + J.def_PAGE_BIBREFS!() + J.def_PAGE_FNREFS!() steps = OrderedDict{Symbol,NamedTuple}() @@ -44,6 +48,9 @@ function explore_md_steps(mds) tokens = J.find_indented_blocks(tokens, mds) steps[:tokenization] = (tokens=tokens,) + fn_refs = J.validate_footnotes!(tokens) + steps[:fn_validation] = (tokens=tokens, fn_refs=fn_refs) + # ocblocks blocks, tokens = J.find_all_ocblocks(tokens, J.MD_OCB_ALL) steps[:ocblocks] = (blocks=blocks, tokens=tokens)