From 8ba8e579ef2d8410b0c2cb02309a84793d810065 Mon Sep 17 00:00:00 2001
From: Thibaut Lienart
- A
- "shell>")
+ pp = replace(pp, r"(\(.*?\)) pkg>"=>s"\1 pkg>")
+ end
+ write(htmls, pp)
head = mc.offset + length(mc.match)
c += 1
end
diff --git a/src/converter/md.jl b/src/converter/md.jl
index 5dd763997..2b45c138b 100644
--- a/src/converter/md.jl
+++ b/src/converter/md.jl
@@ -23,6 +23,8 @@ function convert_md(mds::String, pre_lxdefs::Vector{LxDef}=Vector{LxDef}();
def_PAGE_HEADERS!() # all the headers
def_PAGE_EQREFS!() # page-specific equation dict (hrefs)
def_PAGE_BIBREFS!() # page-specific reference dict (hrefs)
+ def_PAGE_FNREFS!() # page-specific footnote dict
+ def_PAGE_LINK_DEFS!() # page-specific link definition candidates [..]: (...)
end
#
@@ -31,9 +33,10 @@ function convert_md(mds::String, pre_lxdefs::Vector{LxDef}=Vector{LxDef}();
#
#> 1. Tokenize
- tokens = find_tokens(mds, MD_TOKENS, MD_1C_TOKENS)
+ tokens = find_tokens(mds, MD_TOKENS, MD_1C_TOKENS)
+ fn_refs = validate_footnotes!(tokens)
- #> 1'. Find indented blocks
+ #> 1b. Find indented blocks
tokens = find_indented_blocks(tokens, mds)
#> 2. Open-Close blocks (OCBlocks)
@@ -45,6 +48,8 @@ function convert_md(mds::String, pre_lxdefs::Vector{LxDef}=Vector{LxDef}();
filter!(τ -> τ.name ∉ L_RETURNS, tokens)
#>> d. filter out "fake headers" (opening ### that are not at the start of a line)
filter!(β -> validate_header_block(β), blocks)
+ #>> e. keep track of literal content of possible link definitions to use
+ validate_and_store_link_defs!(blocks)
#> 3. LaTeX commands
#>> a. find "newcommands", update active blocks/braces
@@ -70,7 +75,7 @@ function convert_md(mds::String, pre_lxdefs::Vector{LxDef}=Vector{LxDef}();
#
#> 1. Merge all the blocks that will need further processing before insertion
- blocks2insert = merge_blocks(lxcoms, deactivate_divs(blocks), sp_chars)
+ blocks2insert = merge_blocks(lxcoms, deactivate_divs(blocks), fn_refs, sp_chars)
#> 2. Form intermediate markdown + html
inter_md, mblocks = form_inter_md(mds, blocks2insert, lxdefs)
diff --git a/src/converter/md_blocks.jl b/src/converter/md_blocks.jl
index bdf79ca07..8c1356e06 100644
--- a/src/converter/md_blocks.jl
+++ b/src/converter/md_blocks.jl
@@ -7,15 +7,16 @@ Helper function for `convert_inter_html` that processes an extracted block given
function convert_block(β::AbstractBlock, lxcontext::LxContext)::AS
# case for special characters / html entities
β isa HTML_SPCH && return ifelse(isempty(β.r), β.ss, β.r)
-
# Return relevant interpolated string based on case
βn = β.name
βn ∈ MD_HEADER && return convert_header(β)
βn == :CODE_INLINE && return html_code_inline(content(β) |> htmlesc)
βn == :CODE_BLOCK_LANG && return convert_code_block(β.ss)
βn == :CODE_BLOCK_IND && return convert_indented_code_block(β.ss)
- βn == :CODE_BLOCK && return html_code(strip(content(β) |> htmlesc), "{{fill lang}}")
+ βn == :CODE_BLOCK && return html_code(strip(content(β)), "{{fill lang}}")
βn == :ESCAPE && return chop(β.ss, head=3, tail=3)
+ βn == :FOOTNOTE_REF && return convert_footnote_ref(β)
+ βn == :FOOTNOTE_DEF && return convert_footnote_def(β, lxcontext)
# Math block --> needs to call further processing to resolve possible latex
βn ∈ MATH_BLOCKS_NAMES && return convert_math_block(β, lxcontext.lxdefs)
@@ -185,5 +186,60 @@ function convert_indented_code_block(ss::SubString)::String
# 1. decrease indentation of all lines (either frontal \n\t or \n⎵⎵⎵⎵)
code = replace(ss, r"\n(?:\t| {4})" => "\n")
# 2. return; lang is a LOCAL_PAGE_VARS that is julia by default and can be set
- return html_code(strip(code) |> htmlesc, "{{fill lang}}")
+ return html_code(strip(code), "{{fill lang}}")
+end
+
+"""
+$(SIGNATURES)
+
+Helper function to convert a `[^id]` into a html sup object with appropriate ref and backref.
+The number displayed is the position of `id` in `PAGE_FNREFS`; an id that has not been seen
+before on the page is appended to that list first.
+"""
+function convert_footnote_ref(β::Token)::String
+    # β.ss is [^id]; extract id
+    id = string(match(r"\[\^(.*?)\]", β.ss).captures[1])
+    # position of the id among the refs seen so far (`nothing` if it's new)
+    pos = findfirst(==(id), PAGE_FNREFS)
+    if pos === nothing
+        # first time this footnote is referenced: add it to the list of refs
+        push!(PAGE_FNREFS, id)
+        pos = length(PAGE_FNREFS)
+    end
+    # link to the definition (`#fndef:id`), shown as [pos]
+    link = html_ahref("#fndef:$id", "[$pos]"; class="fnref")
+    # `fnref:id` is the name the footnote definition links back to
+    return html_sup("fnref:$id", link)
+end
+
+"""
+$(SIGNATURES)
+
+Helper function to convert a `[^1]: ...` into a html table for the def.
+"""
+function convert_footnote_def(β::OCBlock, lxcontext::LxContext)::String
+ # otok(β) is [^id]:
+ id = match(r"\[\^(.*?)\]:", otok(β).ss).captures[1]
+ pos = 0
+ for (i, pri) in enumerate(PAGE_FNREFS)
+ if pri == id
+ pos = i
+ break
+ end
+ end
+ if pos == 0
+ # this was never referenced before, so probably best not to show it
+ return ""
+ end
+ # need to process the content which could contain stuff
+ ct, _ = convert_md(content(β) * EOS, lxcontext.lxdefs;
+ isrecursive=true, has_mddefs=false)
+ """
+
+
+ """
end
diff --git a/src/converter/md_utils.jl b/src/converter/md_utils.jl
index dc8152234..945b9c527 100644
--- a/src/converter/md_utils.jl
+++ b/src/converter/md_utils.jl
@@ -48,27 +48,6 @@ function deactivate_divs(blocks::Vector{OCBlock})::Vector{OCBlock}
end
-"""
-$(SIGNATURES)
-
-Given a candidate header block, check that the opening `#` is at the start of a line, otherwise
-ignore the block.
-"""
-function validate_header_block(β::OCBlock)::Bool
- # skip non-header blocks
- β.name ∈ MD_HEADER || return true
- # if it's a header block, have a look at the opening token
- τ = otok(β)
- # check if it overlaps with the first character
- from(τ) == 1 && return true
- # otherwise check if the previous character is a linereturn
- s = str(β.ss) # does not allocate
- prevc = s[prevind(str(β.ss), from(τ))]
- prevc == '\n' && return true
- return false
-end
-
-
"""
$(SIGNATURES)
diff --git a/src/jd_vars.jl b/src/jd_vars.jl
index d826dbbbc..6564dee05 100644
--- a/src/jd_vars.jl
+++ b/src/jd_vars.jl
@@ -49,12 +49,14 @@ is processed.
LOCAL_PAGE_VARS["hasmath"] = Pair(true, (Bool,))
LOCAL_PAGE_VARS["hascode"] = Pair(false, (Bool,))
LOCAL_PAGE_VARS["date"] = Pair(Date(1), (String, Date, Nothing))
- LOCAL_PAGE_VARS["jd_ctime"] = Pair(Date(1), (Date,)) # time of creation
- LOCAL_PAGE_VARS["jd_mtime"] = Pair(Date(1), (Date,)) # time of last modification
- LOCAL_PAGE_VARS["jd_rpath"] = Pair("", (String,)) # local path to file src/[...]/blah.md
LOCAL_PAGE_VARS["lang"] = Pair("julia", (String,)) # default lang for indented code
LOCAL_PAGE_VARS["reflinks"] = Pair(true, (Bool,)) # whether there are reflinks or not
+ # page vars used by judoc, should not be accessed or defined
+ LOCAL_PAGE_VARS["jd_ctime"] = Pair(Date(1), (Date,)) # time of creation
+ LOCAL_PAGE_VARS["jd_mtime"] = Pair(Date(1), (Date,)) # time of last modification
+ LOCAL_PAGE_VARS["jd_rpath"] = Pair("", (String,)) # local path to file src/[...]/blah.md
+
# If there are GLOBAL vars that are defined, they take precedence
local_keys = keys(LOCAL_PAGE_VARS)
for k in keys(GLOBAL_PAGE_VARS)
@@ -73,9 +75,10 @@ the title, the refstring version of the title, the occurence number and the leve
"""
const PAGE_HEADERS = Dict{Int,Tuple{AS,AS,Int,Int}}()
-
"""
$(SIGNATURES)
+
+Empties `PAGE_HEADERS`.
"""
@inline function def_PAGE_HEADERS!()::Nothing
empty!(PAGE_HEADERS)
@@ -83,6 +86,40 @@ $(SIGNATURES)
end
+"""
+PAGE_FNREFS
+
+Names of the footnotes seen so far on the current page, kept in the order they were first seen.
+"""
+const PAGE_FNREFS = String[]
+
+"""
+$(SIGNATURES)
+
+Empties `PAGE_FNREFS`.
+"""
+@inline function def_PAGE_FNREFS!()::Nothing
+ empty!(PAGE_FNREFS)
+ return nothing
+end
+
+"""
+PAGE_LINK_DEFS
+
+Link definition candidates of the current page: maps the `id` of a `[id]: link` line to `link`.
+"""
+const PAGE_LINK_DEFS = LittleDict{String,String}()
+
+"""
+$(SIGNATURES)
+
+Empties `PAGE_LINK_DEFS`.
+"""
+@inline function def_PAGE_LINK_DEFS!()::Nothing
+ empty!(PAGE_LINK_DEFS)
+ return nothing
+end
+
"""
GLOBAL_LXDEFS
diff --git a/src/misc_html.jl b/src/misc_html.jl
index 89b87121a..b75f190d6 100644
--- a/src/misc_html.jl
+++ b/src/misc_html.jl
@@ -12,6 +12,13 @@ Convenience function for a list item
"""
html_li(in::AS) = "
+
+ $(html_ahref("#fnref:$id", "[$pos]"))
+ $(ct)
+
"))
- τ.name == :CHAR_HTML_ENTITY && verify_html_entity(τ.ss) && push!(spch, HTML_SPCH(τ.ss))
- end
- return spch
-end
-
-"""
-$SIGNATURES
-
-Verify that a given string corresponds to a well formed html entity.
-"""
-function verify_html_entity(ss::AS)
- match(r"&(?:[a-z0-9]+|#[0-9]{1,6}|#x[0-9a-f]{1,6});", ss) !== nothing
-end
diff --git a/src/parser/md_tokens.jl b/src/parser/md_tokens.jl
index c7b4dfbd9..1de6b4a64 100644
--- a/src/parser/md_tokens.jl
+++ b/src/parser/md_tokens.jl
@@ -36,6 +36,10 @@ const MD_TOKENS = Dict{Char, Vector{TokenFinder}}(
],
'~' => [ isexactly("~~~") => :ESCAPE, # ~~~ ... ~~~
],
+ '[' => [ incrlook(is_footnote) => :FOOTNOTE_REF, # [^...](:)? defs will be separated after
+ ],
+ ']' => [ isexactly("]: ") => :LINK_DEF,
+ ],
'\\' => [ isexactly("\\{") => :INACTIVE, # See note [^1]
isexactly("\\}") => :INACTIVE, # See note [^1]
isexactly("\\\$") => :INACTIVE, # See note [^1]
@@ -43,6 +47,8 @@ const MD_TOKENS = Dict{Char, Vector{TokenFinder}}(
isexactly("\\]") => :MATH_C_CLOSE, # ... \]
isexactly("\\begin{align}") => :MATH_ALIGN_OPEN,
isexactly("\\end{align}") => :MATH_ALIGN_CLOSE,
+ isexactly("\\begin{equation}") => :MATH_D_OPEN,
+ isexactly("\\end{equation}") => :MATH_D_CLOSE,
isexactly("\\begin{eqnarray}") => :MATH_EQA_OPEN,
isexactly("\\end{eqnarray}") => :MATH_EQA_CLOSE,
isexactly("\\newcommand") => :LX_NEWCOMMAND,
@@ -126,13 +132,15 @@ content which is needed to find latex definitions (see parser/markdown/find_bloc
const MD_OCB = [
# name opening token closing token(s) nestable
# ---------------------------------------------------------------------
- OCProto(:COMMENT, :COMMENT_OPEN, (:COMMENT_CLOSE,), false),
- OCProto(:CODE_BLOCK_LANG, :CODE_LANG, (:CODE_TRIPLE,), false),
- OCProto(:CODE_BLOCK, :CODE_TRIPLE, (:CODE_TRIPLE,), false),
- OCProto(:CODE_BLOCK_IND, :LR_INDENT, (:LINE_RETURN,), false),
- OCProto(:CODE_INLINE, :CODE_DOUBLE, (:CODE_DOUBLE,), false),
- OCProto(:CODE_INLINE, :CODE_SINGLE, (:CODE_SINGLE,), false),
- OCProto(:ESCAPE, :ESCAPE, (:ESCAPE,), false),
+ OCProto(:COMMENT, :COMMENT_OPEN, (:COMMENT_CLOSE,), false),
+ OCProto(:CODE_BLOCK_LANG, :CODE_LANG, (:CODE_TRIPLE,), false),
+ OCProto(:CODE_BLOCK, :CODE_TRIPLE, (:CODE_TRIPLE,), false),
+ OCProto(:CODE_BLOCK_IND, :LR_INDENT, (:LINE_RETURN,), false),
+ OCProto(:CODE_INLINE, :CODE_DOUBLE, (:CODE_DOUBLE,), false),
+ OCProto(:CODE_INLINE, :CODE_SINGLE, (:CODE_SINGLE,), false),
+ OCProto(:ESCAPE, :ESCAPE, (:ESCAPE,), false),
+ OCProto(:FOOTNOTE_DEF, :FOOTNOTE_DEF, (:LINE_RETURN,), false),
+ OCProto(:LINK_DEF, :LINK_DEF, (:LINE_RETURN,), false),
# ------------------------------------------------------------------
OCProto(:H1, :H1_OPEN, (L_RETURNS..., :EOS), false), # see [^3]
OCProto(:H2, :H2_OPEN, (L_RETURNS..., :EOS), false),
@@ -161,7 +169,6 @@ All header symbols.
const MD_HEADER = (:H1, :H2, :H3, :H4, :H5, :H6)
-
"""
MD_OCB_ESC
@@ -182,6 +189,7 @@ const MD_OCB_MATH = [
OCProto(:MATH_A, :MATH_A, (:MATH_A,), false),
OCProto(:MATH_B, :MATH_B, (:MATH_B,), false),
OCProto(:MATH_C, :MATH_C_OPEN, (:MATH_C_CLOSE,), false),
+ OCProto(:MATH_C, :MATH_D_OPEN, (:MATH_D_CLOSE,), false),
OCProto(:MATH_I, :MATH_I_OPEN, (:MATH_I_CLOSE,), false),
OCProto(:MATH_ALIGN, :MATH_ALIGN_OPEN, (:MATH_ALIGN_CLOSE,), false),
OCProto(:MATH_EQA, :MATH_EQA_OPEN, (:MATH_EQA_CLOSE,), false),
diff --git a/src/parser/md_validate.jl b/src/parser/md_validate.jl
new file mode 100644
index 000000000..f379e9cb2
--- /dev/null
+++ b/src/parser/md_validate.jl
@@ -0,0 +1,94 @@
+"""
+$SIGNATURES
+
+Find footnote refs and defs and eliminate the ones that don't verify the appropriate regex:
+`\\[\\^[a-zA-Z0-9]+\\]` for a ref and `\\[\\^[a-zA-Z0-9]+\\]:` for a def. Valid refs are returned.
+"""
+function validate_footnotes!(tokens::Vector{Token})
+ fn_refs = Vector{Token}()
+ rm = Int[] # indices of the tokens to remove from `tokens` once the loop is done
+ for (i, τ) in enumerate(tokens)
+ if τ.name == :FOOTNOTE_REF
+ # candidate: must look exactly like [^1] (ref) or [^1]: (def)
+ m = match(r"^\[\^[a-zA-Z0-9]+\](:)?$", τ.ss)
+ if m !== nothing
+ if m.captures[1] !== nothing
+ # trailing `:` captured, it's a def: keep the token in place, renamed :FOOTNOTE_DEF
+ tokens[i] = Token(:FOOTNOTE_DEF, τ.ss)
+ else
+ # it's a ref, take (it is returned in `fn_refs`) and delete from `tokens`
+ push!(fn_refs, τ)
+ push!(rm, i)
+ end
+ else
+ # malformed candidate, delete
+ push!(rm, i)
+ end
+ end
+ end
+ deleteat!(tokens, rm)
+ return fn_refs
+end
+
+"""
+$SIGNATURES
+
+Verify that a given string contains a well formed html entity: named (`&Eacute;`), decimal (`&#42;`) or hex (`&#x2A;`); case is ignored.
+"""
+function validate_html_entity(ss::AS)
+    return match(r"&(?:[a-z0-9]+|#[0-9]{1,6}|#x[0-9a-f]{1,6});"i, ss) !== nothing
+end
+
+"""
+$(SIGNATURES)
+
+Return `true` for every non-header block, and for a header block only when its opening `#`
+is the very first character or directly follows a line return (used to filter out fake headers).
+"""
+function validate_header_block(β::OCBlock)::Bool
+    # blocks that are not headers are always kept
+    if β.name ∉ MD_HEADER
+        return true
+    end
+    # index at which the opening token of the header starts
+    start = from(otok(β))
+    # an opening token sitting on the first character is at the start of a line
+    start == 1 && return true
+    # otherwise the character right before it has to be a line return
+    parent = str(β.ss)
+    return parent[prevind(parent, start)] == '\n'
+end
+
+
+"""
+$(SIGNATURES)
+
+Store in `PAGE_LINK_DEFS` each `:LINK_DEF` block preceded by a `[id` on the same line, then remove all `:LINK_DEF` blocks from `blocks`.
+"""
+function validate_and_store_link_defs!(blocks::Vector{OCBlock})::Nothing
+ isempty(blocks) && return
+ rm = Int[] # indices of the blocks to remove once the loop is done
+ parent = str(blocks[1]) # NOTE(review): assumes every block indexes into the same string as the first one
+ for (i, β) in enumerate(blocks)
+ if β.name == :LINK_DEF
+ # incremental backward look, starting just before the block, until we find a `[` or a `\n`; if `\n` first, discard
+ ini = prevind(parent, from(β))
+ k = ini
+ char = '\n' # default so that a block starting on the first character is discarded
+ while k ≥ 1
+ char = parent[k]
+ char ∈ ('[','\n') && break
+ k = prevind(parent, k)
+ end
+ if char == '['
+ # we have a [id]: lk add it to PAGE_LINK_DEFS (id is the text between `[` and the block)
+ id = string(subs(parent, nextind(parent, k), ini))
+ lk = β |> content |> strip |> string
+ PAGE_LINK_DEFS[id] = lk
+ end
+ push!(rm, i) # link def blocks are removed whether they were stored or not
+ end
+ end
+ deleteat!(blocks, rm)
+ return nothing
+end
diff --git a/src/parser/ocblocks.jl b/src/parser/ocblocks.jl
index a24744836..a472c3fc9 100644
--- a/src/parser/ocblocks.jl
+++ b/src/parser/ocblocks.jl
@@ -208,3 +208,23 @@ function form_super_block!(blocks::Vector{OCBlock}, idx::Vector{Int},
empty!(curseq)
return
end
+
+
+"""
+$SIGNATURES
+
+Take a list of token and return those corresponding to special characters or html entities wrapped
+in `HTML_SPCH` types (will be left alone by the markdown conversion and be inserted as is in the
+HTML).
+"""
+function find_special_chars(tokens::Vector{Token})
+ spch = Vector{HTML_SPCH}()
+ isempty(tokens) && return spch
+ for τ in tokens
+ τ.name == :CHAR_BACKSPACE && push!(spch, HTML_SPCH(τ.ss, "\"))
+ τ.name == :CHAR_BACKTICK && push!(spch, HTML_SPCH(τ.ss, "`"))
+ τ.name == :CHAR_LINEBREAK && push!(spch, HTML_SPCH(τ.ss, "
"))
+ τ.name == :CHAR_HTML_ENTITY && validate_html_entity(τ.ss) && push!(spch, HTML_SPCH(τ.ss))
+ end
+ return spch
+end
diff --git a/src/parser/tokens.jl b/src/parser/tokens.jl
index d45b5d73c..56d86c4c1 100644
--- a/src/parser/tokens.jl
+++ b/src/parser/tokens.jl
@@ -178,6 +178,12 @@ Check whether `c` is a letter or is in a vector of character `ac`.
"""
α(c::Char, ac::NTuple{K,Char}=()) where {K} = isletter(c) || (c ∈ ac)
+"""
+$(SIGNATURES)
+
+Check whether `c` is alphanumeric (a letter or a digit `0`-`9`) or is in the tuple of characters `ac`.
+"""
+αη(c::Char, ac::NTuple{K,Char}=()) where {K} = α(c, ac) || isdigit(c)
"""
$(SIGNATURES)
@@ -217,8 +223,18 @@ In combination with `incrlook`, checks to see if we have something that looks li
Note that there can be fake matches, so this will need to be validated later on; if validated
it will be treated as HTML; otherwise it will be shown as markdown. Triggerin char is a `&`.
"""
-is_html_entity(i::Int, c::Char) = α(c, ('#',';','0','1','2','3','4','5','6','7','8','9','0'))
+is_html_entity(i::Int, c::Char) = αη(c, ('#',';'))
+"""
+$(SIGNATURES)
+
+In combination with `incrlook`, check if it looks like `\\[\\^[a-zA-Z0-9]+\\]:`; fake matches are possible and are validated later. Triggering char is a `[`.
+"""
+function is_footnote(i::Int, c::Char)
+    i == 1 && return c == '^'
+    i == 2 && return αη(c)
+    return αη(c, (']', ':'))
+end
"""
TokenFinder
diff --git a/test/converter/markdown3.jl b/test/converter/markdown3.jl
index b69d683d3..4e6b8dd06 100644
--- a/test/converter/markdown3.jl
+++ b/test/converter/markdown3.jl
@@ -229,6 +229,9 @@ end
end
+
+
+
@testset "IndCode" begin # issue 207
st = raw"""
A
@@ -241,19 +244,16 @@ end
end
""" * J.EOS
@test isapproxstr(st |> seval, raw"""
-
- end
-
- a = 1+1
- if a > 1
- @show a
- end
- b = 2
- @show a+b
-
+ A +
a = 1+1
+ if a > 1
+ @show a
+ end
+ b = 2
+ @show a+b
+ end
+ """)
st = raw"""
A `single` and ```python blah``` and
@@ -267,14 +267,10 @@ end
@test isapproxstr(st |> seval, raw"""
A single
and
-
- blah
-
- and
-
- a = 1+1
-
- then
+ blah
+ anda = 1+1
+ then
+
blah
[1] | +first footnote | +
[2] | +second footnote | +