bunch of fixes to do with spacing and ordering and tests (#235)

- closes #234, closes #219, closes #222, closes #223, closes #145 - blocks in maths envs are now disabled so that there's no bleeding of code in maths etc (#234) - adds tests to remove some spacing issues before or after insertions (e.g. if `\eqref{..}` is followed by a `.`, there shouldn't be a space between the insertion and the `.`, giving something like `(9).`; this is more annoying than it would appear because the Julia Markdown parser removes whitespaces a bit arbitrarily). this also fixes #222 - reinforced escaping to not have `\*` or `\_` confuse bold or emph environments (#223) - penta ticks to fence triple ticks (#219) and reinforcement of the parser as a result, allowing the use of a token validator when using a greedy rule to allow the discarding of a stack - adds `jd2html`, a function that allows someone to directly test the conversion from a judoc-markdown string to html (should be doc'd) + patch release
tlienart · Sep 27, 2019 · 1dd2525 · tlienart · Sep 27, 2019 · 1dd2525
1 parent dd69a95
commit 1dd2525
Show file tree

Hide file tree

Showing 18 changed files with 387 additions and 53 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -20,7 +20,7 @@ after_success:
 jobs:
   include:
     - stage: "Documentation"
-      julia: 1.0
+      julia: 1.2
       os: linux
       script:
         - julia --project=docs/ -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd()));

diff --git a/Project.toml b/Project.toml
@@ -1,7 +1,7 @@
 name = "JuDoc"
 uuid = "4ca9428c-4c75-11e9-2efb-bf5cb6c1e8f8"
 authors = ["Thibaut Lienart <[email protected]>"]
-version = "0.3.2"
+version = "0.3.3"
 
 [deps]
 Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"

diff --git a/src/JuDoc.jl b/src/JuDoc.jl
@@ -12,7 +12,7 @@ import LiveServer
 
 using DocStringExtensions: SIGNATURES, TYPEDEF
 
-export serve, publish, cleanpull, newsite, optimize
+export serve, publish, cleanpull, newsite, optimize, jd2html
 
 # -----------------------------------------------------------------------------
 #
@@ -114,4 +114,18 @@ include("misc_html.jl")
 # ERROR TYPES
 include("error_types.jl")
 
+"""
+$SIGNATURES
+
+Return the HTML corresponding to a JuDoc-Markdown string.
+"""
+function jd2html(st::AbstractString)::String
+    def_GLOBAL_PAGE_VARS!()
+    def_GLOBAL_LXDEFS!()
+    CUR_PATH[] = "index.md"
+    m, v = convert_md(st * EOS, collect(values(GLOBAL_LXDEFS)))
+    h = convert_html(m, v)
+    return h
+end
+
 end # module
diff --git a/src/converter/fixer.jl b/src/converter/fixer.jl
@@ -56,7 +56,7 @@ function find_and_fix_md_links(hs::String)::String
             end
         end
         # move the head after the match
-        head = nextind(hs, m.offset + length(m.match) - 1)
+        head = nextind(hs, m.offset + lastindex(m.match) - 1)
     end
     strlen = lastindex(hs)
     (head < strlen) && write(h, subs(hs, head, strlen))

diff --git a/src/converter/html_blocks.jl b/src/converter/html_blocks.jl
@@ -32,7 +32,7 @@ end
 """
 $(SIGNATURES)
 
-Helper function to process an individual block when the block is a `HIsDef` such as `{{ ifdef
+Helper function to process an individual block when the block is a `HIsDef` such as `{{ isdef
 author }} {{ fill author }} {{ end }}`. Which checks if a variable exists and if it does, applies
 something.
 """

diff --git a/src/converter/md.jl b/src/converter/md.jl
@@ -35,7 +35,6 @@ function convert_md(mds::String, pre_lxdefs::Vector{LxDef}=Vector{LxDef}();
     #> 1. Tokenize
     tokens  = find_tokens(mds, MD_TOKENS, MD_1C_TOKENS)
     fn_refs = validate_footnotes!(tokens)
-
     #> 1b. Find indented blocks
     tokens = find_indented_blocks(tokens, mds)
 
@@ -290,11 +289,18 @@ function convert_inter_html(ihtml::AS,
         !(hasli2) && (c2b ≤ strlen - 4) && ihtml[c2a:c2b] == "</p>" && (δ2 = 4)
 
         # write whatever is at the front, skip the extra space if still present
-        δ1 = ifelse(iszero(δ1) && !hasli1, 1, δ1)
-        prev = (m.offset - δ1 > 0) ? prevind(ihtml, m.offset - δ1) : 0
+        prev = prevind(ihtml, m.offset - δ1)
+        if prev > 0
+            prev -= ifelse(ihtml[prev] == ' ', 1, 0)
+        else
+            prev = 0
+        end
         (head ≤ prev) && write(htmls, subs(ihtml, head:prev))
         # move head appropriately
-        head = iend + δ2 + 1
+        head = iend + δ2
+        if head ≤ strlen
+            head += ifelse(ihtml[head] in (' ', '>'), 1, 0)
+        end
         # store the resolved block
         write(htmls, convert_block(blocks[i], lxcontext))
     end

diff --git a/src/converter/md_blocks.jl b/src/converter/md_blocks.jl
@@ -123,7 +123,9 @@ $(SIGNATURES)
 Helper function for the code block case of `convert_block`.
 """
 function convert_code_block(ss::SubString)::String
-    m = match(r"```([a-z-]*)(\:[a-zA-Z\\\/-_\.]+)?\s*\n?((?:.|\n)*)```", ss)
+    fencer = ifelse(startswith(ss, "`````"), "`````", "```")
+    reg    = Regex("$fencer([a-z-]*)(\\:[a-zA-Z\\\\\\/-_\\.]+)?\\s*\\n?((?:.|\\n)*)$fencer")
+    m      = match(reg, ss)
     lang  = m.captures[1]
     rpath = m.captures[2]
     code  = m.captures[3]
@@ -187,7 +189,9 @@ function convert_indented_code_block(ss::SubString)::String
     # 1. decrease indentation of all lines (either frontal \n\t or \n⎵⎵⎵⎵)
     code = replace(ss, r"\n(?:\t| {4})" => "\n")
     # 2. return; lang is a LOCAL_PAGE_VARS that is julia by default and can be set
-    return html_code(strip(code), "{{fill lang}}")
+    sc = strip(code)
+    isempty(sc) && return ""
+    return html_code(sc, "{{fill lang}}")
 end
 
 """

diff --git a/src/parser/md_tokens.jl b/src/parser/md_tokens.jl
@@ -40,7 +40,15 @@ const MD_TOKENS = Dict{Char, Vector{TokenFinder}}(
              ],
     ']'  => [ isexactly("]: ") => :LINK_DEF,
              ],
-    '\\' => [ isexactly("\\{")        => :INACTIVE,         # See note [^1]
+    '\\' => [ # -- special characters, see `find_special_chars` in ocblocks
+              isexactly("\\\\")       => :CHAR_LINEBREAK, # --> <br/>
+              isexactly("\\", (' ',)) => :CHAR_BACKSPACE, # --> &#92;
+              isexactly("\\*")        => :CHAR_ASTERISK,  # --> &#42;
+              isexactly("\\_")        => :CHAR_UNDERSCORE,# --> &#95;
+              isexactly("\\`")        => :CHAR_BACKTICK,  # --> &#96;
+              isexactly("\\@")        => :CHAR_ATSIGN,    # --> &#64;
+              # -- maths
+              isexactly("\\{")        => :INACTIVE,         # See note [^1]
               isexactly("\\}")        => :INACTIVE,         # See note [^1]
               isexactly("\\\$")       => :INACTIVE,         # See note [^1]
               isexactly("\\[")        => :MATH_C_OPEN,      # \[ ...
@@ -51,10 +59,8 @@ const MD_TOKENS = Dict{Char, Vector{TokenFinder}}(
               isexactly("\\end{equation}")   => :MATH_D_CLOSE,
               isexactly("\\begin{eqnarray}") => :MATH_EQA_OPEN,
               isexactly("\\end{eqnarray}")   => :MATH_EQA_CLOSE,
+              # -- latex
               isexactly("\\newcommand")      => :LX_NEWCOMMAND,
-              isexactly("\\\\")              => :CHAR_LINEBREAK, # will be replaced by <br/>
-              isexactly("\\", (' ',))        => :CHAR_BACKSPACE, # will be replaced by &#92;
-              isexactly("\\`")               => :CHAR_BACKTICK,  # will be replaced by &#96;
               incrlook((_, c) -> α(c))       => :LX_COMMAND,     # \command⎵*
              ],
     '@'  => [ isexactly("@def", (' ',)) => :MD_DEF_OPEN,  # @def var = ...
@@ -79,7 +85,9 @@ const MD_TOKENS = Dict{Char, Vector{TokenFinder}}(
     '`'  => [ isexactly("`", ('`',), false) => :CODE_SINGLE, # `⎵
               isexactly("``",('`',), false) => :CODE_DOUBLE, # ``⎵*
               isexactly("```", SPACER)      => :CODE_TRIPLE, # ```⎵*
-              incrlook(is_language)         => :CODE_LANG,   # ```lang*
+              isexactly("`````", SPACER)    => :CODE_PENTA,  # `````⎵*
+              is_language()                 => :CODE_LANG,   # ```lang*
+              is_language2()                => :CODE_LANG2,  # `````lang*
              ],
     ) # end dict
 #= NOTE
@@ -134,7 +142,9 @@ const MD_OCB = [
     # ---------------------------------------------------------------------
     OCProto(:COMMENT,         :COMMENT_OPEN, (:COMMENT_CLOSE,), false),
     OCProto(:CODE_BLOCK_LANG, :CODE_LANG,    (:CODE_TRIPLE,),   false),
+    OCProto(:CODE_BLOCK_LANG, :CODE_LANG2,   (:CODE_PENTA,),    false),
     OCProto(:CODE_BLOCK,      :CODE_TRIPLE,  (:CODE_TRIPLE,),   false),
+    OCProto(:CODE_BLOCK,      :CODE_PENTA,   (:CODE_PENTA,),    false),
     OCProto(:CODE_BLOCK_IND,  :LR_INDENT,    (:LINE_RETURN,),   false),
     OCProto(:CODE_INLINE,     :CODE_DOUBLE,  (:CODE_DOUBLE,),   false),
     OCProto(:CODE_INLINE,     :CODE_SINGLE,  (:CODE_SINGLE,),   false),
@@ -217,3 +227,12 @@ MATH_BLOCKS_NAMES
 List of names of maths environments.
 """
 const MATH_BLOCKS_NAMES = [e.name for e ∈ MD_OCB_MATH]
+
+
+"""
+MD_OCB_NO_INNER
+
+List of names of blocks which will deactivate any block contained within them.
+See [`find_all_ocblocks`](@ref).
+"""
+const MD_OCB_NO_INNER = vcat(MD_OCB_ESC, MATH_BLOCKS_NAMES, :LXB)
diff --git a/src/parser/ocblocks.jl b/src/parser/ocblocks.jl
@@ -80,15 +80,16 @@ function find_all_ocblocks(tokens::Vector{Token}, ocplist::Vector{OCProto}; inma
         append!(ocbs_all, ocbs)
     end
     # it may happen that a block is contained in a larger escape block.
-    # For instance this can happen if there is a code block in an escape block (see e.g. #151).
-    # To fix this, we browse the escape blocks in backwards order and check if there is any other
-    # block within it.
+    # For instance this can happen if there is a code block in an escape block
+    # (see e.g. #151) or if there's indentation in a math block.
+    # To fix this, we browse the escape blocks in backwards order and check if
+    # there is any other block within it.
     i = length(ocbs_all)
     active = ones(Bool, i)
     all_heads = from.(ocbs_all)
     while i > 1
         cur_ocb = ocbs_all[i]
-        if active[i] && cur_ocb.name ∈ MD_OCB_ESC
+        if active[i] && cur_ocb.name ∈ MD_OCB_NO_INNER
             # find all blocks within the span of this block, deactivate all of them
             cur_head = all_heads[i]
             cur_tail = to(cur_ocb)
@@ -221,6 +222,9 @@ function find_special_chars(tokens::Vector{Token})
     spch = Vector{HTML_SPCH}()
     isempty(tokens) && return spch
     for τ in tokens
+        τ.name == :CHAR_ASTERISK    && push!(spch, HTML_SPCH(τ.ss, "&#42;"))
+        τ.name == :CHAR_UNDERSCORE  && push!(spch, HTML_SPCH(τ.ss, "&#95;"))
+        τ.name == :CHAR_ATSIGN      && push!(spch, HTML_SPCH(τ.ss, "&#64;"))
         τ.name == :CHAR_BACKSPACE   && push!(spch, HTML_SPCH(τ.ss, "&#92;"))
         τ.name == :CHAR_BACKTICK    && push!(spch, HTML_SPCH(τ.ss, "&#96;"))
         τ.name == :CHAR_LINEBREAK   && push!(spch, HTML_SPCH(τ.ss, "<br/>"))

diff --git a/src/parser/tokens.jl b/src/parser/tokens.jl
@@ -158,16 +158,16 @@ julia> s
 ```
 """
 function isexactly(refstring::AS, follow::NTuple{K,Char} where K = (),
-                   isfollowed=true)::Tuple{Int,Bool,Function}
+                   isfollowed=true)::Tuple{Int,Bool,Function,Nothing}
     # number of steps from the start character
     steps = prevind(refstring, lastindex(refstring))
     # no offset (don't check next character)
-    isempty(follow) && return (steps, false, s -> (s == refstring))
+    isempty(follow) && return (steps, false, s -> (s == refstring), nothing)
     # include next char for verification (--> offset of 1)
     steps = nextind(refstring, steps)
-    # verification function
+    # verification function; we want either (false false or true true))
     λ(s) = (chop(s) == refstring) && !xor(isfollowed, s[end] ∈ follow)
-    return (steps, true, λ)
+    return (steps, true, λ, nothing)
 end
 
 
@@ -193,7 +193,7 @@ a case where from a start character we lazily accept the next sequence of charac
 soon as a character fails to verify `λ(c)`.
 See also [`isexactly`](@ref).
 """
-incrlook(λ::Function) = (0, false, λ)
+incrlook(λ::Function, validator=nothing) = (0, false, λ, validator)
 
 """
 $(SIGNATURES)
@@ -210,12 +210,33 @@ In combination with `incrlook`, checks to see if we have something that looks li
 backtick followed by a valid combination of letter defining a language. Triggering char is a
 first backtick.
 """
-function is_language(i::Int, c::Char)
-    i < 3  && return c=='`'  # ` followed by `` forms the opening ```
-    i == 3 && return α(c)    # must be a letter
-    return α(c, ('-',))      # can be a letter or a hyphen, for instance ```objective-c
+is_language() = incrlook(_is_language, _validate_language)
+
+function _is_language(i::Int, c::Char)
+    i < 3  && return c == '`'  # ` followed by `` forms the opening ```
+    i == 3 && return α(c)      # must be a letter
+    return α(c, ('-',))        # can be a letter or a hyphen, for instance ```objective-c
+end
+
+_validate_language(stack::AS) = match(r"^```[a-zA-Z]", stack) !== nothing
+
+
+"""
+$(SIGNATURES)
+
+See [`is_language`](@ref) but with 5 ticks.
+"""
+is_language2() = incrlook(_is_language2, _validate_language2)
+
+function _is_language2(i::Int, c::Char)
+    i < 5  && return c == '`'
+    i == 5 && return α(c)
+    return α(c, ('-',))
 end
 
+_validate_language2(stack::AS) = match(r"^`````[a-zA-Z]", stack) !== nothing
+
+
 """
 $(SIGNATURES)
 
@@ -242,7 +263,7 @@ TokenFinder
 Convenience type to define tokens. The Tuple comes from the output of functions such as
 [`isexactly`](@ref).
 """
-const TokenFinder = Pair{Tuple{Int,Bool,Function},Symbol}
+const TokenFinder = Pair{Tuple{Int,Bool,Function,Union{Nothing,Function}},Symbol}
 
 
 """
@@ -277,7 +298,7 @@ function find_tokens(str::AS,
 
         # 2. is it one of the multi-char token?
         elseif haskey(tokens_dict, head)
-            for ((steps, offset, λ), case) ∈ tokens_dict[head]
+            for ((steps, offset, λ, ν), case) ∈ tokens_dict[head]
                 #=
                 ↪ steps = length of the lookahead, 0 if incremental
                 ↪ offset = if we need to check one character 'too much'
@@ -316,6 +337,10 @@ function find_tokens(str::AS,
                     end
                     endchar_idx = prevind(str, nextchar_idx)
                     if endchar_idx > head_idx
+                        # if the validator is unhappy, don't move the head and
+                        # consider other rules
+                        ν === nothing || ν(stack) || continue
+                        # otherwise move ahead after the match
                         push!(tokens, Token(case, stack))
                         head_idx = endchar_idx
                     end

diff --git a/test/converter/eval.jl b/test/converter/eval.jl
@@ -23,8 +23,13 @@
     @test isfile(opath)
     @test read(opath, String) == "25"
 
-    @test occursin("code: <pre><code class=\"language-julia\">a = 5\nprint(a^2)</code></pre>", h)
-    @test occursin("then: <pre><code>25</code></pre> done.", h)
+    @test isapproxstr(h, raw"""
+            <p>Simple code:
+            <pre><code class="language-julia">a = 5
+            print(a^2)</code></pre>
+            then:
+            <pre><code>25</code></pre>
+            done.</p>""")
 end
 
 @testset "Eval (errs)" begin
@@ -39,7 +44,11 @@ end
         done.
         """ * J.EOS |> seval
 
-    @test occursin("code: <pre><code class=\"language-python\">a = 5\nprint(a**2)\n</code></pre> done.", h)
+    @test isapproxstr(h, raw"""
+            <p>Simple code:
+            <pre><code class="language-python">a = 5
+            print(a**2)</code></pre>
+            done.</p>""")
 end
 
 @testset "Eval (rinput)" begin
@@ -62,8 +71,13 @@ end
     @test isfile(opath)
     @test read(opath, String) == "25"
 
-    @test occursin("code: <pre><code class=\"language-julia\">a = 5\nprint(a^2)</code></pre>", h)
-    @test occursin("then: <pre><code>25</code></pre> done.", h)
+    @test isapproxstr(h, """
+            <p>Simple code:
+            <pre><code class="language-julia">a = 5
+            print(a^2)</code></pre>
+            then:
+            <pre><code>25</code></pre>
+            done.</p>""")
 
     # ------------
 
@@ -88,8 +102,12 @@ end
     @test isfile(opath)
     @test read(opath, String) == "25"
 
-    @test occursin("code: <pre><code class=\"language-julia\">a = 5\nprint(a^2)</code></pre>", h)
-    @test occursin("then: <pre><code>25</code></pre> done.", h)
+    @test isapproxstr(h, """
+            <p>Simple code:
+            <pre><code class="language-julia">a = 5
+            print(a^2)</code></pre>
+            then:
+            <pre><code>25</code></pre>  done.</p>""")
 end
 
 @testset "Eval (module)" begin