From 16b69522a4db457390c0f8b0d2c7eeebec973d2e Mon Sep 17 00:00:00 2001 From: yuin Date: Thu, 31 Oct 2019 17:34:14 +0900 Subject: [PATCH] Remove the WithWorkers option Situations that concurrent inline parsing is effective are very limited due to goroutine overheads and a parse context sharing mutex. --- README.md | 32 ++--- _benchmark/cmark/goldmark_benchmark.go | 15 -- _benchmark/go/benchmark_test.go | 39 ++--- extension/footnote.go | 2 +- parser/link.go | 4 +- parser/parser.go | 192 +------------------------ testutil/testutil.go | 3 +- 7 files changed, 28 insertions(+), 259 deletions(-) diff --git a/README.md b/README.md index b13dcbc..f2e0f12 100644 --- a/README.md +++ b/README.md @@ -84,7 +84,7 @@ With options ```go var buf bytes.Buffer -if err := goldmark.Convert(source, &buf, parser.WithWorkers(16)); err != nil { +if err := goldmark.Convert(source, &buf, parser.WithContext(ctx)); err != nil { panic(err) } ``` @@ -92,11 +92,6 @@ if err := goldmark.Convert(source, &buf, parser.WithWorkers(16)); err != nil { | Functional option | Type | Description | | ----------------- | ---- | ----------- | | `parser.WithContext` | A parser.Context | Context for the parsing phase. | -| parser.WithWorkers | int | Number of goroutines that execute concurrent inline element parsing. | - -`parser.WithWorkers` may make performance better a little if markdown text -is relatively large. Otherwise, `parser.Workers` may cause performance degradation due to -goroutine overheads. Custom parser and renderer -------------------------- @@ -255,19 +250,15 @@ blackfriday v2 can not simply be compared with other Commonmark compliant librar Though goldmark builds clean extensible AST structure and get full compliance with Commonmark, it is resonably fast and less memory consumption. -This benchmark parses a relatively large markdown text. In such text, concurrent parsing -makes performance better a little. - ``` -goos: windows +goos: darwin goarch: amd64 pkg: github.com/yuin/goldmark/_benchmark/go -BenchmarkMarkdown/Blackfriday-v2-4 200 6199986 ns/op 3320027 B/op 20050 allocs/op -BenchmarkMarkdown/GoldMark(workers=16)-4 300 5655736 ns/op 2700250 B/op 14494 allocs/op -BenchmarkMarkdown/GoldMark-4 200 6501805 ns/op 2594488 B/op 13861 allocs/op -BenchmarkMarkdown/CommonMark-4 200 7803784 ns/op 2752553 B/op 18826 allocs/op -BenchmarkMarkdown/Lute-4 200 6920985 ns/op 2984762 B/op 21270 allocs/op -BenchmarkMarkdown/GoMarkdown-4 10 171046030 ns/op 2195980 B/op 22174 allocs/op +BenchmarkMarkdown/Blackfriday-v2-12 337 3407336 ns/op 3261042 B/op 19862 allocs/op +BenchmarkMarkdown/GoldMark-12 302 3947527 ns/op 2574830 B/op 13853 allocs/op +BenchmarkMarkdown/CommonMark-12 249 4784221 ns/op 2739317 B/op 18824 allocs/op +BenchmarkMarkdown/Lute-12 285 4178276 ns/op 4639751 B/op 26665 allocs/op +BenchmarkMarkdown/GoMarkdown-12 9 114246204 ns/op 2175131 B/op 22172 allocs/op ``` ### against cmark(A CommonMark reference implementation written in c) @@ -276,15 +267,12 @@ BenchmarkMarkdown/GoMarkdown-4 10 171046030 ns/op ----------- cmark ----------- file: _data.md iteration: 50 -average: 0.0047014618 sec +average: 0.0037760639 sec +go run ./goldmark_benchmark.go ------- goldmark ------- file: _data.md iteration: 50 -average: 0.0052624750 sec -------- goldmark(workers=16) ------- -file: _data.md -iteration: 50 -average: 0.0044918780 sec +average: 0.0040964230 sec ``` As you can see, goldmark performs pretty much equally to the cmark. diff --git a/_benchmark/cmark/goldmark_benchmark.go b/_benchmark/cmark/goldmark_benchmark.go index f17ee6d..9647de3 100644 --- a/_benchmark/cmark/goldmark_benchmark.go +++ b/_benchmark/cmark/goldmark_benchmark.go @@ -9,7 +9,6 @@ import ( "time" "github.com/yuin/goldmark" - "github.com/yuin/goldmark/parser" "github.com/yuin/goldmark/renderer/html" ) @@ -43,18 +42,4 @@ func main() { fmt.Printf("file: %s\n", file) fmt.Printf("iteration: %d\n", n) fmt.Printf("average: %.10f sec\n", float64((int64(sum)/int64(n)))/1000000000.0) - - sum = time.Duration(0) - for i := 0; i < n; i++ { - start := time.Now() - out.Reset() - if err := markdown.Convert(source, &out, parser.WithWorkers(16)); err != nil { - panic(err) - } - sum += time.Since(start) - } - fmt.Printf("------- goldmark(workers=16) -------\n") - fmt.Printf("file: %s\n", file) - fmt.Printf("iteration: %d\n", n) - fmt.Printf("average: %.10f sec\n", float64((int64(sum)/int64(n)))/1000000000.0) } diff --git a/_benchmark/go/benchmark_test.go b/_benchmark/go/benchmark_test.go index e0e0bf5..e1854ab 100644 --- a/_benchmark/go/benchmark_test.go +++ b/_benchmark/go/benchmark_test.go @@ -7,26 +7,16 @@ import ( gomarkdown "github.com/gomarkdown/markdown" "github.com/yuin/goldmark" - "github.com/yuin/goldmark/parser" "github.com/yuin/goldmark/renderer/html" "github.com/yuin/goldmark/util" "gitlab.com/golang-commonmark/markdown" - bf1 "github.com/russross/blackfriday" bf2 "gopkg.in/russross/blackfriday.v2" "github.com/b3log/lute" ) func BenchmarkMarkdown(b *testing.B) { - b.Run("Blackfriday-v1", func(b *testing.B) { - r := func(src []byte) ([]byte, error) { - out := bf1.MarkdownBasic(src) - return out, nil - } - doBenchmark(b, r) - }) - b.Run("Blackfriday-v2", func(b *testing.B) { r := func(src []byte) ([]byte, error) { out := bf2.Run(src) @@ -35,25 +25,13 @@ func BenchmarkMarkdown(b *testing.B) { doBenchmark(b, r) }) - b.Run("GoldMark(workers=16)", func(b *testing.B) { - markdown := goldmark.New( - goldmark.WithRendererOptions(html.WithXHTML(), html.WithUnsafe()), - ) - r := func(src []byte) ([]byte, error) { - var out bytes.Buffer - err := markdown.Convert(src, &out, parser.WithWorkers(16)) - return out.Bytes(), err - } - doBenchmark(b, r) - }) - b.Run("GoldMark", func(b *testing.B) { markdown := goldmark.New( goldmark.WithRendererOptions(html.WithXHTML(), html.WithUnsafe()), ) r := func(src []byte) ([]byte, error) { var out bytes.Buffer - err := markdown.Convert(src, &out, parser.WithWorkers(0)) + err := markdown.Convert(src, &out) return out.Bytes(), err } doBenchmark(b, r) @@ -70,12 +48,15 @@ func BenchmarkMarkdown(b *testing.B) { }) b.Run("Lute", func(b *testing.B) { - luteEngine := lute.New( - lute.GFM(false), - lute.CodeSyntaxHighlight(false), - lute.SoftBreak2HardBreak(false), - lute.AutoSpace(false), - lute.FixTermTypo(false)) + luteEngine := lute.New() + luteEngine.SetGFMAutoLink(false) + luteEngine.SetGFMStrikethrough(false) + luteEngine.SetGFMTable(false) + luteEngine.SetGFMTaskListItem(false) + luteEngine.SetCodeSyntaxHighlight(false) + luteEngine.SetSoftBreak2HardBreak(false) + luteEngine.SetAutoSpace(false) + luteEngine.SetFixTermTypo(false) r := func(src []byte) ([]byte, error) { out, err := luteEngine.MarkdownStr("Benchmark", util.BytesToReadOnlyString(src)) return util.StringToReadOnlyBytes(out), err diff --git a/extension/footnote.go b/extension/footnote.go index 4a17458..9be7f41 100644 --- a/extension/footnote.go +++ b/extension/footnote.go @@ -140,7 +140,7 @@ func (s *footnoteParser) Parse(parent gast.Node, block text.Reader, pc parser.Co block.Advance(closes + 1) var list *ast.FootnoteList - if tlist := pc.Root().Get(footnoteListKey); tlist != nil { + if tlist := pc.Get(footnoteListKey); tlist != nil { list = tlist.(*ast.FootnoteList) } if list == nil { diff --git a/parser/link.go b/parser/link.go index 324d271..bdefc81 100644 --- a/parser/link.go +++ b/parser/link.go @@ -169,7 +169,7 @@ func (s *linkParser) Parse(parent ast.Node, block text.Reader, pc Context) ast.N block.SetPosition(l, pos) ssegment := text.NewSegment(last.Segment.Stop, segment.Start) maybeReference := block.Value(ssegment) - ref, ok := pc.Root().Reference(util.ToLinkReference(maybeReference)) + ref, ok := pc.Reference(util.ToLinkReference(maybeReference)) if !ok { ast.MergeOrReplaceTextSegment(last.Parent(), last, last.Segment) return nil @@ -243,7 +243,7 @@ func (s *linkParser) parseReferenceLink(parent ast.Node, last *linkLabelState, b maybeReference = block.Value(ssegment) } - ref, ok := pc.Root().Reference(util.ToLinkReference(maybeReference)) + ref, ok := pc.Reference(util.ToLinkReference(maybeReference)) if !ok { return nil, true } diff --git a/parser/parser.go b/parser/parser.go index 18b4e59..917da5b 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -196,9 +196,6 @@ type Context interface { // LastOpenedBlock returns a last node that is currently in parsing. LastOpenedBlock() Block - - // Root returns a context shared accross goroutines. - Root() Context } type parseContext struct { @@ -210,7 +207,6 @@ type parseContext struct { delimiters *Delimiter lastDelimiter *Delimiter openedBlocks []Block - root Context } // NewContext returns a new Context. @@ -224,7 +220,6 @@ func NewContext() Context { delimiters: nil, lastDelimiter: nil, openedBlocks: []Block{}, - root: nil, } } @@ -361,140 +356,6 @@ func (p *parseContext) LastOpenedBlock() Block { return Block{} } -func (p *parseContext) Root() Context { - if p.root == nil { - return p - } - return p.root -} - -type concurrentParseContext struct { - delegate Context - m sync.RWMutex - root Context -} - -func NewConcurrentContext(delegate Context) Context { - return &concurrentParseContext{ - delegate: delegate, - root: nil, - } -} - -func (p *concurrentParseContext) Get(key ContextKey) interface{} { - p.m.RLock() - defer p.m.RUnlock() - ret := p.delegate.Get(key) - return ret -} - -func (p *concurrentParseContext) Set(key ContextKey, value interface{}) { - p.m.Lock() - defer p.m.Unlock() - p.delegate.Set(key, value) -} - -func (p *concurrentParseContext) IDs() IDs { - return p.delegate.IDs() -} - -func (p *concurrentParseContext) BlockOffset() int { - return p.delegate.BlockOffset() -} - -func (p *concurrentParseContext) SetBlockOffset(v int) { - p.m.Lock() - defer p.m.Unlock() - p.delegate.SetBlockOffset(v) -} - -func (p *concurrentParseContext) BlockIndent() int { - return p.delegate.BlockIndent() -} - -func (p *concurrentParseContext) SetBlockIndent(v int) { - p.m.Lock() - defer p.m.Unlock() - p.delegate.SetBlockIndent(v) -} - -func (p *concurrentParseContext) LastDelimiter() *Delimiter { - return p.delegate.LastDelimiter() -} - -func (p *concurrentParseContext) FirstDelimiter() *Delimiter { - return p.delegate.FirstDelimiter() -} - -func (p *concurrentParseContext) PushDelimiter(d *Delimiter) { - p.m.Lock() - defer p.m.Unlock() - p.delegate.PushDelimiter(d) -} - -func (p *concurrentParseContext) RemoveDelimiter(d *Delimiter) { - p.m.Lock() - defer p.m.Unlock() - p.delegate.RemoveDelimiter(d) -} - -func (p *concurrentParseContext) ClearDelimiters(bottom ast.Node) { - p.m.Lock() - defer p.m.Unlock() - p.delegate.ClearDelimiters(bottom) -} - -func (p *concurrentParseContext) AddReference(ref Reference) { - p.m.Lock() - defer p.m.Unlock() - p.delegate.AddReference(ref) -} - -func (p *concurrentParseContext) Reference(label string) (Reference, bool) { - p.m.RLock() - defer p.m.RUnlock() - v, ok := p.delegate.Reference(label) - return v, ok -} - -func (p *concurrentParseContext) References() []Reference { - p.m.RLock() - defer p.m.RUnlock() - ret := p.delegate.References() - return ret -} - -func (p *concurrentParseContext) String() string { - p.m.RLock() - defer p.m.RUnlock() - ret := p.delegate.String() - return ret -} - -func (p *concurrentParseContext) OpenedBlocks() []Block { - return p.delegate.OpenedBlocks() -} - -func (p *concurrentParseContext) SetOpenedBlocks(v []Block) { - p.m.Lock() - defer p.m.Unlock() - p.delegate.SetOpenedBlocks(v) -} - -func (p *concurrentParseContext) LastOpenedBlock() Block { - p.m.RLock() - defer p.m.RUnlock() - ret := p.delegate.LastOpenedBlock() - return ret -} - -func (p *concurrentParseContext) Root() Context { - if p.root == nil { - return p - } - return p.root -} - // State represents parser's state. // State is designed to use as a bit flag. type State int @@ -906,7 +767,6 @@ func (p *parser) addASTTransformer(v util.PrioritizedValue, options map[OptionNa // A ParseConfig struct is a data structure that holds configuration of the Parser.Parse. type ParseConfig struct { Context Context - Workers int } // A ParseOption is a functional option type for the Parser.Parse. @@ -920,15 +780,6 @@ func WithContext(context Context) ParseOption { } } -// WithWorkers is a functional option that allow you to set -// number of inline parsing workers(goroutines). -// If num is 0, inline parsing will never be multithreaded. -func WithWorkers(num int) ParseOption { - return func(c *ParseConfig) { - c.Workers = num - } -} - func (p *parser) Parse(reader text.Reader, opts ...ParseOption) ast.Node { p.initSync.Do(func() { p.config.BlockParsers.Sort() @@ -966,45 +817,10 @@ func (p *parser) Parse(reader text.Reader, opts ...ParseOption) ast.Node { root := ast.NewDocument() p.parseBlocks(root, reader, pc) - if c.Workers < 2 { - blockReader := text.NewBlockReader(reader.Source(), nil) - p.walkBlock(root, func(node ast.Node) { - p.parseBlock(blockReader, node, pc) - }) - } else { - nodes := make([]ast.Node, 0, 100) - p.walkBlock(root, func(node ast.Node) { - nodes = append(nodes, node) - }) - max := (len(nodes) / c.Workers) - 1 - if max < 0 { - blockReader := text.NewBlockReader(reader.Source(), nil) - p.walkBlock(root, func(node ast.Node) { - p.parseBlock(blockReader, node, pc) - }) - } else { - rootContext := NewConcurrentContext(pc) - var wg sync.WaitGroup - for i := 0; i <= max; i++ { - from := i * c.Workers - to := from + c.Workers - if i == max { - to = len(nodes) - } - wg.Add(1) - go func(wg *sync.WaitGroup) { - blockReader := text.NewBlockReader(reader.Source(), nil) - pc := NewContext() - pc.(*parseContext).root = rootContext - for _, n := range nodes[from:to] { - p.parseBlock(blockReader, n, pc) - } - wg.Done() - }(&wg) - } - wg.Wait() - } - } + blockReader := text.NewBlockReader(reader.Source(), nil) + p.walkBlock(root, func(node ast.Node) { + p.parseBlock(blockReader, node, pc) + }) for _, at := range p.astTransformers { at.Transform(root, reader, pc) } diff --git a/testutil/testutil.go b/testutil/testutil.go index 98226f6..4a83a03 100644 --- a/testutil/testutil.go +++ b/testutil/testutil.go @@ -10,7 +10,6 @@ import ( "strings" "github.com/yuin/goldmark" - "github.com/yuin/goldmark/parser" "github.com/yuin/goldmark/util" ) @@ -131,7 +130,7 @@ Actual } }() - if err := m.Convert([]byte(testCase.Markdown), &out, parser.WithWorkers(16)); err != nil { + if err := m.Convert([]byte(testCase.Markdown), &out); err != nil { panic(err) } ok = bytes.Equal(bytes.TrimSpace(out.Bytes()), bytes.TrimSpace([]byte(testCase.Expected)))