From cee262f9211f6bdd3e353e5f51462defba4ae8ae Mon Sep 17 00:00:00 2001 From: SL-Gundam Date: Sat, 3 Feb 2018 22:57:36 +0100 Subject: [PATCH 01/28] Handle numeric argument values without quotes --- src/Parser.php | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/Parser.php b/src/Parser.php index c39afb1..37c4748 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -417,7 +417,7 @@ protected function parseTag() } elseif (in_array($this->html[$pos], [' ', "\t", "\n"])) { // drop whitespace } elseif (in_array($this->html[$pos] . $this->html[$pos + 1], ['="', "='"])) { - // get attribute value + // get string attribute value $pos++; $await = $this->html[$pos]; // single or double quote $pos++; @@ -428,6 +428,16 @@ protected function parseTag() } $attributes[$currAttrib] = $value; $currAttrib = ''; + } elseif ($this->html[$pos] === '=' && is_numeric($this->html[$pos + 1]) && in_array((int)$this->html[$pos + 1], range(0, 9), true)) { + // get integer attribute value + $pos++; + $value = ''; + while (isset($this->html[$pos]) && is_numeric($this->html[$pos]) && in_array((int)$this->html[$pos], range(0, 9), true)) { + $value .= $this->html[$pos]; + $pos++; + } + $attributes[$currAttrib] = $value; + $currAttrib = ''; } else { $this->invalidTag(); From d0da9e0fae21d160bef3e6c16dbfd5d0fe7049b9 Mon Sep 17 00:00:00 2001 From: SL-Gundam Date: Sun, 4 Feb 2018 00:56:13 +0100 Subject: [PATCH 02/28] Adjust fixInlineElementSpacing to not trigger for emptyTags --- src/Converter.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Converter.php b/src/Converter.php index f4b3758..7acaf94 100644 --- a/src/Converter.php +++ b/src/Converter.php @@ -1360,7 +1360,7 @@ protected function fixBlockElementSpacing() */ protected function fixInlineElementSpacing() { - if ($this->parser->isStartTag) { + if ($this->parser->isStartTag && !$this->parser->isEmptyTag) { // move spaces after the start element to before the element if 
(preg_match('~^(\s+)~', $this->parser->html, $matches)) { $this->out($matches[1]); From d64cd740ad96212c9f7245f7b5c7c9a7250ed146 Mon Sep 17 00:00:00 2001 From: SL-Gundam Date: Sun, 4 Feb 2018 02:22:34 +0100 Subject: [PATCH 03/28] Allow to disable adding the CSS class after the tag --- src/ConverterExtra.php | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/src/ConverterExtra.php b/src/ConverterExtra.php index 3c3a04d..7c0d1a9 100644 --- a/src/ConverterExtra.php +++ b/src/ConverterExtra.php @@ -28,6 +28,13 @@ class ConverterExtra extends Converter */ protected $row = 0; + /** + * Add CSS class after the tag + * + * @var bool + */ + protected $addCssClass = true; + /** * constructor, see Markdownify::Markdownify() for more information */ @@ -120,7 +127,7 @@ protected function handleHeader($level) $this->stack(); } else { $tag = $this->unstack(); - if (!empty($tag['cssSelector'])) { + if (!empty($tag['cssSelector']) && $this->addCssClass) { // {#id.class} $this->out(' {' . $tag['cssSelector'] . '}'); } @@ -150,7 +157,7 @@ protected function handleTag_a_parser() protected function handleTag_a_converter($tag, $buffer) { $output = parent::handleTag_a_converter($tag, $buffer); - if (!empty($tag['cssSelector'])) { + if (!empty($tag['cssSelector']) && $this->addCssClass) { // [This link][id]{#id.class} $output .= '{' . $tag['cssSelector'] . 
'}'; } @@ -570,4 +577,15 @@ protected function getCurrentCssSelector() } return $cssSelector; } + + /** + * set add CSS class after the tag + * + * @param bool $addCssClass + * @return void + */ + public function setAddCssClass($addCssClass) + { + $this->addCssClass = $addCssClass; + } } From d2eeedc3bd6ef5ba0866c77208c798e9400e0271 Mon Sep 17 00:00:00 2001 From: SL-Gundam Date: Sun, 4 Feb 2018 02:53:43 +0100 Subject: [PATCH 04/28] Adjusted test case for this commit d0da9e0fae21d160bef3e6c16dbfd5d0fe7049b9 --- test/ConverterTestCase.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/ConverterTestCase.php b/test/ConverterTestCase.php index f0ed30f..05580fd 100644 --- a/test/ConverterTestCase.php +++ b/test/ConverterTestCase.php @@ -486,7 +486,7 @@ public function providerFixBreaks() $data['break1']['html'] = "Hello,
How are you doing?
"; $data['break1']['md'] = "**Hello, \nHow are you doing?**"; $data['break2']['html'] = "Hey,
How you're doing?


Sorry

You can't get through
"; - $data['break2']['md'] = "**Hey, \nHow you're doing?** \n \n**Sorry \n \nYou can't get through**"; + $data['break2']['md'] = "**Hey, \nHow you're doing?** \n \n**Sorry \n \nYou can't get through**"; return $data; } From 6f40bccfe86981f106c3407439fd732c45f43bb9 Mon Sep 17 00:00:00 2001 From: SL-Gundam Date: Wed, 7 Feb 2018 18:49:09 +0100 Subject: [PATCH 05/28] Fix URL difference on ending slash presence --- src/Converter.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Converter.php b/src/Converter.php index 7acaf94..bed0f3c 100644 --- a/src/Converter.php +++ b/src/Converter.php @@ -787,7 +787,7 @@ protected function handleTag_a_converter($tag, $buffer) return '[' . $buffer . ']()'; } - if ($buffer == $tag['href'] && empty($tag['title'])) { + if (rtrim($buffer, '/') == rtrim($tag['href'], '/') && empty($tag['title'])) { // return '<' . $buffer . '>'; } From 56f44248ab9fa1ed663d7786597f4a5272660740 Mon Sep 17 00:00:00 2001 From: SL-Gundam Date: Wed, 7 Feb 2018 23:08:42 +0100 Subject: [PATCH 06/28] Handle unquoted attribute values --- src/Parser.php | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Parser.php b/src/Parser.php index 37c4748..7630e64 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -417,7 +417,7 @@ protected function parseTag() } elseif (in_array($this->html[$pos], [' ', "\t", "\n"])) { // drop whitespace } elseif (in_array($this->html[$pos] . 
$this->html[$pos + 1], ['="', "='"])) { - // get string attribute value + // get quoted attribute value $pos++; $await = $this->html[$pos]; // single or double quote $pos++; @@ -428,11 +428,11 @@ protected function parseTag() } $attributes[$currAttrib] = $value; $currAttrib = ''; - } elseif ($this->html[$pos] === '=' && is_numeric($this->html[$pos + 1]) && in_array((int)$this->html[$pos + 1], range(0, 9), true)) { - // get integer attribute value + } elseif ($this->html[$pos] === '=') { + // get unquoted attribute value $pos++; $value = ''; - while (isset($this->html[$pos]) && is_numeric($this->html[$pos]) && in_array((int)$this->html[$pos], range(0, 9), true)) { + while (isset($this->html[$pos]) && !in_array($this->html[$pos], array(' ', '/', '>'), true)) { $value .= $this->html[$pos]; $pos++; } From ea771c3774deb13ee362ce08a55b76166c600f80 Mon Sep 17 00:00:00 2001 From: SL-Gundam Date: Thu, 8 Feb 2018 00:15:55 +0100 Subject: [PATCH 07/28] Escape all * and _ instead of just 1 or 2 --- src/Converter.php | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/Converter.php b/src/Converter.php index bed0f3c..847731f 100644 --- a/src/Converter.php +++ b/src/Converter.php @@ -185,10 +185,8 @@ class Converter * TODO: what's with block chars / sequences at the beginning of a block? */ protected $escapeInText = [ - '\*\*([^*]+)\*\*' => '\*\*$1\*\*', // strong - '\*([^*]+)\*' => '\*$1\*', // em - '__(?! |_)(.+)(?!<_| )__' => '\_\_$1\_\_', // strong - '_(?! 
|_)(.+)(?!<_| )_' => '\_$1\_', // em + '\*' => '\*', + '\_' => '\_', '([-*_])([ ]{0,2}\1){2,}' => '\\\\$0', // hr '`' => '\`', // code '\[(.+)\](\s*\()' => '\[$1\]$2', // links: [text] (url) => [text\] (url) From e0241971af582ba3cd6ec0d8c6b82891ba9251b9 Mon Sep 17 00:00:00 2001 From: SL-Gundam Date: Fri, 9 Feb 2018 01:04:40 +0100 Subject: [PATCH 08/28] Cleanup redundant spaces --- src/Converter.php | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Converter.php b/src/Converter.php index 847731f..f7d6a1f 100644 --- a/src/Converter.php +++ b/src/Converter.php @@ -394,7 +394,8 @@ protected function parse() } } // cleanup - $this->output = rtrim(str_replace('&', '&', str_replace('<', '<', str_replace('>', '>', $this->output)))); + $this->output = implode("\n", array_map('rtrim', explode("\n", $this->output))); + $this->output = str_replace('&', '&', str_replace('<', '<', str_replace('>', '>', $this->output))); // end parsing, flush stacked tags $this->flushFootnotes(); $this->stack = []; From 9ab0dc9442be6522e3c0c339dcf6aaed8bc33af8 Mon Sep 17 00:00:00 2001 From: SL-Gundam Date: Fri, 9 Feb 2018 01:09:51 +0100 Subject: [PATCH 09/28] One str_replace instead of three --- src/Converter.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Converter.php b/src/Converter.php index f7d6a1f..8b1ffff 100644 --- a/src/Converter.php +++ b/src/Converter.php @@ -395,7 +395,7 @@ protected function parse() } // cleanup $this->output = implode("\n", array_map('rtrim', explode("\n", $this->output))); - $this->output = str_replace('&', '&', str_replace('<', '<', str_replace('>', '>', $this->output))); + $this->output = str_replace(array('&', '<', '>'), array('&', '<', '>'), $this->output); // end parsing, flush stacked tags $this->flushFootnotes(); $this->stack = []; From 6c9d979ff41cdbfdffdba512c26af776c9e056c8 Mon Sep 17 00:00:00 2001 From: SL-Gundam Date: Thu, 15 Feb 2018 22:48:16 +0100 Subject: [PATCH 10/28] Adjust testcase for 
ea771c3774deb13ee362ce08a55b76166c600f80 --- test/ConverterTestCase.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/ConverterTestCase.php b/test/ConverterTestCase.php index 05580fd..502e515 100644 --- a/test/ConverterTestCase.php +++ b/test/ConverterTestCase.php @@ -462,7 +462,7 @@ public function providerRulesConversion() $data['escape-']['html'] = '-----------------------------------'; $data['escape-']['md'] = '\---\---\---\---\---\---\---\---\---\---\-----'; $data['escape-']['html'] = '*****************'; - $data['escape-']['md'] = '\***\***\***\***\*****'; + $data['escape-']['md'] = '\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*'; return $data; } From eea8837540a90af9ae96df8a56b909396efbe48d Mon Sep 17 00:00:00 2001 From: SL-Gundam Date: Thu, 15 Feb 2018 23:00:58 +0100 Subject: [PATCH 11/28] Add back the final rtrim that was removed in e0241971af582ba3cd6ec0d8c6b82891ba9251b9 --- src/Converter.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Converter.php b/src/Converter.php index 8b1ffff..b08f810 100644 --- a/src/Converter.php +++ b/src/Converter.php @@ -395,7 +395,7 @@ protected function parse() } // cleanup $this->output = implode("\n", array_map('rtrim', explode("\n", $this->output))); - $this->output = str_replace(array('&', '<', '>'), array('&', '<', '>'), $this->output); + $this->output = rtrim(str_replace(array('&', '<', '>'), array('&', '<', '>'), $this->output)); // end parsing, flush stacked tags $this->flushFootnotes(); $this->stack = []; From 55e5f54d7f86983efe6786f444f1cfe27ea7ada2 Mon Sep 17 00:00:00 2001 From: SL-Gundam Date: Thu, 15 Feb 2018 23:01:25 +0100 Subject: [PATCH 12/28] Correct test case for e0241971af582ba3cd6ec0d8c6b82891ba9251b9 --- test/ConverterTestCase.php | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/test/ConverterTestCase.php b/test/ConverterTestCase.php index 502e515..984bca2 100644 --- a/test/ConverterTestCase.php +++ b/test/ConverterTestCase.php @@ 
-146,7 +146,7 @@ public function providerBlockquoteConversion() $data['simple']['md'] = '> blockquoted text goes here'; $data['paragraphs']['html'] = '

paragraph1

paragraph2

'; $data['paragraphs']['md'] = '> paragraph1' . PHP_EOL - . '> ' . PHP_EOL + . '>' . PHP_EOL . '> paragraph2'; $data['cascade']['html'] = '
cascading blockquote
'; $data['cascade']['md'] = '> > cascading blockquote'; @@ -189,7 +189,7 @@ public function providerListConversion() . ' 2. Magic'; $data['next-to-text-in-block-context']['html'] = '
McHale
  1. Bird
  2. Magic
'; $data['next-to-text-in-block-context']['md'] = '> McHale' . PHP_EOL - . '> ' . PHP_EOL + . '>' . PHP_EOL . '> 1. Bird' . PHP_EOL . '> 2. Magic'; $data['next-to-bold']['html'] = 'McHale
  1. Bird
  2. Magic
'; @@ -198,7 +198,7 @@ public function providerListConversion() . ' 1. Bird' . PHP_EOL . ' 2. Magic'; $data['next-to-bold-and-br']['html'] = 'McHale
  1. Bird
  2. Magic
'; - $data['next-to-bold-and-br']['md'] = '**McHale** ' . PHP_EOL + $data['next-to-bold-and-br']['md'] = '**McHale**' . PHP_EOL . PHP_EOL . PHP_EOL . ' 1. Bird' . PHP_EOL @@ -484,9 +484,9 @@ public function providerFixBreaks() { $data = []; $data['break1']['html'] = "Hello,
How are you doing?
"; - $data['break1']['md'] = "**Hello, \nHow are you doing?**"; + $data['break1']['md'] = "**Hello,\nHow are you doing?**"; $data['break2']['html'] = "Hey,
How you're doing?


Sorry

You can't get through
"; - $data['break2']['md'] = "**Hey, \nHow you're doing?** \n \n**Sorry \n \nYou can't get through**"; + $data['break2']['md'] = "**Hey,\nHow you're doing?**\n\n**Sorry\n\nYou can't get through**"; return $data; } From 7a0f6a1f92eddf8ae4bfc00a2bf231ac2fb8fc81 Mon Sep 17 00:00:00 2001 From: SL-Gundam Date: Sat, 17 Feb 2018 22:05:22 +0100 Subject: [PATCH 13/28] Change all html EOLs to line feeds --- src/Converter.php | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Converter.php b/src/Converter.php index 787f020..2e3b026 100644 --- a/src/Converter.php +++ b/src/Converter.php @@ -272,6 +272,7 @@ public function parseString($html) { $this->resetState(); + $html = str_replace(array("\r\n", "\r"), "\n", $html); $this->parser->html = $html; $this->parse(); From e4a1f9b288ac7d047dab962346da3c5ef4b89265 Mon Sep 17 00:00:00 2001 From: SL-Gundam Date: Sat, 17 Feb 2018 22:06:56 +0100 Subject: [PATCH 14/28] flushLinebreaks added before handling text --- src/Converter.php | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Converter.php b/src/Converter.php index 2e3b026..6aaea71 100644 --- a/src/Converter.php +++ b/src/Converter.php @@ -327,6 +327,7 @@ protected function parse() // else drop break; case 'text': + $this->flushLinebreaks(); $this->handleText(); break; case 'tag': From a72f108370e3e7e316c7d2ee8ebb450c1dd2185d Mon Sep 17 00:00:00 2001 From: SL-Gundam Date: Sat, 17 Feb 2018 22:09:52 +0100 Subject: [PATCH 15/28] Decreased the number of lineBreaks after blockelements --- src/Converter.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Converter.php b/src/Converter.php index 6aaea71..9061011 100644 --- a/src/Converter.php +++ b/src/Converter.php @@ -507,7 +507,7 @@ protected function handleTagToText() { if (!$this->keepHTML) { if (!$this->parser->isStartTag && $this->parser->isBlockElement) { - $this->setLineBreaks(2); + $this->setLineBreaks(1); } } else { // don't convert to markdown inside this tag From 
75cf897f806bf0e33d977fa51ec232c1094ec392 Mon Sep 17 00:00:00 2001 From: SL-Gundam Date: Sat, 17 Feb 2018 22:12:29 +0100 Subject: [PATCH 16/28] Added ltrim for html content after closing p tag --- src/Converter.php | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Converter.php b/src/Converter.php index 9061011..7a5ec0e 100644 --- a/src/Converter.php +++ b/src/Converter.php @@ -733,6 +733,7 @@ protected function handleTag_p() { if (!$this->parser->isStartTag) { $this->setLineBreaks(2); + $this->parser->html = ltrim($this->parser->html); } } From 552ba1b22f14b87b1cc9e492944b0426bf7c6d69 Mon Sep 17 00:00:00 2001 From: SL-Gundam Date: Mon, 26 Feb 2018 23:59:38 +0100 Subject: [PATCH 17/28] Ignore Office namespace o:p tags --- src/Converter.php | 1 + src/Parser.php | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Converter.php b/src/Converter.php index 7a5ec0e..38cbf3d 100644 --- a/src/Converter.php +++ b/src/Converter.php @@ -133,6 +133,7 @@ class Converter protected $ignore = [ 'html', 'body', + 'o:p', ]; /** diff --git a/src/Parser.php b/src/Parser.php index f96802d..d9efa29 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -171,6 +171,7 @@ class Parser 'noframes' => true, 'noscript' => true, 'ol' => true, + 'o:p' => true, 'p' => true, 'pre' => true, 'table' => true, @@ -369,7 +370,7 @@ protected function parseTag() // get tagName while (isset($this->html[$pos])) { $pos_ord = ord(strtolower($this->html[$pos])); - if (($pos_ord >= static::$a_ord && $pos_ord <= static::$z_ord) || (!empty($tagName) && is_numeric($this->html[$pos]))) { + if (($pos_ord >= static::$a_ord && $pos_ord <= static::$z_ord) || (!empty($tagName) && is_numeric($this->html[$pos])) || in_array($pos_ord, static::$special_ords)) { $tagName .= $this->html[$pos]; $pos++; } else { From 0bea029f0e915f955ec943a674e6584666bed9c6 Mon Sep 17 00:00:00 2001 From: SL-Gundam Date: Tue, 27 Feb 2018 20:44:51 +0100 Subject: [PATCH 18/28] Add function getescapeInText --- src/Converter.php 
| 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/Converter.php b/src/Converter.php index 38cbf3d..7a7954e 100644 --- a/src/Converter.php +++ b/src/Converter.php @@ -302,6 +302,16 @@ public function setKeepHTML($keepHTML) $this->keepHTML = $keepHTML; } + /** + * return escapeInText + * + * @return array escapeInText + */ + public function getescapeInText() + { + return $this->escapeInText; + } + /** * iterate through the nodes and decide what we * shall do with the current node From 475af0004e47baeef2514dbae8027de1dd354928 Mon Sep 17 00:00:00 2001 From: SL-Gundam Date: Tue, 27 Feb 2018 20:45:52 +0100 Subject: [PATCH 19/28] Fix header markdown escaping --- src/Converter.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Converter.php b/src/Converter.php index 7a7954e..97cc160 100644 --- a/src/Converter.php +++ b/src/Converter.php @@ -254,7 +254,7 @@ public function __construct($linkPosition = self::LINK_AFTER_CONTENT, $bodyWidth $search = []; $replace = []; foreach ($this->escapeInText as $s => $r) { - array_push($search, '@(?escapeInText = [ From dc77382d05826526b31dc289b402a65de361a827 Mon Sep 17 00:00:00 2001 From: SL-Gundam Date: Tue, 27 Feb 2018 20:46:50 +0100 Subject: [PATCH 20/28] Add escaping for = markdown headers --- src/Converter.php | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Converter.php b/src/Converter.php index 97cc160..2810f18 100644 --- a/src/Converter.php +++ b/src/Converter.php @@ -191,6 +191,7 @@ class Converter '\[(.+)\](\s*\()' => '\[$1\]$2', // links: [text] (url) => [text\] (url) '\[(.+)\](\s*)\[(.*)\]' => '\[$1\]$2\[$3\]', // links: [text][id] => [text\][id\] '^#(#{0,5}) ' => '\#$1 ', // header + '^=(=*\h*)$' => '\\\\=$1', // header ]; /** From 27131acbcbe0282c62f4784eb64e3070c296f2fc Mon Sep 17 00:00:00 2001 From: SL-Gundam Date: Tue, 27 Feb 2018 20:48:26 +0100 Subject: [PATCH 21/28] Add proper amount of slashes for escape regex's --- src/Converter.php | 12 ++++++------ 1 file changed, 6 
insertions(+), 6 deletions(-) diff --git a/src/Converter.php b/src/Converter.php index 2810f18..a87bc6d 100644 --- a/src/Converter.php +++ b/src/Converter.php @@ -184,13 +184,13 @@ class Converter * TODO: what's with block chars / sequences at the beginning of a block? */ protected $escapeInText = [ - '\*' => '\*', - '\_' => '\_', + '\*' => '\\\\*', + '_' => '\\\\_', '([-*_])([ ]{0,2}\1){2,}' => '\\\\$0', // hr - '`' => '\`', // code - '\[(.+)\](\s*\()' => '\[$1\]$2', // links: [text] (url) => [text\] (url) - '\[(.+)\](\s*)\[(.*)\]' => '\[$1\]$2\[$3\]', // links: [text][id] => [text\][id\] - '^#(#{0,5}) ' => '\#$1 ', // header + '`' => '\\\\`', // code + '\[(.+)\](\s*\()' => '\\\\[$1\\\\]$2', // links: [text] (url) => [text\] (url) + '\[(.+)\](\s*)\[(.*)\]' => '\\\\[$1\\\\]$2\\\\[$3\\\\]', // links: [text][id] => [text\][id\] + '^#(#{0,5}) ' => '\\\\#$1 ', // header '^=(=*\h*)$' => '\\\\=$1', // header ]; From f6a32907bc4a4d693505c4508b8c0ae0ecfbffc5 Mon Sep 17 00:00:00 2001 From: SL-Gundam Date: Sat, 3 Mar 2018 00:42:08 +0100 Subject: [PATCH 22/28] Correction incase the last attribute is unquoted --- src/Parser.php | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Parser.php b/src/Parser.php index d9efa29..d7c26ea 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -435,6 +435,7 @@ protected function parseTag() $value .= $this->html[$pos]; $pos++; } + $pos--; $attributes[$currAttrib] = $value; $currAttrib = ''; } else { From 9efd59c06995602d19a76695c2ea89801bec9980 Mon Sep 17 00:00:00 2001 From: SL-Gundam Date: Sat, 3 Mar 2018 00:55:00 +0100 Subject: [PATCH 23/28] Replace   with normal space --- src/Converter.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Converter.php b/src/Converter.php index a87bc6d..eb8e03f 100644 --- a/src/Converter.php +++ b/src/Converter.php @@ -407,7 +407,7 @@ protected function parse() } // cleanup $this->output = implode("\n", array_map('rtrim', explode("\n", $this->output))); - $this->output = 
rtrim(str_replace(array('&', '<', '>'), array('&', '<', '>'), $this->output)); + $this->output = rtrim(str_replace(['&', '<', '>', ' '], ['&', '<', '>', ' '], $this->output)); // end parsing, flush stacked tags $this->flushFootnotes(); $this->stack = []; @@ -591,7 +591,7 @@ protected function handleTagToText() $this->buffer(); } else { // add stuff so cleanup just reverses this - $this->out(str_replace('<', '&lt;', str_replace('>', '&gt;', $this->unbuffer()))); + $this->out(str_replace(['<', '>'], ['&lt;', '&gt;'], $this->unbuffer())); } } } From fc34c2757137d2ca13447c0ab49a734dcc807a94 Mon Sep 17 00:00:00 2001 From: SL-Gundam Date: Sat, 3 Mar 2018 01:09:41 +0100 Subject: [PATCH 24/28] Add more character to escapeInText --- src/Converter.php | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/Converter.php b/src/Converter.php index eb8e03f..d369e20 100644 --- a/src/Converter.php +++ b/src/Converter.php @@ -184,14 +184,15 @@ class Converter * TODO: what's with block chars / sequences at the beginning of a block? 
*/ protected $escapeInText = [ - '\*' => '\\\\*', - '_' => '\\\\_', + '\*' => '\\\\*', // * + '_' => '\\\\_', // _ + '\|' => '\\\\|', // | '([-*_])([ ]{0,2}\1){2,}' => '\\\\$0', // hr '`' => '\\\\`', // code '\[(.+)\](\s*\()' => '\\\\[$1\\\\]$2', // links: [text] (url) => [text\] (url) '\[(.+)\](\s*)\[(.*)\]' => '\\\\[$1\\\\]$2\\\\[$3\\\\]', // links: [text][id] => [text\][id\] - '^#(#{0,5}) ' => '\\\\#$1 ', // header - '^=(=*\h*)$' => '\\\\=$1', // header + '^#(#{0,5}) ' => '\\\\#$1 ', // header # + '^=(=*\h*)$' => '\\\\=$1', // header = ]; /** From fd6763e2cc4db4c2eb29baa9d9de93b50012cb16 Mon Sep 17 00:00:00 2001 From: SL-Gundam Date: Fri, 12 Jul 2019 20:00:45 +0200 Subject: [PATCH 25/28] Allow numbers in xmlns attributes names --- src/Parser.php | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Parser.php b/src/Parser.php index d722565..6cb3b64 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -7,6 +7,8 @@ class Parser public static $skipWhitespace = true; public static $a_ord; public static $z_ord; + public static $n0_ord; + public static $n9_ord; public static $special_ords; /** @@ -355,6 +357,8 @@ protected function parseTag() if (!isset(static::$a_ord)) { static::$a_ord = ord('a'); static::$z_ord = ord('z'); + static::$n0_ord = ord('0'); + static::$n9_ord = ord('9'); static::$special_ords = [ ord(':'), // for xml:lang ord('-'), // for http-equiv @@ -411,7 +415,7 @@ protected function parseTag() } $pos_ord = ord(strtolower($this->html[$pos])); - if (($pos_ord >= static::$a_ord && $pos_ord <= static::$z_ord) || in_array($pos_ord, static::$special_ords)) { + if (($pos_ord >= static::$a_ord && $pos_ord <= static::$z_ord) || in_array($pos_ord, static::$special_ords) || (substr($currAttrib, 0, 5) === 'xmlns' && $pos_ord >= static::$n0_ord && $pos_ord <= static::$n9_ord)) { // attribute name $currAttrib .= $this->html[$pos]; } elseif (in_array($this->html[$pos], [' ', "\t", "\n"])) { From b9b3f416bce84c81046282ef3011413312d20b9d Mon Sep 
17 00:00:00 2001 From: SL-Gundam Date: Fri, 1 May 2020 13:41:40 +0200 Subject: [PATCH 26/28] Fix empty table tag --- src/ConverterExtra.php | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/src/ConverterExtra.php b/src/ConverterExtra.php index 4aedd16..b9bdef7 100644 --- a/src/ConverterExtra.php +++ b/src/ConverterExtra.php @@ -302,15 +302,17 @@ protected function handleTag_table() $rows = []; // add padding array_walk_recursive($this->table['rows'], [&$this, 'alignTdContent']); - $header = array_shift($this->table['rows']); - array_push($rows, '| ' . implode(' | ', $header) . ' |'); - array_push($rows, $separator); - foreach ($this->table['rows'] as $row) { - array_push($rows, '| ' . implode(' | ', $row) . ' |'); - } - $this->out(implode("\n" . $this->indent, $rows)); - $this->table = []; - $this->setLineBreaks(2); + if (!empty( $this->table['rows'])) { + $header = array_shift($this->table['rows']); + array_push($rows, '| ' . implode(' | ', $header) . ' |'); + array_push($rows, $separator); + foreach ($this->table['rows'] as $row) { + array_push($rows, '| ' . implode(' | ', $row) . ' |'); + } + $this->out(implode("\n" . $this->indent, $rows)); + } + $this->table = []; + $this->setLineBreaks(2); } } From 9623ae443a01b452c83a7355b0ebc293cff7fb46 Mon Sep 17 00:00:00 2001 From: SL-Gundam Date: Fri, 1 May 2020 13:50:59 +0200 Subject: [PATCH 27/28] Correct indentation --- src/ConverterExtra.php | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/ConverterExtra.php b/src/ConverterExtra.php index b9bdef7..3595f3e 100644 --- a/src/ConverterExtra.php +++ b/src/ConverterExtra.php @@ -302,17 +302,17 @@ protected function handleTag_table() $rows = []; // add padding array_walk_recursive($this->table['rows'], [&$this, 'alignTdContent']); - if (!empty( $this->table['rows'])) { - $header = array_shift($this->table['rows']); - array_push($rows, '| ' . implode(' | ', $header) . 
' |'); - array_push($rows, $separator); - foreach ($this->table['rows'] as $row) { - array_push($rows, '| ' . implode(' | ', $row) . ' |'); - } - $this->out(implode("\n" . $this->indent, $rows)); - } - $this->table = []; - $this->setLineBreaks(2); + if (!empty( $this->table['rows'])) { + $header = array_shift($this->table['rows']); + array_push($rows, '| ' . implode(' | ', $header) . ' |'); + array_push($rows, $separator); + foreach ($this->table['rows'] as $row) { + array_push($rows, '| ' . implode(' | ', $row) . ' |'); + } + $this->out(implode("\n" . $this->indent, $rows)); + } + $this->table = []; + $this->setLineBreaks(2); } } From 7db946f0a9536f092e74a712bef5d33b316b3441 Mon Sep 17 00:00:00 2001 From: Thomas ZILLIOX Date: Fri, 23 Feb 2024 17:49:55 +0100 Subject: [PATCH 28/28] Fix PHP8.3 support --- src/Converter.php | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/Converter.php b/src/Converter.php index f157323..8545a55 100644 --- a/src/Converter.php +++ b/src/Converter.php @@ -227,6 +227,13 @@ class Converter */ protected $indent = ''; + /** + * previous indentation, when we want to disable current indentation and get it back later + * + * @var string + */ + static $previousIndent = ''; + /** * constructor, set options, setup parser * @@ -534,8 +541,7 @@ protected function handleTagToText() // don't indent inside
 tags
                     if ($this->parser->tagName == 'pre') {
                         $this->out($this->parser->node);
-                        static $indent;
-                        $indent = $this->indent;
+                        self::$previousIndent = $this->indent; // static property: must be accessed with ::, not ->
                         $this->indent = '';
                     } else {
                         $this->out($this->parser->node . "\n" . $this->indent);
@@ -556,8 +562,7 @@ protected function handleTagToText()
                     } else {
                         // reset indentation
                         $this->out($this->parser->node);
-                        static $indent;
-                        $this->indent = $indent;
+                        $this->indent = self::$previousIndent; // restore the indentation saved when <pre> was opened
                     }
 
                     if (in_array($this->parent(), ['ins', 'del'])) {