From f6d67a136fea0cf09f83042a2d8817c589b0920b Mon Sep 17 00:00:00 2001 From: Arnau Casau <47946624+arnaucasau@users.noreply.github.com> Date: Tue, 30 Jan 2024 16:03:20 +0100 Subject: [PATCH] Improve the API script to handle `|` characters (#718) ### Summary This PR changes how we handle pipes inside math expressions when we convert Sphinx HTML into Markdown. Some math expressions use pipes to define quantum states using the Dirac notation, and we need to escape those characters to avoid breaking the page when the pipe characters are used inside a Markdown table. ### Details One solution would be to handle only the `|` characters used inside a table, but given that the math expressions could be used in nested tags (e.g. `

SOME_EXPRESSION

`), and that handling only those cases would make the script more complex without guaranteeing that we fix every case where the page could fail to render, I decided to take a different approach and handle that character in all math expressions. The PR replaces the `|` character with `\vert `, which renders as the same character. `\vert` needs an extra space at the end to handle cases where the pipe is next to a non-numerical character. In those cases, we must avoid converting `|x` to `\vertx`, given that the latter is not a valid command (`\vert x` is the correct conversion). We also need to take into account that, in some cases, we still need to keep the `|` character because, when escaped (`\|`), it represents a double pipe (`||`), which is used in mathematical expressions such as the length of a vector. This is the regex used, which only matches pipe characters not preceded by a backslash: ```ts /(?<!\\)\|/g ```
\\[\\begin{split}CCX q_0, q_1, q_2 = - I \\otimes I \\otimes |0 \\rangle \\langle 0| + CX \\otimes |1 \\rangle \\langle 1| = + I \\otimes I \\otimes \vert 0 \\rangle \\langle 0\vert + CX \\otimes \vert 1 \\rangle \\langle 1\vert = \\begin{pmatrix} 1 & 0 & 0 & 0 & 0 & 0 & 0 & 0\\\\ 0 & 1 & 0 & 0 & 0 & 0 & 0 & 0\\\\ @@ -1061,7 +1061,7 @@ bits.

).toMatchInlineSnapshot(` "$$ \\begin{split}CCX q_0, q_1, q_2 = - I \\otimes I \\otimes |0 \\rangle \\langle 0| + CX \\otimes |1 \\rangle \\langle 1| = + I \\otimes I \\otimes \vert 0 \\rangle \\langle 0\vert + CX \\otimes \vert 1 \\rangle \\langle 1\vert = \\begin{pmatrix} 1 & 0 & 0 & 0 & 0 & 0 & 0 & 0\\\\ 0 & 1 & 0 & 0 & 0 & 0 & 0 & 0\\\\ @@ -1427,6 +1427,47 @@ test("test dt tag without id", async () => { `); }); +test("test replacement of the pipe character for `\vert` on math expressions", async () => { + expect( + await toMd(` +
+

Methods

+ + + + + + + + + + + + + + +

This is an example of using the | character outside of a math expression

Example single pipe: \\(\\mathcal{Q}^k \\mathcal{A} |0\\rangle\\).

Example double pipe: The length of the vector x is \\(\\|x\\|_2\\).

+

This is a math expression outside the table:

\\[\\mathcal{Q}^k \\mathcal{A} |0\\rangle\\]

+
+ `), + ).toMatchInlineSnapshot(` + "## Methods + + | | | + | ---------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------- | + | [](#text-with-pipe "text with pipe") | This is an example of using the \\| character outside of a math expression | + | [](#span-tag-math-expressions-with-pipe "(span tag) math expressions with pipe") | Example single pipe: $\\mathcal{Q}^k \\mathcal{A} \\vert 0\\rangle$. | + | [](#span-tag-math-expressions-with-double-pipe "(span tag) math expressions with double pipe") | Example double pipe: The length of the vector x is $\\|x\\|_2$. | + + This is a math expression outside the table: + + $$ + \\mathcal{Q}^k \\mathcal{A} \\vert 0\\rangle + $$ + " + `); +}); + async function toMd(html: string) { return ( await sphinxHtmlToMarkdown({ diff --git a/scripts/lib/api/htmlToMd.ts b/scripts/lib/api/htmlToMd.ts index 927c780645c..bdafac327c4 100644 --- a/scripts/lib/api/htmlToMd.ts +++ b/scripts/lib/api/htmlToMd.ts @@ -71,6 +71,9 @@ async function generateMarkdownFile( const sufix = "\\)"; if (value.startsWith(prefix) && value.endsWith(sufix)) { value = value.substring(prefix.length, value.length - sufix.length); + // We need to replace the single `|` characters for `\vert ` to avoid page crashes when + // they are used inside a table. For more information: https://github.com/Qiskit/documentation/issues/488 + value = value.replace(/(?