Skip to content

Commit

Permalink
Merge pull request #2827 from owid/gdocs-table
Browse files Browse the repository at this point in the history
🎉 Native gdocs tables
  • Loading branch information
ikesau authored Nov 1, 2023
2 parents 70cfd1b + bc1ebfa commit 6ba1911
Show file tree
Hide file tree
Showing 12 changed files with 569 additions and 72 deletions.
1 change: 1 addition & 0 deletions db/model/Gdoc/Gdoc.ts
Original file line number Diff line number Diff line change
Expand Up @@ -633,6 +633,7 @@ export class Gdoc extends BaseEntity implements OwidGdocInterface {
"simple-text",
"sticky-left",
"sticky-right",
"table",
"text"
),
},
Expand Down
20 changes: 20 additions & 0 deletions db/model/Gdoc/enrichedToRaw.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ import {
RawBlockAlign,
RawBlockEntrySummary,
RawBlockVideo,
RawBlockTable,
} from "@ourworldindata/utils"
import { spanToHtmlString } from "./gdocUtils.js"
import { match, P } from "ts-pattern"
Expand Down Expand Up @@ -400,5 +401,24 @@ export function enrichedBlockToRawBlock(
},
}
})
.with({ type: "table" }, (b): RawBlockTable => {
return {
type: b.type,
value: {
template: b.template,
rows: b.rows.map((row) => ({
type: row.type,
value: {
cells: row.cells.map((cell) => ({
type: cell.type,
value: cell.content.map(
enrichedBlockToRawBlock
),
})),
},
})),
},
}
})
.exhaustive()
}
88 changes: 88 additions & 0 deletions db/model/Gdoc/exampleEnrichedBlocks.ts
Original file line number Diff line number Diff line change
Expand Up @@ -441,4 +441,92 @@ export const enrichedBlockExamples: Record<
items: [{ text: "Hello", slug: "#link-to-something" }],
parseErrors: [],
},
table: {
type: "table",
template: "header-row",
size: "narrow",
rows: [
{
type: "table-row",
cells: [
{
type: "table-cell",
content: [
{
type: "text",
value: [
{
spanType: "span-simple-text",
text: "City",
},
],
parseErrors: [],
},
{
type: "text",
value: [
{
spanType: "span-simple-text",
text: "Continent",
},
],
parseErrors: [],
},
],
},
{
type: "table-cell",
content: [
{
type: "text",
value: [
{
spanType: "span-simple-text",
text: "Wellington",
},
],
parseErrors: [],
},
{
type: "text",
value: [
{
spanType: "span-simple-text",
text: "Zealandia",
},
],
parseErrors: [],
},
],
},
{
type: "table-cell",
content: [
{
type: "text",
value: [
{
spanType: "span-simple-text",
text: "Addis Ababa",
},
],
parseErrors: [],
},
{
type: "text",
value: [
{
spanType: "span-simple-text",
text: "Africa",
},
],
parseErrors: [],
},
],
},
],
},
],
parseErrors: [],
},
}
194 changes: 124 additions & 70 deletions db/model/Gdoc/gdocToArchie.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,103 +6,157 @@ import {
RawBlockHorizontalRule,
RawBlockHeading,
isNil,
RawBlockTableRow,
RawBlockTableCell,
RawBlockText,
} from "@ourworldindata/utils"
import { spanToHtmlString } from "./gdocUtils.js"
import { OwidRawGdocBlockToArchieMLString } from "./rawToArchie.js"
import { match, P } from "ts-pattern"

/**
 * Serialises a single Google Docs paragraph into an ArchieML text fragment.
 *
 * List handling is stateful: `context.isInList` is read to decide whether a
 * `[.list]` opener or a `[]` closer has to be emitted before this paragraph,
 * and it is mutated so the next call sees the updated state. A list that is
 * still open after the final paragraph is NOT closed here.
 *
 * @param paragraph - the paragraph to serialise
 * @param context - mutable list-tracking state shared between consecutive calls
 * @returns the ArchieML fragment for this paragraph (possibly an empty string)
 */
function paragraphToString(
    paragraph: docs_v1.Schema$Paragraph,
    context: { isInList: boolean }
): string {
    // A paragraph carrying a bullet is a list item
    const isListItem = !isNil(paragraph.bullet)

    // Open or close the surrounding list when the list state flips
    let output = ""
    if (isListItem && !context.isInList) {
        context.isInList = true
        output += `\n[.list]\n`
    } else if (!isListItem && context.isInList) {
        context.isInList = false
        output += `[]\n`
    }

    if (!paragraph.elements) return output

    const body = paragraph.elements
        .map((element, position) => {
            // Only the first element of a list item gets the asterisk
            const bulletPrefix = isListItem && position === 0 ? "* " : ""

            // Horizontal rules are emitted as ArchieML blocks, spans as HTML,
            // and elements that could not be parsed contribute nothing
            const fragment = match(parseParagraph(element))
                .with(
                    { type: P.union("horizontal-rule") },
                    OwidRawGdocBlockToArchieMLString
                )
                .with({ spanType: P.any }, (span) => spanToHtmlString(span))
                .with(P.nullish, () => "")
                .exhaustive()

            return `${bulletPrefix}${fragment}`
        })
        .join("")

    // Paragraphs with a heading style are wrapped in a heading block whose
    // level is whatever remains of the style name once "HEADING_" is removed
    const styleName = paragraph.paragraphStyle?.namedStyleType
    if (styleName?.includes("HEADING")) {
        const headingBlock: RawBlockHeading = {
            type: "heading",
            value: {
                text: body.trim(),
                level: styleName.replace("HEADING_", ""),
            },
        }
        return `${output}\n${OwidRawGdocBlockToArchieMLString(headingBlock)}`
    }

    return output + body
}

/**
 * Serialises a Google Docs table into an ArchieML `[.+rows]` array.
 *
 * Every paragraph of every cell is run through `paragraphToString` and stored
 * as a raw text block; cell content that is not a paragraph is ignored. Each
 * row is then serialised with `OwidRawGdocBlockToArchieMLString`.
 *
 * @param table - the table of a structural element; may be absent
 * @returns the ArchieML fragment, or an empty string when there is no table
 */
function tableToString(
    table: docs_v1.Schema$StructuralElement["table"]
): string {
    if (!table) return ""

    const { tableRows = [] } = table

    const rows = tableRows.map((tableRow): RawBlockTableRow => {
        const { tableCells = [] } = tableRow

        const cells = tableCells.map((tableCell): RawBlockTableCell => {
            const { content = [] } = tableCell
            // List state is tracked per cell, so a list never spans two cells
            const listState = { isInList: false }
            const blocks: RawBlockText[] = []

            for (const item of content) {
                if (!item.paragraph) continue
                blocks.push({
                    type: "text",
                    value: paragraphToString(item.paragraph, listState),
                })
            }

            // paragraphToString only closes a list when a non-list paragraph
            // follows it, so a list that ends the cell is closed explicitly
            if (listState.isInList) {
                blocks.push({ type: "text", value: "[]" })
            }

            return { type: "table-cell", value: blocks }
        })

        return { type: "table-row", value: { cells } }
    })

    const serialisedRows = rows.map(
        (row) => `\n${OwidRawGdocBlockToArchieMLString(row)}`
    )
    return ["\n[.+rows]", ...serialisedRows, "\n[]"].join("")
}

/**
 * Converts a Google Docs document into ArchieML source text.
 *
 * The top-level structural elements of the body are visited in order:
 * paragraphs are serialised by `paragraphToString` and tables by
 * `tableToString`. Any other kind of element is skipped.
 *
 * Paragraph serialisation lives solely in `paragraphToString`. Handling
 * paragraphs inline here as well would append every paragraph twice and keep
 * two diverging copies of the list state.
 *
 * @param document - the document as returned by the Google Docs API
 * @returns an object whose `text` is the ArchieML source; `text` is empty when
 *          the document has no body or no body content
 */
export async function gdocToArchie(
    document: docs_v1.Schema$Document
): Promise<{ text: string }> {
    // prepare the text holder
    let text = ""
    // List state shared by consecutive top-level paragraphs; it is mutated by
    // paragraphToString as lists open and close
    const context = { isInList: false }

    // check if the body key and content key exist, and give up if not
    const content = document.body?.content
    if (!content) return { text }

    // loop through each content element in the body
    for (const element of content) {
        if (element.paragraph) {
            text += paragraphToString(element.paragraph, context)
        } else if (element.table) {
            text += tableToString(element.table)
        }
    }

    return { text }
}

function parseParagraph(
element: docs_v1.Schema$ParagraphElement
): Span | RawBlockHorizontalRule | null {
// pull out the text

const textRun = element.textRun

// sometimes it's not there, skip this all if so
Expand Down
Loading

0 comments on commit 6ba1911

Please sign in to comment.