Skip to content

Commit

Permalink
feat: grapher page related research from DB content graph (#3176)
Browse files Browse the repository at this point in the history
This PR replaces the fortunejs content graph with a DB-based alternative, using the links stored in the `posts_links` and `posts_gdocs_links` tables.

![Screenshot 2024-02-07 at 10.54.47.png](https://graphite-user-uploaded-assets-prod.s3.amazonaws.com/0SFFiIjKuUK6UPYHVe6u/8f7371fb-0e80-4d07-8a4e-4fc4c8002c59.png)

This PR also continues the deprecation work started in #3166.

- [x] fully rebake site on staging
- [x] fixed malformed prominent link in content (ourworldindata.org/ instead of https://ourworldindata.org/)
	- [x] https://owid.cloud/wp/wp-admin/post.php?post=34103&action=edit
	- [x] https://owid.cloud/wp/wp-admin/post.php?post=34066&action=edit

**Inconsistency in how grapher chart links are stored in the DB**: there are a handful of chart links stored with the https://ourworldindata.org/grapher prefix. These disappear upon saving the containing articles, which indicates that they were created with an older version of the codebase. I'm therefore opting not to support them in the content graph.
```sql
    SELECT pgl.*, pg.published from posts_gdocs_links pgl
    JOIN posts_gdocs pg on pg.id = pgl.sourceId
    WHERE pgl.target LIKE "https://ourworldindata.org/grapher%"
```
 ### Testing links

Below are some testing links to grapher pages, backlinking to posts in different configurations.

- gdoc: https://ourworldindata.org/grapher/agricultural-export-subsidies
	- [x] http://localhost:3030/grapher/agricultural-export-subsidies
	- [x] http://staging-site-db-content-graph/grapher/agricultural-export-subsidies
- gdocs: https://ourworldindata.org/grapher/pollution-deaths-from-fossil-fuels
	- [x] http://localhost:3030/grapher/pollution-deaths-from-fossil-fuels
	- [x] http://staging-site-db-content-graph/grapher/pollution-deaths-from-fossil-fuels
- wp: https://ourworldindata.org/grapher/dalys-rate-from-all-causes
	- [x] http://localhost:3030/grapher/dalys-rate-from-all-causes
	- [x] http://staging-site-db-content-graph/grapher/dalys-rate-from-all-causes
- wp (with chart redirect): https://ourworldindata.org/grapher/age-standardized-death-rate-from-pm25-pollution-per-100000-vs-gdp-per-capita-int-
	- [x] http://localhost:3030/grapher/age-standardized-death-rate-from-pm25-pollution-per-100000-vs-gdp-per-capita-int-
	- [x] http://staging-site-db-content-graph/grapher/age-standardized-death-rate-from-pm25-pollution-per-100000-vs-gdp-per-capita-int-
- gdoc (with chart redirect): https://ourworldindata.org/grapher/population-long-run-with-projections?time=earliest..2100&country=~OWID_WRL
	- [x] http://localhost:3030/grapher/population-long-run-with-projections?time=earliest..2100&country=~OWID_WRL
	- [x] http://staging-site-db-content-graph/grapher/population-long-run-with-projections?time=earliest..2100&country=~OWID_WRL
- none: https://ourworldindata.org/grapher/death-rates-alcohol-drug-overdoses-by-age-who
	- [x] http://localhost:3030/grapher/death-rates-alcohol-drug-overdoses-by-age-who
	- [x] http://staging-site-db-content-graph/grapher/death-rates-alcohol-drug-overdoses-by-age-who
  • Loading branch information
mlbrgl authored Feb 19, 2024
2 parents e7eef1f + 08fe279 commit ee5bd52
Show file tree
Hide file tree
Showing 17 changed files with 323 additions and 1,004 deletions.
3 changes: 2 additions & 1 deletion adminSiteServer/apiRouter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ import {
postsTable,
setTagsForPost,
getTagsByPostId,
getPermalinks,
} from "../db/model/Post.js"
import {
checkFullDeployFallback,
Expand Down Expand Up @@ -217,7 +218,7 @@ const getReferencesByChartId = async (chartId: number): Promise<References> => {
}

const postsPromise = getPostsForSlugs(slugs)
const permalinksPromise = wpdb.getPermalinks()
const permalinksPromise = getPermalinks()
const publishedLinksToChartPromise = Link.getPublishedLinksTo(
slugs,
OwidGdocLinkType.Grapher
Expand Down
11 changes: 4 additions & 7 deletions baker/GrapherBaker.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,7 @@ import {
merge,
partition,
} from "@ourworldindata/utils"
import {
getRelatedArticles,
getRelatedChartsForVariable,
getRelatedResearchAndWritingForVariable,
isWordpressAPIEnabled,
isWordpressDBEnabled,
} from "../db/wpdb.js"
import { isWordpressAPIEnabled, isWordpressDBEnabled } from "../db/wpdb.js"
import fs from "fs-extra"
import * as lodash from "lodash"
import { bakeGraphersToPngs } from "./GrapherImageBaker.js"
Expand All @@ -35,6 +29,8 @@ import { isPathRedirectedToExplorer } from "../explorerAdminServer/ExplorerRedir
import {
getPostEnrichedBySlug,
getPostRelatedCharts,
getRelatedArticles,
getRelatedResearchAndWritingForVariable,
} from "../db/model/Post.js"
import {
JsonError,
Expand Down Expand Up @@ -64,6 +60,7 @@ import { GdocPost } from "../db/model/Gdoc/GdocPost.js"
import { getShortPageCitation } from "../site/gdocs/utils.js"
import { getSlugForTopicTag, getTagToSlugMap } from "./GrapherBakingUtils.js"
import pMap from "p-map"
import { getRelatedChartsForVariable } from "../db/model/Chart.js"

const renderDatapageIfApplicable = async (
grapher: GrapherInterface,
Expand Down
2 changes: 1 addition & 1 deletion baker/algolia/indexChartsToAlgolia.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import * as db from "../../db/db.js"
import { getRelatedArticles } from "../../db/wpdb.js"
import { ALGOLIA_INDEXING } from "../../settings/serverSettings.js"
import { getAlgoliaClient } from "./configureAlgolia.js"
import { isPathRedirectedToExplorer } from "../../explorerAdminServer/ExplorerRedirects.js"
Expand All @@ -8,6 +7,7 @@ import { KeyChartLevel, OwidGdocLinkType, isNil } from "@ourworldindata/utils"
import { MarkdownTextWrap } from "@ourworldindata/components"
import { Pageview } from "../../db/model/Pageview.js"
import { Link } from "../../db/model/Link.js"
import { getRelatedArticles } from "../../db/model/Post.js"

const computeScore = (record: Omit<ChartRecord, "score">): number => {
const { numRelatedArticles, views_7d } = record
Expand Down
20 changes: 0 additions & 20 deletions baker/algolia/indexContentGraphToAlgolia.test.ts

This file was deleted.

161 changes: 0 additions & 161 deletions baker/algolia/indexContentGraphToAlgolia.ts

This file was deleted.

8 changes: 6 additions & 2 deletions baker/algolia/indexToAlgolia.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,11 @@ import { Pageview } from "../../db/model/Pageview.js"
import { GdocPost } from "../../db/model/Gdoc/GdocPost.js"
import { ArticleBlocks } from "../../site/gdocs/components/ArticleBlocks.js"
import React from "react"
import { getFullPost, getPostsFromSnapshots } from "../../db/model/Post.js"
import {
getFullPost,
getPostTags,
getPostsFromSnapshots,
} from "../../db/model/Post.js"

interface TypeAndImportance {
type: PageType
Expand Down Expand Up @@ -104,7 +108,7 @@ async function generateWordpressRecords(

const post = await formatPost(rawPost, { footnotes: false })
const chunks = generateChunksFromHtmlText(post.html)
const tags = await wpdb.getPostTags(post.id)
const tags = await getPostTags(post.id)
const postTypeAndImportance = getPostTypeAndImportance(post, tags)

let i = 0
Expand Down
45 changes: 24 additions & 21 deletions db/DEPRECATEDwpdb.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,21 +15,22 @@ import {
JsonError,
Topic,
} from "@ourworldindata/types"
import { BLOG_SLUG } from "../settings/serverSettings.js"
import { BLOG_SLUG, WORDPRESS_URL } from "../settings/serverSettings.js"
import {
WP_API_ENDPOINT,
apiQuery,
getPostApiBySlugFromApi,
FOR_SYNC_ONLY_WP_API_ENDPOINT,
FOR_SYNC_ONLY_apiQuery,
FOR_SYNC_ONLY_getPostApiBySlugFromApi,
isWordpressAPIEnabled,
singleton,
OWID_API_ENDPOINT,
getEndpointSlugFromType,
getBlockApiFromApi,
graphqlQuery,
ENTRIES_CATEGORY_ID,
FOR_SYNC_ONLY_getEndpointSlugFromType,
FOR_SYNC_ONLY_getBlockApiFromApi,
FOR_SYNC_ONLY_graphqlQuery,
} from "./wpdb.js"
import { getFullPost } from "./model/Post.js"

const DEPRECATED_ENTRIES_CATEGORY_ID = 44
const DEPRECATED_OWID_API_ENDPOINT = `${WORDPRESS_URL}/wp-json/owid/v1`

// Limit not supported with multiple post types: When passing multiple post
// types, the limit is applied to the resulting array of sequentially sorted
// posts (all blog posts, then all pages, ...), so there will be a predominance
Expand All @@ -45,20 +46,20 @@ export const DEPRECATEDgetPosts = async (
const posts: PostRestApi[] = []

for (const postType of postTypes) {
const endpoint = `${WP_API_ENDPOINT}/${getEndpointSlugFromType(
const endpoint = `${FOR_SYNC_ONLY_WP_API_ENDPOINT}/${FOR_SYNC_ONLY_getEndpointSlugFromType(
postType
)}`

// Get number of items to retrieve
const headers = await apiQuery(endpoint, {
const headers = await FOR_SYNC_ONLY_apiQuery(endpoint, {
searchParams: [["per_page", 1]],
returnResponseHeadersOnly: true,
})
const maxAvailable = headers.get("X-WP-TotalPages")
const count = limit && limit < maxAvailable ? limit : maxAvailable

for (let page = 1; page <= Math.ceil(count / perPage); page++) {
const postsCurrentPage = await apiQuery(endpoint, {
const postsCurrentPage = await FOR_SYNC_ONLY_apiQuery(endpoint, {
searchParams: [
["per_page", perPage],
["page", page],
Expand Down Expand Up @@ -93,7 +94,7 @@ export const DEPRECATEDgetPostBySlugFromApi = async (
throw new JsonError(`Need wordpress API to match slug ${slug}`, 404)
}

const postApi = await getPostApiBySlugFromApi(slug)
const postApi = await FOR_SYNC_ONLY_getPostApiBySlugFromApi(slug)

return getFullPost(postApi)
}
Expand All @@ -104,13 +105,15 @@ export const DEPRECATEDgetLatestPostRevision = async (
id: number
): Promise<FullPost> => {
const type = await DEPRECATEDgetPostType(id)
const endpointSlug = getEndpointSlugFromType(type)
const endpointSlug = FOR_SYNC_ONLY_getEndpointSlugFromType(type)

const postApi = await apiQuery(`${WP_API_ENDPOINT}/${endpointSlug}/${id}`)
const postApi = await FOR_SYNC_ONLY_apiQuery(
`${FOR_SYNC_ONLY_WP_API_ENDPOINT}/${endpointSlug}/${id}`
)

const revision = (
await apiQuery(
`${WP_API_ENDPOINT}/${endpointSlug}/${id}/revisions?per_page=1`
await FOR_SYNC_ONLY_apiQuery(
`${FOR_SYNC_ONLY_WP_API_ENDPOINT}/${endpointSlug}/${id}/revisions?per_page=1`
)
)[0]

Expand Down Expand Up @@ -190,7 +193,7 @@ export const DEPRECATEDgetPostType = async (
search: number | string
): Promise<string> => {
const paramName = typeof search === "number" ? "id" : "slug"
return apiQuery(`${OWID_API_ENDPOINT}/type`, {
return FOR_SYNC_ONLY_apiQuery(`${DEPRECATED_OWID_API_ENDPOINT}/type`, {
searchParams: [[paramName, search]],
})
}
Expand Down Expand Up @@ -219,7 +222,7 @@ export const DEPRECATEDgetBlockContentFromApi = async (
): Promise<string | undefined> => {
if (!isWordpressAPIEnabled) return undefined

const post = await getBlockApiFromApi(id)
const post = await FOR_SYNC_ONLY_getBlockApiFromApi(id)

return post.data?.wpBlock?.content ?? undefined
}
Expand All @@ -230,7 +233,7 @@ export const DEPRECATEDgetTopics = async (
if (!isWordpressAPIEnabled) return []

const query = `query {
pages (first: 100, after:"${cursor}", where: {categoryId:${ENTRIES_CATEGORY_ID}} ) {
pages (first: 100, after:"${cursor}", where: {categoryId:${DEPRECATED_ENTRIES_CATEGORY_ID}} ) {
pageInfo {
hasNextPage
endCursor
Expand All @@ -242,7 +245,7 @@ export const DEPRECATEDgetTopics = async (
}
}`

const documents = await graphqlQuery(query, { cursor })
const documents = await FOR_SYNC_ONLY_graphqlQuery(query, { cursor })
const pageInfo = documents.data.pages.pageInfo
const topics: Topic[] = documents.data.pages.nodes
if (topics.length === 0) return []
Expand Down
Loading

0 comments on commit ee5bd52

Please sign in to comment.