Skip to content

Commit

Permalink
fixing link rewrite report (#223)
Browse files Browse the repository at this point in the history
  • Loading branch information
hsaginor authored Sep 11, 2023
1 parent ebf5903 commit e65507d
Showing 1 changed file with 33 additions and 26 deletions.
59 changes: 33 additions & 26 deletions import/tools/importer/import.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -70,14 +70,14 @@ async function fetchLongPath(url) {
}

/**
 * Tells whether a link target counts as an internal golfdigest.com link
 * that should be rewritten.
 *
 * A link qualifies when it is:
 *  - an absolute https URL on www.golfdigest.com,
 *  - a protocol-relative URL on www.golfdigest.com, or
 *  - a root-relative path (starts with a single '/', not '//').
 *
 * @param {string} href The link target to inspect.
 * @returns {boolean} true when the link should be rewritten, false otherwise
 *   (including when href is not a string).
 */
function shouldRewriteLink(href) {
  // Anchors without a usable href cannot be classified; treat them as external.
  if (typeof href !== 'string') {
    return false;
  }
  return (href.startsWith('https://www.golfdigest.com/') ||
    href.startsWith('//www.golfdigest.com/') ||
    // Root-relative path, but not a protocol-relative URL to another host.
    (href.startsWith('/') && !href.startsWith('//')));
}

async function updateLink(el, url, rewrites, err) {
let href = el.href;

// is this an internal link?
if (shouldRewriteLink(href)) {
// console.log(`rewriting ${href} to franklin url`);
Expand All @@ -89,7 +89,8 @@ async function updateLink(el, url, rewrites, err) {
if (href) {
href = mapToFranklinPath(href);
// console.log(`Replacing internal link ${el.href} with ${href}`);
rewrites.push(`${el.href} to ${href}`);
rewrites.old.push(`${el.href}`);
rewrites.new.push(`${href}`);
el.setAttribute('href', href);
} else {
const redirect = await getRedirect(`https://www.golfdigest.com${oldPath}`);
Expand All @@ -110,7 +111,8 @@ async function updateLink(el, url, rewrites, err) {
if (href) {
href = mapToFranklinPath(href);
// console.log(`Replacing internal link ${el.href} with ${href}`);
rewrites.push(`${el.href} to ${href}`);
rewrites.old.push(`${el.href}`);
rewrites.new.push(`${href}`);
el.setAttribute('href', href);
} else {
console.warn(`${url}: Unable to map ${el.href} Franklin path. Item not found in sitemap or as data-page-path body attribute.`);
Expand All @@ -123,16 +125,21 @@ async function updateLink(el, url, rewrites, err) {

async function updateInternalLinks(dom, url, report) {
const err = [];
const rewrites = [];
const rewrites = {
text: [],
old: [],
new: []
};
const f = async (el) => {
await updateLink(el, url, rewrites, err);
};
const links = dom.querySelectorAll('a');
for (let el of links) {
await f(el);
}
if (report && rewrites.length > 0) {
report.linkRewrites = rewrites.join('\n');
if (report && rewrites.text.length > 0) {
report.linkRewritesOld = rewrites.old.join('\n');
report.linkRewritesNew = rewrites.new.join('\n');
}
if (report && err.length > 0) {
report.linkRewriteErrors = err.join('\n');
Expand Down Expand Up @@ -191,7 +198,7 @@ function getRubric(document) {
el.remove();
return text;
} else {
// try getting rubric from page meta tag
// try getting rubric from page meta tag
const metaTag = document.querySelector('meta[name="parsely-metadata"]');
if (metaTag) {
const val = metaTag.getAttribute('content');
Expand Down Expand Up @@ -311,9 +318,9 @@ function transformArticleDOM(document, templateConfig) {

const articleHero = document.querySelector('.o-ArticleHero');
const imageEmbed = document.querySelector('.o-ImageEmbed');
const imageEmbedCredit = document.querySelector('.o-ImageEmbed__a-Credit') ?
document.querySelector('.o-ImageEmbed__a-Credit') :
document.querySelector('.o-ArticleHero .o-ArticleInfo .a-Credit');
const imageEmbedCredit = document.querySelector('.o-ImageEmbed__a-Credit') ?
document.querySelector('.o-ImageEmbed__a-Credit') :
document.querySelector('.o-ArticleHero .o-ArticleInfo .a-Credit');
const imageEmbedCaption = document.querySelector('.o-ImageEmbed .o-ImageEmbed__a-Caption');
const articleTitle = document.querySelector('.o-AssetTitle');
const articleDescription = document.querySelector('.o-AssetDescription__a-Description');
Expand Down Expand Up @@ -372,7 +379,7 @@ function transformArticleDOM(document, templateConfig) {
if (main.querySelector('.o-ArticleHero__a-Info')) {
main.querySelector('.o-ArticleHero__a-Info').remove();
}

// reinsert original document section separators
articleBody.querySelectorAll('.importer-section-separator').forEach(el => { el.replaceWith(document.createElement('hr')); });

Expand All @@ -395,7 +402,7 @@ function transformArticleDOM(document, templateConfig) {
appendToBlock(sectionBlock, 'Photo Caption', imageEmbedCaptionTxt);
imageEmbedCaption.remove()
}

imageEmbed.insertAdjacentHTML('beforebegin', '<hr/>');
imageEmbed.insertAdjacentElement('afterend', sectionBlock);
sectionBlock.insertAdjacentHTML('afterend', '<hr/>');
Expand Down Expand Up @@ -489,7 +496,7 @@ function transformGalleryDOM(document, templateConfig) {
addEl(main, document.querySelector('.assetDescription'));

main.insertAdjacentHTML('beforeend', '<hr/>');

const gallery = document.querySelector('.photoGalleryPromo');
if (gallery) {
const postcards = gallery.querySelector('.photocards');
Expand Down Expand Up @@ -535,7 +542,7 @@ function transformGalleryDOM(document, templateConfig) {
if (!hasMetadata) {
block.remove();
}

if (slideCount < totalSlides-1) {
main.insertAdjacentHTML('beforeend', '<hr/>');
}
Expand All @@ -557,7 +564,7 @@ function transformGalleryDOM(document, templateConfig) {
if (slideCount < slideInfos.length) {
const slideInfo = slideInfos.item(slideCount);
main.append(slideInfo);

let block = createSectionMetadata(document, main);
let hasMetadata = false;

Expand Down Expand Up @@ -747,7 +754,7 @@ function preprocess({ document, url, html, params }) {
if (isArticle(document)) {
// For articles keep hr tags as section separators.
// These are removed by importer preprocessing step. So, use temporary div tags.
document.querySelectorAll('hr').forEach(el => {
document.querySelectorAll('hr').forEach(el => {
const tmpEl = document.createElement('div');
tmpEl.classList.add('importer-section-separator');
el.replaceWith(tmpEl);
Expand All @@ -756,14 +763,14 @@ function preprocess({ document, url, html, params }) {
}

/**
* Apply DOM operations to the provided document and return
* the root element to be then transformed to Markdown.
* @param {HTMLDocument} document The document
* @param {string} url The url of the page imported
* @param {string} html The raw html (the document is cleaned up during preprocessing)
* @param {object} params Object containing some parameters given by the import process.
* @returns {HTMLElement} The root element to be transformed
*/
* Apply DOM operations to the provided document and return
* the root element to be then transformed to Markdown.
* @param {HTMLDocument} document The document
* @param {string} url The url of the page imported
* @param {string} html The raw html (the document is cleaned up during preprocessing)
* @param {object} params Object containing some parameters given by the import process.
* @returns {HTMLElement} The root element to be transformed
*/
async function transform({document, url, html, params}) {
const docPath = mapToDocumentPath(document, url);
const retObj = await trasformDOM(document, url);
Expand All @@ -784,4 +791,4 @@ export {
export default {
preprocess: preprocess,
transform: transform
}
}

0 comments on commit e65507d

Please sign in to comment.