-
Notifications
You must be signed in to change notification settings - Fork 3
/
_sitemap.js
55 lines (38 loc) · 1.77 KB
/
_sitemap.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
const execSync = require('child_process').execSync;
const fs = require('fs');
const log = require('./_logging.js');
const parser = require('xml2json');
module.exports = {
// downloadSitemap(blogDetails.blogUrl, dirs.data)
downloadSitemap: function(blogUrl, siteMapDir) {
const siteMapURL = `${blogUrl}sitemap.xml`;
log.message(`Attempting to download ${siteMapURL}`);
// -L to follow redirects
const siteMapData = execSync(`curl -L --silent ${siteMapURL}`, function(err, stdout, stderr) {
if (err) { console.log(err) }
if (stderr) { console.log(stderr) }
});
const siteMapXML = siteMapData.toString("utf8");
const siteMapJSON = JSON.parse(parser.toJson(siteMapData));
fs.writeFileSync(`${siteMapDir}/sitemap.xml`, siteMapXML);
log.message(`File written: ${siteMapDir}/sitemap.xml`);
if('sitemapindex' in siteMapJSON) {
console.log('sitemapindex found, downloading paged sitemaps')
let len = siteMapJSON.sitemapindex.sitemap.length || 0;
for (var i = 0; i < len; i++){
const url = siteMapJSON.sitemapindex.sitemap[i].loc;
console.log(`Downloading ${url}`)
const siteMapPage = execSync(`curl --silent ${url}`, function(err, stdout, stderr) {
if (err) { console.log(err) }
if (stderr) { console.log(stderr) }
});
fs.writeFileSync(`${siteMapDir}/sitemap-page${i+1}.xml`, siteMapPage.toString('utf8'));
}
console.log('finished downloading paged sitemaps')
} else if ('urlset' in siteMapJSON) {
console.log('urlset found in sitemap, no further data to download')
} else {
console.log('Unknown format. Inspect downloaded sitemap to see if you need to download more.')
}
}
};