-
Notifications
You must be signed in to change notification settings - Fork 1
/
index.js
162 lines (137 loc) · 4.64 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
'use-strict'
const axios = require('axios');
const cheerio = require('cheerio');
const fs = require('fs');
// main methods
/**
 * Scrapes an AccuWeather monthly page and writes the per-day details to ./data.json.
 *
 * Reads the monthly URL from process.argv (checked by validateInput), follows
 * every calendar link that is not marked `.is-past`, and collects the result
 * of extractDayDetail for each link. Successful days are keyed by the `date`
 * string reported by the day page; failed days are stored under `err1`,
 * `err2`, ... so that one broken page does not abort the whole run.
 *
 * @returns {Promise<void>} resolves once data.json has been written
 * @throws {Error} rejects when validateInput throws or the monthly page fetch rejects
 */
async function main() {
  const monthlyUrl = validateInput(process.argv);
  console.log(`fetching data from: ${monthlyUrl}`);

  // monthlyPageSelector
  const $ = await fetchPage(monthlyUrl);
  const dayUrls = [];
  $('div.monthly-calendar-container div.monthly-calendar a:not(.is-past)').each((i, element) => {
    dayUrls.push(absoluteUrl($(element).attr('href')));
  });

  // allSettled (rather than all) keeps the successful days when some requests fail.
  const daysResult = await Promise.allSettled(dayUrls.map((dayUrl) => extractDayDetail(dayUrl)));

  const days = {};
  let errCounter = 0;
  daysResult.forEach((result, index) => {
    if (result.status === 'fulfilled') {
      // Split the date off without mutating the object returned by extractDayDetail.
      const { date, ...detail } = result.value;
      days[date] = detail;
      return;
    }

    errCounter += 1;
    const { reason } = result;
    days[`err${errCounter}`] = {
      status: result.status,
      // Results come back in request order, so the index identifies the failing URL.
      url: dayUrls[index],
      // A plain Error serializes to "{}" in JSON, so record its message explicitly.
      reason: reason instanceof Error ? reason.message : String(reason),
    };
  });

  const output = {
    url: monthlyUrl,
    result: {
      days,
    },
  };
  fs.writeFileSync('./data.json', JSON.stringify(output, null, 2), 'utf-8');
}
/**
 * Downloads one daily page and gathers its parsed sections.
 *
 * @param {string} dailyUrl - URL of the daily page to fetch
 * @returns {Promise<object>} object with the keys `date`, `Day`, `Night`,
 *   `SunriseOrSunset` and `temperatureHistory`
 */
async function extractDayDetail(dailyUrl) {
  console.log(dailyUrl);

  // dailyPageSelector
  const $ = await fetchPage(dailyUrl);

  // The date is taken as the last space-separated word of the pagination text.
  const paginationWords = $('div.subnav-pagination').text().trim().split(' ');
  const detail = { date: paginationWords.pop() };

  const halfDayCards = $('div.half-day-card');
  detail.Day = halfDayParser(halfDayCards.eq(0));
  detail.Night = halfDayParser(halfDayCards.eq(1));
  detail.SunriseOrSunset = sunriseOrSunsetParser($('div.sunrise-sunset'));
  detail.temperatureHistory = temperatureHistoryParser($('div.temp-history'));
  return detail;
}
// parsers
/**
 * Parses one half-day card into a flat object.
 *
 * The result always has `description` and `iconPath`; every `p.panel-item`
 * inside the card adds one more entry whose key is the camelized item text
 * (with its `span` removed) and whose value is the text of that `span`.
 *
 * @param {object} element - cheerio selection of a `div.half-day-card`
 * @returns {object} parsed card fields
 */
function halfDayParser(element) {
  const parsed = {};
  parsed.description = element.find('div.phrase').text();
  parsed.iconPath = absoluteUrl(element.find('div.half-day-card-header > img').attr('src'));

  element.find('p.panel-item').each((index, panelItem) => {
    const $item = cheerio.load(panelItem);
    const valueSpan = $item('span');
    // Read the value first: the span is removed next so that the remaining
    // text of the item is only its label.
    const value = valueSpan.text();
    valueSpan.remove();
    const label = camelizeText($item.text());
    parsed[label] = value;
  });

  return parsed;
}
/**
 * Parses the sunrise/sunset section: the first `div.panel` is reported as
 * `Sun`, the second as `Moon`.
 *
 * @param {object} element - cheerio selection of the `div.sunrise-sunset` section
 * @returns {{Sun: object, Moon: object}} panelParser output for both panels
 */
function sunriseOrSunsetParser(element) {
  const panels = element.find('div.panel');
  const [Sun, Moon] = [0, 1].map((position) => panelParser(panels.eq(position)));
  return { Sun, Moon };
}
/**
 * Parses one panel of the sunrise/sunset section from its first three
 * `div.spaced-content` rows.
 *
 * Row 0 is split on spaces and its first four words are glued together as
 * `<w0><W1> <w2><W3>` (second and fourth word capitalized). Rows 1 and 2 are
 * returned without their first word.
 *
 * @param {object} element - cheerio selection of a `div.panel`
 * @returns {{duration: string, risingTime: string, fallingTime: string}}
 */
function panelParser(element) {
  const rows = element.find('div.spaced-content');
  const cleanedRow = (position) => textCleaner(rows.eq(position).text());
  const withoutFirstWord = (text) => text.split(' ').slice(1).join(' ');

  const [valueA, unitA, valueB, unitB] = cleanedRow(0).split(' ');
  const duration = `${valueA}${upperFirstChar(unitA)} ${valueB}${upperFirstChar(unitB)}`;
  const risingTime = withoutFirstWord(cleanedRow(1));
  const fallingTime = withoutFirstWord(cleanedRow(2));

  return { duration, risingTime, fallingTime };
}
/**
 * Parses the temperature-history section into an object keyed by the
 * camelized `div.label` text of each `div.row`.
 *
 * For every row, `low` is the text of the first `div.temperature` and `high`
 * is the text of the last one.
 *
 * @param {object} element - cheerio selection of the `div.temp-history` section
 * @returns {object} map of row label to `{ low, high }`
 */
function temperatureHistoryParser(element) {
  const history = {};

  element.find('div.row').each((index, row) => {
    const $row = cheerio.load(row);
    const temperatures = $row('div.temperature');
    const label = camelizeText($row('div.label').text());
    const low = temperatures.first().text();
    const high = temperatures.last().text();
    history[label] = { low, high };
  });

  return history;
}
// utils
/**
 * Fetches a page with axios and loads the response body into cheerio.
 *
 * @param {string} url - address to request with axios.get
 * @returns {Promise<object>} the value returned by cheerio.load for the response data
 */
async function fetchPage(url) {
  const { data: html } = await axios.get(url);
  return cheerio.load(html);
}
/**
 * Normalizes whitespace in scraped text: tabs are deleted outright, every
 * remaining whitespace run (newlines included) becomes a single space, and
 * the result is trimmed.
 *
 * @param {string} text - raw text; falsy values are returned unchanged
 * @returns {string} cleaned text
 */
function textCleaner(text) {
  if (text) {
    const withoutTabs = text.replace(/\t/g, '');
    // Newlines are whitespace too, so one collapse pass covers them.
    return withoutTabs.replace(/\s+/g, ' ').trim();
  }
  return text;
}
/**
 * Joins space-separated words into one identifier-like string: the first
 * word is lower-cased as a whole, each later word only gets its first
 * character upper-cased, and the words are concatenated without separator.
 *
 * @param {string} text - words separated by single spaces; falsy values are returned unchanged
 * @returns {string} the joined string
 */
function camelizeText(text) {
  if (!text) return text;

  const [firstWord, ...otherWords] = text.split(' ');
  const capitalized = otherWords.map((word) => upperFirstChar(word));
  return [firstWord.toLowerCase(), ...capitalized].join('');
}
/**
 * Upper-cases the first character of a string and leaves the rest as is.
 *
 * @param {string} text - input string; falsy values are returned unchanged
 * @returns {string} text with its first character upper-cased
 */
function upperFirstChar(text) {
  if (!text) return text;

  const head = text.charAt(0).toUpperCase();
  const tail = text.slice(1);
  return `${head}${tail}`;
}
/**
 * Prefixes a site-relative path with the AccuWeather origin.
 *
 * The argument is interpolated as-is, so a missing value produces a URL
 * ending in the text "undefined".
 *
 * @param {string} relativeUrl - path appended directly after the origin
 * @returns {string} origin followed by relativeUrl
 */
function absoluteUrl(relativeUrl) {
  const origin = 'https://www.accuweather.com';
  return `${origin}${relativeUrl}`;
}
/**
 * Validates the command-line arguments and returns the monthly report URL.
 *
 * Exactly one user argument is expected (so three entries in total, as in
 * process.argv). The URL must start with `https://www.accuweather.com/` and
 * contain `<month>-weather` for an English lower-case month name.
 *
 * The parameter is deliberately not called `arguments`: that name is a
 * SyntaxError for a parameter in strict mode and in ES modules.
 *
 * @param {string[]} argv - argument vector in process.argv layout
 * @returns {string} the validated URL (argv[2])
 * @throws {Error} when the argument count is wrong or the URL does not match
 */
function validateInput(argv) {
  const validCommand = 'node index.js {accuweather-monthly-report-url}';
  if (argv.length < 3) {
    throw new Error(`enter url and try "${validCommand}"`);
  }
  if (argv.length > 3) {
    throw new Error(`invalid argument length, try "${validCommand}"`);
  }

  const url = argv[2];
  const monthsName = ['january', 'february', 'march', 'april', 'may', 'june', 'july', 'august', 'september',
    'october', 'november', 'december'];
  const monthsPattern = monthsName.join('|');
  // Anchored at the start and with a "/" right after the host so that only the
  // real AccuWeather origin is accepted. The dots need a doubled backslash:
  // inside a template literal a single "\." collapses to a bare ".", which
  // would match any character.
  const validUrlPattern = new RegExp(`^https://www\\.accuweather\\.com/.*(?:${monthsPattern})-weather`);
  if (!validUrlPattern.test(url)) {
    throw new Error(`entered url is invalid, try "${validCommand}"`);
  }
  return url;
}
// Script entry point. Failures (invalid CLI arguments, a failed monthly page
// fetch, ...) are reported as a one-line message with a non-zero exit code
// instead of being left as an unhandled promise rejection.
main().catch((error) => {
  console.error(error?.message ?? error);
  process.exitCode = 1;
});