forked from AFornio/TheRealEmailExtractor
-
Notifications
You must be signed in to change notification settings - Fork 0
/
index.js
68 lines (56 loc) · 1.84 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
const puppeteer = require("puppeteer");
var fs = require("fs");
// Link targets that can never load as a page; skipping them avoids a
// guaranteed navigation error for every such anchor.
const SKIPPED_LINK_SCHEMES = /^(mailto|tel|javascript):/i;

/**
 * Collects every e-mail-looking string found in the markup of the page that
 * is currently loaded.
 *
 * The callback handed to `page.evaluate` runs inside the browser, so it cannot
 * see Node-side variables; that is why the pattern is written inline there.
 *
 * @param {object} page - Puppeteer page that has already navigated somewhere.
 * @returns {Promise<string[]>} Matches in document order (duplicates
 *   included); an empty array when nothing matched.
 */
async function extractEmails(page) {
  const matches = await page.evaluate(() => {
    const htmlTag = document.querySelector("html");
    return htmlTag.innerHTML.match(/([a-zA-Z0-9._-]+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9_-]+)/gi);
  });
  // String.prototype.match yields null (not []) when there is no match.
  return matches || [];
}

/**
 * Lists the distinct link targets of all `<a>` elements on the current page.
 *
 * @param {object} page - Puppeteer page that has already navigated somewhere.
 * @returns {Promise<string[]>} Unique hrefs in first-seen order, without
 *   empty values and without mailto:/tel:/javascript: targets.
 */
async function collectLinks(page) {
  const hrefs = await page.evaluate(() =>
    Array.from(document.querySelectorAll("a"), (anchor) => anchor.href)
  );
  const navigable = hrefs.filter(
    // The typeof guard drops anchors whose href is not a plain string.
    (href) => typeof href === "string" && href !== "" && !SKIPPED_LINK_SCHEMES.test(href)
  );
  return [...new Set(navigable)];
}

/**
 * Script entry point.
 *
 * RUN
 *   node index.js 'url' depth
 *
 * Loads `url`, gathers the e-mail addresses found in its markup and, when
 * `depth` is 2, also visits every link on that page and gathers addresses
 * there. The de-duplicated result is written to `emails.json` in the current
 * working directory.
 *
 * @returns {Promise<void>} Resolves once the file is written and the browser
 *   is closed; rejects if the first navigation or the file write fails.
 */
async function main() {
  const [url, depth] = process.argv.slice(2);
  if (!url) {
    // Fail before launching a browser that would otherwise be left running.
    console.error("Usage: node index.js 'url' [depth]");
    process.exitCode = 1;
    return;
  }

  const browser = await puppeteer.launch({ headless: true });
  try {
    const page = await browser.newPage();
    await page.goto(url, { waitUntil: "networkidle2" });

    // A Set keeps first-seen order, so the output order matches a manual
    // "push if not already included" pass without the quadratic scan.
    const emails = new Set(await extractEmails(page));

    // Command-line arguments are strings; convert explicitly before comparing.
    if (Number(depth) === 2) {
      const links = await collectLinks(page);
      for (const [i, link] of links.entries()) {
        console.log(`Link ${i} of ${links.length - 1} : ${link}`);
        try {
          await page.goto(link, { waitUntil: "networkidle2" });
          for (const email of await extractEmails(page)) {
            emails.add(email);
          }
        } catch (error) {
          // Best effort: one unreachable link must not abort the whole crawl.
          console.log(`\nERROR: Link ${i}:${link}`);
          console.log(`ERROR: ${error}\n`);
        }
      }
    }

    await fs.promises.writeFile("emails.json", JSON.stringify([...emails], null, 4));
  } finally {
    // Always release the browser process, even when something above threw.
    await browser.close();
  }
}

main().catch((error) => {
  console.error(`ERROR: ${error}`);
  process.exitCode = 1;
});