Skip to content

Commit

Permalink
feat: add Cloudflare blocking benchmarking (#114)
Browse files Browse the repository at this point in the history
  • Loading branch information
barjin authored Nov 2, 2023
1 parent 8bcd804 commit 0ed3fdc
Show file tree
Hide file tree
Showing 3 changed files with 79 additions and 3 deletions.
4 changes: 2 additions & 2 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,9 @@
"prepublishOnly": "npm run build",
"lint": "eslint src test",
"lint:fix": "eslint src test --fix",
- "test": "node --experimental-vm-modules ./node_modules/jest/bin/jest.js --coverage"
+ "test": "node --experimental-vm-modules ./node_modules/jest/bin/jest.js --coverage",
+ "pretest:blocking": "npm run build",
+ "test:blocking": "ts-node -T ./test/live-testing/index.js"
},
"author": {
"name": "Apify",
Expand Down
74 changes: 74 additions & 0 deletions test/live-testing/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
import { setTimeout } from 'timers/promises';
import got from 'got';
import { gotScraping } from '../../dist/index.js';

/**
 * Worker loop: drains a shared queue of URLs, fetching each one with the given
 * HTTP client and tallying the outcome.
 *
 * Several workers may be handed the same `urls` array; each call to `shift()`
 * claims the next entry, so the array is consumed (mutated) as work proceeds.
 *
 * @param {{ get: Function }} gotImplementation - Client exposing `get(url)`, which
 *   returns a promise for a response with a `body`; the promise is expected to
 *   offer `cancel()` (assumed got-compatible — confirm for other clients).
 * @param {Array<string|URL>} urls - Shared work queue; emptied by this function.
 * @param {number} [timeoutMs=5000] - Per-request time limit in milliseconds.
 * @returns {Promise<{passed: number, blocked: number, failed: number}>}
 *   `blocked` counts bodies containing 'Just a moment...' (presumably the
 *   Cloudflare challenge page), `passed` counts every other response, and
 *   `failed` counts timeouts and any error thrown while fetching.
 */
async function processUrls(gotImplementation, urls, timeoutMs = 5000) {
    // Unique marker so a timeout can never be confused with a real response,
    // not even one whose body is empty.
    const TIMED_OUT = Symbol('timeout');

    let passed = 0;
    let blocked = 0;
    let failed = 0;

    // Loop on the queue length rather than on the truthiness of the entry:
    // a blank line in the input must not be mistaken for "queue exhausted".
    while (urls.length > 0) {
        const entry = urls.shift();
        const url = typeof entry === 'string' ? entry.trim() : entry;
        if (!url) continue; // blank or missing entry: nothing to fetch, nothing to count

        const timer = new AbortController();
        try {
            const request = gotImplementation.get(url);

            const result = await Promise.race([
                request,
                setTimeout(timeoutMs, TIMED_OUT, { signal: timer.signal }),
            ]);

            if (result === TIMED_OUT) {
                request.cancel();
                throw new Error('timeout');
            }

            if (result.body.includes('Just a moment...')) {
                blocked++;
            } else {
                passed++;
            }
        } catch {
            // Best-effort benchmark: every failure (timeout, network error,
            // unusable response) is only counted, never rethrown.
            failed++;
        } finally {
            // Release the timer as soon as the race is decided so that finished
            // requests do not leave timers pending in the event loop.
            timer.abort();
        }
    }

    return { passed, blocked, failed };
}

/**
 * Crawls the given URLs with five concurrent `processUrls` workers and sums
 * their tallies.
 *
 * The workers all pull from one private copy of `urls`, so the caller's array
 * is left untouched.
 *
 * @param {object} implementation - HTTP client handed to every worker.
 * @param {Iterable<string>} urls - URLs to crawl.
 * @returns {Promise<{passed: number, blocked: number, failed: number}>} Totals across all workers.
 */
async function runInParallel(implementation, urls) {
    const sharedQueue = [...urls];

    // Start every worker before awaiting any of them so they run concurrently.
    const workers = [];
    for (let i = 0; i < 5; i++) {
        workers.push(processUrls(implementation, sharedQueue));
    }
    const tallies = await Promise.all(workers);

    const totals = { passed: 0, blocked: 0, failed: 0 };
    for (const tally of tallies) {
        totals.passed += tally.passed;
        totals.blocked += tally.blocked;
        totals.failed += tally.failed;
    }
    return totals;
}

// Printing the tallies is the whole purpose of this script, so the no-console
// rule is switched off explicitly instead of producing a lint warning per call.
/* eslint-disable no-console */

/**
 * Entry point: downloads the list of target websites, crawls it with both
 * `gotScraping` and plain `got` at the same time, and prints each client's
 * passed / blocked / failed tallies.
 */
(async () => {
    const { body } = await got.get('https://raw.githubusercontent.com/apify/fingerprint-suite/master/test/antibot-services/live-testing/cloudflare-websites.csv');

    // Assumes one URL per line (TODO confirm against the CSV). Blank lines and
    // stray carriage returns are dropped so they never reach the crawlers.
    const urls = body
        .split('\n')
        .map((line) => line.trim())
        .filter((line) => line !== '');

    const [gotScrapingResults, gotResults] = await Promise.all([
        runInParallel(gotScraping, urls),
        runInParallel(got, urls),
    ]);

    console.log('got-scraping');
    console.log(gotScrapingResults);

    console.log('---');
    console.log('got');
    console.log(gotResults);

    // Exit explicitly instead of waiting for whatever handles may still be open.
    process.exit(0);
})().catch((error) => {
    // Do not leave the top-level promise floating: report the failure and
    // signal it through the exit code.
    console.error(error);
    process.exit(1);
});

0 comments on commit 0ed3fdc

Please sign in to comment.