limit parallel crawls

This commit is contained in:
Rich Harris
2018-08-19 12:49:58 -04:00
parent 14e5c8e761
commit d0c6b9cdca
3 changed files with 1181 additions and 1168 deletions

2336
package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@@ -62,7 +62,8 @@
"svelte-loader": "^2.9.0",
"typescript": "^2.8.3",
"walk-sync": "^0.3.2",
-"webpack": "^4.8.3"
+"webpack": "^4.8.3",
+"yootils": "0.0.11"
},
"scripts": {
"cy:open": "cypress open",

View File

@@ -4,6 +4,7 @@ import * as sander from 'sander';
import cheerio from 'cheerio';
import URL from 'url-parse';
import fetch from 'node-fetch';
+import * as yootils from 'yootils';
import * as ports from 'port-authority';
import { EventEmitter } from 'events';
import { minify_html } from './utils/minify_html';
@@ -128,12 +129,17 @@ async function execute(emitter: EventEmitter, {
const base = new URL($('base').attr('href') || '/', url.href);
+const q = yootils.queue(8);
+let promise;
$('a[href]').each((i: number, $a) => {
const url = new URL($a.attribs.href, base.href);
-if (url.origin === origin) urls.push(url);
+if (url.origin === origin) {
+	promise = q.add(() => handle(url));
+}
});
-await Promise.all(urls.map(handle));
+await promise;
}
}