Ignore non-HTML responses when crawling, and close middleware when done

This commit is contained in:
Rich Harris
2018-02-03 16:17:28 -05:00
parent 8f1d2e0a04
commit 15b1fbf8a6

View File

@@ -63,7 +63,8 @@ export default function exporter({ src, dest }) { // TODO dest is a terrible nam
return fetch(url, opts);
};
app.use(require('./middleware')()); // TODO this is filthy
const middleware = require('./middleware')(); // TODO this is filthy
app.use(middleware);
const server = app.listen(PORT);
const seen = new Set();
@@ -77,19 +78,21 @@ export default function exporter({ src, dest }) { // TODO dest is a terrible nam
return fetch(url.href)
.then(r => {
save(r);
return r.text();
})
.then(body => {
const $ = cheerio.load(body);
const hrefs = [];
$('a[href]').each((i, $a) => {
hrefs.push($a.attribs.href);
});
if (r.headers.get('Content-Type') === 'text/html') {
return r.text().then(body => {
const $ = cheerio.load(body);
const hrefs = [];
return hrefs.reduce((promise, href) => {
return promise.then(() => handle(new URL(href, url.href)));
}, Promise.resolve());
$('a[href]').each((i, $a) => {
hrefs.push($a.attribs.href);
});
return hrefs.reduce((promise, href) => {
return promise.then(() => handle(new URL(href, url.href)));
}, Promise.resolve());
});
}
})
.catch(err => {
console.error(`Error rendering ${url.pathname}: ${err.message}`);
@@ -97,5 +100,8 @@ export default function exporter({ src, dest }) { // TODO dest is a terrible nam
}
return handle(new URL(origin)) // TODO all static routes
.then(() => server.close());
.then(() => {
server.close();
middleware.close();
});
}