ignore non-HTML responses when crawling, and close middleware when done

This commit is contained in:
Rich Harris
2018-02-03 16:17:28 -05:00
parent 8f1d2e0a04
commit 15b1fbf8a6

View File

@@ -63,7 +63,8 @@ export default function exporter({ src, dest }) { // TODO dest is a terrible nam
return fetch(url, opts); return fetch(url, opts);
}; };
app.use(require('./middleware')()); // TODO this is filthy const middleware = require('./middleware')(); // TODO this is filthy
app.use(middleware);
const server = app.listen(PORT); const server = app.listen(PORT);
const seen = new Set(); const seen = new Set();
@@ -77,19 +78,21 @@ export default function exporter({ src, dest }) { // TODO dest is a terrible nam
return fetch(url.href) return fetch(url.href)
.then(r => { .then(r => {
save(r); save(r);
return r.text();
})
.then(body => {
const $ = cheerio.load(body);
const hrefs = [];
$('a[href]').each((i, $a) => { if (r.headers.get('Content-Type') === 'text/html') {
hrefs.push($a.attribs.href); return r.text().then(body => {
}); const $ = cheerio.load(body);
const hrefs = [];
return hrefs.reduce((promise, href) => { $('a[href]').each((i, $a) => {
return promise.then(() => handle(new URL(href, url.href))); hrefs.push($a.attribs.href);
}, Promise.resolve()); });
return hrefs.reduce((promise, href) => {
return promise.then(() => handle(new URL(href, url.href)));
}, Promise.resolve());
});
}
}) })
.catch(err => { .catch(err => {
console.error(`Error rendering ${url.pathname}: ${err.message}`); console.error(`Error rendering ${url.pathname}: ${err.message}`);
@@ -97,5 +100,8 @@ export default function exporter({ src, dest }) { // TODO dest is a terrible nam
} }
return handle(new URL(origin)) // TODO all static routes return handle(new URL(origin)) // TODO all static routes
.then(() => server.close()); .then(() => {
server.close();
middleware.close();
});
} }