Removes all async/await from the extraction pipeline and adds unit tests asserting that each expected client script pattern (a regular expression) matches at least one extracted file

This commit is contained in:
freedmand
2018-01-05 14:56:58 -08:00
parent fc8280adea
commit 7588911108
2 changed files with 109 additions and 83 deletions

View File

@@ -98,7 +98,7 @@ function getChunkFiles() {
* @param {string=} extractionDir The directory in which to place the extracted * @param {string=} extractionDir The directory in which to place the extracted
* output. * output.
*/ */
module.exports = async function(includeUrls = null, excludeUrls = null, module.exports = function(includeUrls = null, excludeUrls = null,
extractionDir = OUTPUT_DIR) { extractionDir = OUTPUT_DIR) {
// Set up the server. // Set up the server.
@@ -127,9 +127,8 @@ module.exports = async function(includeUrls = null, excludeUrls = null,
// scraper. The program automatically exits after all the static pages have // scraper. The program automatically exits after all the static pages have
// been scraped from the server that are accessible from the root page (`/`). // been scraped from the server that are accessible from the root page (`/`).
const extractedFiles = []; // keep track of extracted files. const extractedFiles = []; // keep track of extracted files.
const server = await app.listen(PORT); const server = app.listen(PORT, () => {
console.log(`listening on port ${PORT} and beginning extraction`); console.log(`listening on port ${PORT} and beginning extraction`);
return new Promise((resolve, reject) => { return new Promise((resolve, reject) => {
const spider = new Spider({ const spider = new Spider({
concurrent: 5, concurrent: 5,
@@ -145,10 +144,11 @@ module.exports = async function(includeUrls = null, excludeUrls = null,
reject(); reject();
}, },
// Called when there are no more requests // Called when there are no more requests
done: async () => { done: () => {
await server.close(); server.close(() => {
console.log('Done!'); console.log('Done!');
resolve(); resolve();
});
}, },
headers: { 'user-agent': 'node-spider' }, headers: { 'user-agent': 'node-spider' },
@@ -220,4 +220,5 @@ module.exports = async function(includeUrls = null, excludeUrls = null,
(url) => spider.queue(getFullUrl(url), handleRequest)); (url) => spider.queue(getFullUrl(url), handleRequest));
} }
}); });
});
} }

View File

@@ -322,6 +322,9 @@ function run(env) {
'about/index.html', 'about/index.html',
'api/about/index.html', 'api/about/index.html',
'slow-preload/index.html',
'api/slow-preload/index.html',
'blog/index.html', 'blog/index.html',
'api/blog/index.html', 'api/blog/index.html',
@@ -351,11 +354,33 @@ function run(env) {
'svelte-logo-192.png', 'svelte-logo-192.png',
'svelte-logo-512.png', 'svelte-logo-512.png',
]; ];
// Client scripts that should show up in the extraction directory.
const expectedClientRegexes = [
/client\/_\..*?\.js/,
/client\/about\..*?\.js/,
/client\/blog_\$slug\$\..*?\.js/,
/client\/blog\..*?\.js/,
/client\/main\..*?\.js/,
/client\/show_url\..*?\.js/,
/client\/slow_preload\..*?\.js/,
];
const allPages = walkSync(dest); const allPages = walkSync(dest);
expectedPages.forEach((expectedPage) => { expectedPages.forEach((expectedPage) => {
assert.ok(allPages.includes(expectedPage)); assert.ok(allPages.includes(expectedPage));
}); });
expectedClientRegexes.forEach((expectedRegex) => {
// Ensure each client page regular expression matches at least one
// generated page.
let matched = false;
for (const page of allPages) {
if (expectedRegex.test(page)) {
matched = true;
break;
}
}
assert.ok(matched);
});
}); });
}); });
} }