From 30ddb3dd7e17bb3ee19c8fc164a785c6ddc58417 Mon Sep 17 00:00:00 2001 From: freedmand Date: Fri, 5 Jan 2018 14:16:30 -0800 Subject: [PATCH 1/6] Adds a `sapper extract` CLI command, which scrapes the server to run as a static website starting at the site's root. TESTED=Basic unit test ensuring relevant routes are added. --- cli/index.js | 5 +- lib/utils/extract.js | 223 ++++++++++++++++++ package-lock.json | 526 +++++++++++++++++++++++++++++++++++++++---- package.json | 4 + test/app/.gitignore | 3 +- test/common/test.js | 55 ++++- 6 files changed, 770 insertions(+), 46 deletions(-) create mode 100644 lib/utils/extract.js diff --git a/cli/index.js b/cli/index.js index 4c50494..2f7ebad 100755 --- a/cli/index.js +++ b/cli/index.js @@ -5,4 +5,7 @@ const cmd = process.argv[2]; if (cmd === 'build') { process.env.NODE_ENV = 'production'; require('../lib/build.js')(); -} \ No newline at end of file +} else if (cmd === 'extract') { + process.env.NODE_ENV = 'production'; + require('../lib/utils/extract.js')(); +} diff --git a/lib/utils/extract.js b/lib/utils/extract.js new file mode 100644 index 0000000..36579da --- /dev/null +++ b/lib/utils/extract.js @@ -0,0 +1,223 @@ +const fs = require('fs-extra'); +const app = require('express')(); +const compression = require('compression'); +const sapper = require('../index.js'); +const static = require('serve-static'); +const Spider = require('node-spider'); +const path = require('path'); + +const { PORT = 3000, OUTPUT_DIR = 'dist' } = process.env; +const { dest = sapperDest } = require('../config.js'); + +const prefix = `http://localhost:${PORT}`; + +/** + * Returns the full URL of the specified path in the server. + * @param {string} url The path for which to get the complete URL. + * @return {string} The full URL. + */ +function getFullUrl(url) { + if (url.startsWith(prefix)) return url; + return `${prefix}${url}`; +} + +/** + * Returns the extension on the URL or '' if there is none. + * @param {string} url The URL. + * @return {string} The URL's extension or the empty string if the URL has no + * extension. + */ +function getExtension(url) { + const splits = url.split('.'); + let extension = splits[splits.length - 1].trim(); + if (!/^[a-zA-Z0-9]+$/.test(extension) || extension.length > 10) { + // Clear the extension if it is not alphanumeric or is long enough to + // signify it may just be a hash value or something. + extension = ''; + } + return extension; +} + +/** + * Returns the relative path for the specified URL, adding index.html if the URL + * ends in `/`. This makes the URL function well in a static site. + * @param {string} url The URL for which to retrieve the relative path. + * @return {string} A URL that starts with / that is relative to the server + * root. The URL will add index.html if it ends with `/`. + */ +function relativePath(url) { + if (url.startsWith(prefix)) return relativePath(url.substr(prefix.length)); + if (url.endsWith('/')) url += 'index.html'; + if (getExtension(url) == '') url += '/index.html'; + if (url.startsWith('/')) return url; + throw new Error('Bad url'); +} + +/** + * Returns the Sapper API route for the specified URL path. + * @param {string} url The absolute or relative URL. + * @return {string} The URL with /api/ in front. + */ +function apiPath(url) { + if (url.startsWith(prefix)) { + return `${prefix}/api${url.substr(prefix.length)}`; + } + return `/api${url}`; +} + +/** + * Returns whether the specified URL is on the server or an external link. + * @param {string} url The URL. + * @return {boolean} True if the URL is on the server. + */ +function filter(url) { + return url.startsWith('/') || url.startsWith(getFullUrl('/')); +} + +/** + * Retrieves chunk files that are normally cached for offline use in the service + * worker. + * @return {!Array} + */ +function getChunkFiles() { + const clientInfo = + fs.readJsonSync(path.join(sapperDest, 'stats.client.json')); + const chunkFiles = clientInfo.assets.map(chunk => `/client/${chunk.name}`); + return chunkFiles; +} + +/** + * Exports the Sapper app as a static website by starting at the root and + * crawling pages that are linked, their /api/ pages, and webpack routes, as + * well as copying assets. + * @param {?Array=} includeUrls If non-null, a set of additional URLs to + * scrape in the extraction. This should only be set if there are routes + * that cannot be reached from the root. + * @param {?Array=} excludeUrls If non-null, a set of URLs to avoid + * scraping in the extraction. + * @param {number=} extractionDir The directory in which to place the extracted + * output. + */ +module.exports = async function(includeUrls = null, excludeUrls = null, + extractionDir = OUTPUT_DIR) { + // Set up the server. + + // this allows us to do e.g. `fetch('/api/blog')` on the server + const fetch = require('node-fetch'); + global.fetch = (url, opts) => { + if (url[0] === '/') url = `http://localhost:${PORT}${url}`; + return fetch(url, opts); + }; + + app.use(compression({ threshold: 0 })); + + app.use(static('assets')); + + app.use(sapper()); + + // Clean the output directory and copy assets in. + fs.removeSync(extractionDir); + fs.copySync('assets', extractionDir); + + // If exclude URLs are set, normalize them. + if (excludeUrls == null) excludeUrls = []; + excludeUrls = excludeUrls.map((url) => getFullUrl(url)); + + // The crux of the extraction, chaining the traditional server call with a web + // scraper. The program automatically exits after all the static pages have + // been scraped from the server that are accessible from the root page (`/`). + const extractedFiles = []; // keep track of extracted files. + const server = await app.listen(PORT); + console.log(`listening on port ${PORT} and beginning extraction`); + + return new Promise((resolve, reject) => { + const spider = new Spider({ + concurrent: 5, + delay: 0, + logs: process.stderr, + allowDuplicates: false, + catchErrors: true, + addReferrer: false, + xhr: false, + keepAlive: false, + error: (err, url) => { + console.error(`ERROR ${err} at ${url}`); + reject(); + }, + // Called when there are no more requests + done: async () => { + await server.close(); + console.log('Done!'); + resolve(); + }, + + headers: { 'user-agent': 'node-spider' }, + // Use a binary encoding to preserve image files. + encoding: 'binary' + }); + + // The primary logic to handle a scraped page. + const handleRequest = (doc) => { + // Only deal with the page if it is on the server, i.e. it is not an + // external link. + if (!filter(doc.url)) return; + // Skip URL if it is in the exclude list. + if (excludeUrls.includes(getFullUrl(doc.url))) return; + + // Grab the page's relative path and write the page contents to a local + // file. + const relPath = relativePath(doc.url); + extractedFiles.push(relPath); + console.log(`GOT ${relPath}`); // static page url + fs.outputFileSync(path.join(extractionDir, relPath), doc.res.body, + {encoding: 'binary'}); + + /** + * Resolves and checks if a given URL is local; if so, adds it to the + * scraping queue. + * @param {string} url The URL to process. + */ + const process = (url) => { + // Remove trailing hash if relevant. + url = url.split('#')[0]; + // Resolve URL relative to server root. + url = doc.resolve(url); + // Crawl more if the URL is on the server. + if (filter(url)) spider.queue(url, handleRequest); + }; + + const extension = getExtension(relPath); + if (extension == 'html') { + // Grab src and href attributes from html pages. + doc.$('[src]').each((i, elem) => { + process(doc.$(elem).attr('src')); + }); + doc.$('[href]').each((i, elem) => { + process(doc.$(elem).attr('href')); + }); + } + + if (doc.url.endsWith('/service-worker.js')) { + // Grab additional routes. + const chunkFiles = getChunkFiles(); + chunkFiles.forEach( + (url) => spider.queue(getFullUrl(url), handleRequest)); + } + + if (relPath.endsWith('/index.html') && !relPath.startsWith('/api/')) { + // Attempt to grab the /api/ version of a page that seems to be a + // basic route. + spider.queue(apiPath(doc.url), handleRequest); + } + }; + + // Start crawling with the document root and the service worker. + spider.queue(getFullUrl('/'), handleRequest); + spider.queue(getFullUrl('/service-worker.js'), handleRequest); + + if (includeUrls !== null) { + includeUrls.forEach( + (url) => spider.queue(getFullUrl(url), handleRequest)); + } + }); +} diff --git a/package-lock.json b/package-lock.json index 4d0d571..59f3edb 100644 --- a/package-lock.json +++ b/package-lock.json @@ -14,7 +14,6 @@ "version": "1.3.4", "resolved": "https://registry.npmjs.org/accepts/-/accepts-1.3.4.tgz", "integrity": "sha1-hiRnWMfdbSGmR0/whKR0DsBesh8=", - "dev": true, "requires": { "mime-types": "2.1.17", "negotiator": "0.6.1" @@ -343,6 +342,39 @@ "resolved": "https://registry.npmjs.org/binary-extensions/-/binary-extensions-1.11.0.tgz", "integrity": "sha1-RqoXUftqL5PuXmibsQh9SxTGwgU=" }, + "bl": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/bl/-/bl-1.0.3.tgz", + "integrity": "sha1-/FQhoo/UImA2w7OJGmaiW8ZNIm4=", + "requires": { + "readable-stream": "2.0.6" + }, + "dependencies": { + "readable-stream": { + "version": "2.0.6", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-2.0.6.tgz", + "integrity": "sha1-j5A0HmilPMySh4jaz80Rs265t44=", + "requires": { + "core-util-is": "1.0.2", + "inherits": "2.0.3", + "isarray": "1.0.0", + "process-nextick-args": "1.0.7", + "string_decoder": "0.10.31", + "util-deprecate": "1.0.2" + } + }, + "string_decoder": { + "version": "0.10.31", + "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-0.10.31.tgz", + "integrity": "sha1-YuIDvEF2bGwoyfyEMB2rHFMQ+pQ=" + } + } + }, + "bluebird": { + "version": "2.11.0", + "resolved": "https://registry.npmjs.org/bluebird/-/bluebird-2.11.0.tgz", + "integrity": "sha1-U0uQM8AiyVecVro7Plpcqvu2UOE=" + }, "bn.js": { "version": "4.11.8", "resolved": "https://registry.npmjs.org/bn.js/-/bn.js-4.11.8.tgz", @@ -377,6 +409,11 @@ } } }, + "boolbase": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz", + "integrity": "sha1-aN/1++YMUes3cl6p4+0xDcwed24=" + }, "boom": { "version": "4.3.1", "resolved": "https://registry.npmjs.org/boom/-/boom-4.3.1.tgz", @@ -518,8 +555,7 @@ "bytes": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/bytes/-/bytes-3.0.0.tgz", - "integrity": "sha1-0ygVQE1olpn4Wk6k+odV3ROpYEg=", - "dev": true + "integrity": "sha1-0ygVQE1olpn4Wk6k+odV3ROpYEg=" }, "caller-path": { "version": "0.1.0", @@ -608,6 +644,25 @@ "integrity": "sha1-tUc7M9yXxCTl2Y3IfVXU2KKci/I=", "dev": true }, + "cheerio": { + "version": "0.19.0", + "resolved": "https://registry.npmjs.org/cheerio/-/cheerio-0.19.0.tgz", + "integrity": "sha1-dy5wFfLuKZZQltcepBdbdas1SSU=", + "requires": { + "css-select": "1.0.0", + "dom-serializer": "0.1.0", + "entities": "1.1.1", + "htmlparser2": "3.8.3", + "lodash": "3.10.1" + }, + "dependencies": { + "lodash": { + "version": "3.10.1", + "resolved": "https://registry.npmjs.org/lodash/-/lodash-3.10.1.tgz", + "integrity": "sha1-W/Rejkm6QYnhfUgnid/RW9FAt7Y=" + } + } + }, "chokidar": { "version": "1.7.0", "resolved": "https://registry.npmjs.org/chokidar/-/chokidar-1.7.0.tgz", @@ -779,7 +834,6 @@ "version": "1.0.5", "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.5.tgz", "integrity": "sha1-k4NwpXtKUd6ix3wV1cX9+JUWQAk=", - "dev": true, "requires": { "delayed-stream": "1.0.0" } @@ -787,8 +841,7 @@ "commander": { "version": "2.11.0", "resolved": "https://registry.npmjs.org/commander/-/commander-2.11.0.tgz", - "integrity": "sha512-b0553uYA5YAEGgyYIGYROzKQ7X5RAqedkfjiZxwi0kL1g3bOaBNNZfYkzt/CL0umgD5wc9Jec2FbB98CjkMRvQ==", - "dev": true + "integrity": "sha512-b0553uYA5YAEGgyYIGYROzKQ7X5RAqedkfjiZxwi0kL1g3bOaBNNZfYkzt/CL0umgD5wc9Jec2FbB98CjkMRvQ==" }, "compare-versions": { "version": "2.0.1", @@ -796,6 +849,38 @@ "integrity": "sha1-Htwfk2h/2XoyXFn1XkWgfbEGrKY=", "dev": true }, + "compressible": { + "version": "2.0.12", + "resolved": "https://registry.npmjs.org/compressible/-/compressible-2.0.12.tgz", + "integrity": "sha1-xZpcmdt2dn6YdlAOJx72OzSTvWY=", + "requires": { + "mime-db": "1.30.0" + } + }, + "compression": { + "version": "1.7.1", + "resolved": "https://registry.npmjs.org/compression/-/compression-1.7.1.tgz", + "integrity": "sha1-7/JgPvwuIs+G810uuTWJ+YdTc9s=", + "requires": { + "accepts": "1.3.4", + "bytes": "3.0.0", + "compressible": "2.0.12", + "debug": "2.6.9", + "on-headers": "1.0.1", + "safe-buffer": "5.1.1", + "vary": "1.1.2" + }, + "dependencies": { + "debug": { + "version": "2.6.9", + "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz", + "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==", + "requires": { + "ms": "2.0.0" + } + } + } + }, "concat-map": { "version": "0.0.1", "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", @@ -975,6 +1060,17 @@ "source-list-map": "2.0.0" } }, + "css-select": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/css-select/-/css-select-1.0.0.tgz", + "integrity": "sha1-sRIcpRhI3SZOIkTQWM7iVN7rRLA=", + "requires": { + "boolbase": "1.0.0", + "css-what": "1.0.0", + "domutils": "1.4.3", + "nth-check": "1.0.1" + } + }, "css-selector-tokenizer": { "version": "0.7.0", "resolved": "https://registry.npmjs.org/css-selector-tokenizer/-/css-selector-tokenizer-0.7.0.tgz", @@ -986,6 +1082,11 @@ "regexpu-core": "1.0.0" } }, + "css-what": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/css-what/-/css-what-1.0.0.tgz", + "integrity": "sha1-18wt9FGAZm+Z0rFEYmOUaeAPc2w=" + }, "cssesc": { "version": "0.1.0", "resolved": "https://registry.npmjs.org/cssesc/-/cssesc-0.1.0.tgz", @@ -1042,6 +1143,11 @@ "source-map": "0.5.7" } }, + "ctype": { + "version": "0.5.3", + "resolved": "https://registry.npmjs.org/ctype/-/ctype-0.5.3.tgz", + "integrity": "sha1-gsGMJGH3QRTvFsE1IkrQuRRMoS8=" + }, "currently-unhandled": { "version": "0.4.1", "resolved": "https://registry.npmjs.org/currently-unhandled/-/currently-unhandled-0.4.1.tgz", @@ -1167,8 +1273,7 @@ "delayed-stream": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", - "integrity": "sha1-3zrhmayt+31ECqrgsp4icrJOxhk=", - "dev": true + "integrity": "sha1-3zrhmayt+31ECqrgsp4icrJOxhk=" }, "depd": { "version": "1.1.1", @@ -1216,11 +1321,48 @@ "esutils": "2.0.2" } }, + "dom-serializer": { + "version": "0.1.0", + "resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-0.1.0.tgz", + "integrity": "sha1-BzxpdUbOB4DOI75KKOKT5AvDDII=", + "requires": { + "domelementtype": "1.1.3", + "entities": "1.1.1" + }, + "dependencies": { + "domelementtype": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-1.1.3.tgz", + "integrity": "sha1-vSh3PiZCiBrsUVRJJCmcXNgiGFs=" + } + } + }, "domain-browser": { "version": "1.1.7", "resolved": "https://registry.npmjs.org/domain-browser/-/domain-browser-1.1.7.tgz", "integrity": "sha1-hnqksJP6oF8d4IwG9NeyH9+GmLw=" }, + "domelementtype": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-1.3.0.tgz", + "integrity": "sha1-sXrtguirWeUt2cGbF1bg/BhyBMI=" + }, + "domhandler": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/domhandler/-/domhandler-2.3.0.tgz", + "integrity": "sha1-LeWaCCLVAn+r/28DLCsloqir5zg=", + "requires": { + "domelementtype": "1.3.0" + } + }, + "domutils": { + "version": "1.4.3", + "resolved": "https://registry.npmjs.org/domutils/-/domutils-1.4.3.tgz", + "integrity": "sha1-CGVRN5bGswYDGFDhdVFrr4C3Km8=", + "requires": { + "domelementtype": "1.3.0" + } + }, "duplexer": { "version": "0.1.1", "resolved": "https://registry.npmjs.org/duplexer/-/duplexer-0.1.1.tgz", @@ -1280,6 +1422,28 @@ "ms": "2.0.0" } }, + "fs-extra": { + "version": "0.30.0", + "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-0.30.0.tgz", + "integrity": "sha1-8jP/zAjU2n1DLapEl3aYnbHfk/A=", + "dev": true, + "requires": { + "graceful-fs": "4.1.11", + "jsonfile": "2.4.0", + "klaw": "1.3.1", + "path-is-absolute": "1.0.1", + "rimraf": "2.6.2" + } + }, + "jsonfile": { + "version": "2.4.0", + "resolved": "https://registry.npmjs.org/jsonfile/-/jsonfile-2.4.0.tgz", + "integrity": "sha1-NzaitCi4e72gzIO1P6PWM6NcKug=", + "dev": true, + "requires": { + "graceful-fs": "4.1.11" + } + }, "minimist": { "version": "1.2.0", "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.0.tgz", @@ -1374,6 +1538,16 @@ } } }, + "ensure-posix-path": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/ensure-posix-path/-/ensure-posix-path-1.0.2.tgz", + "integrity": "sha1-pls+QtC3HPxYXrd0+ZQ8jZuRsMI=" + }, + "entities": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/entities/-/entities-1.1.1.tgz", + "integrity": "sha1-blwtClYhtdra7O+AuQ7ftc13cvA=" + }, "errno": { "version": "0.1.6", "resolved": "https://registry.npmjs.org/errno/-/errno-0.1.6.tgz", @@ -1851,8 +2025,7 @@ "extend": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/extend/-/extend-3.0.1.tgz", - "integrity": "sha1-p1Xqe8Gt/MWjHOfnYtuq3F5jZEQ=", - "dev": true + "integrity": "sha1-p1Xqe8Gt/MWjHOfnYtuq3F5jZEQ=" }, "external-editor": { "version": "2.1.0", @@ -2052,8 +2225,7 @@ "forever-agent": { "version": "0.6.1", "resolved": "https://registry.npmjs.org/forever-agent/-/forever-agent-0.6.1.tgz", - "integrity": "sha1-+8cfDEGt6zf5bFd60e1C2P2sypE=", - "dev": true + "integrity": "sha1-+8cfDEGt6zf5bFd60e1C2P2sypE=" }, "form-data": { "version": "2.3.1", @@ -2085,16 +2257,13 @@ "dev": true }, "fs-extra": { - "version": "0.30.0", - "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-0.30.0.tgz", - "integrity": "sha1-8jP/zAjU2n1DLapEl3aYnbHfk/A=", - "dev": true, + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-5.0.0.tgz", + "integrity": "sha512-66Pm4RYbjzdyeuqudYqhFiNBbCIuI9kgRqLPSHIlXHidW8NIQtVdkM1yeZ4lXwuhbTETv3EUGMNHAAw6hiundQ==", "requires": { "graceful-fs": "4.1.11", - "jsonfile": "2.4.0", - "klaw": "1.3.1", - "path-is-absolute": "1.0.1", - "rimraf": "2.6.2" + "jsonfile": "4.0.0", + "universalify": "0.1.1" } }, "fs.realpath": { @@ -3023,6 +3192,19 @@ "integrity": "sha1-GwqzvVU7Kg1jmdKcDj6gslIHgyc=", "dev": true }, + "generate-function": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/generate-function/-/generate-function-2.0.0.tgz", + "integrity": "sha1-aFj+fAlpt9TpCTM3ZHrHn2DfvnQ=" + }, + "generate-object-property": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/generate-object-property/-/generate-object-property-1.2.0.tgz", + "integrity": "sha1-nA4cQDCM6AT0eDYYuTf6iPmdUNA=", + "requires": { + "is-property": "1.0.2" + } + }, "get-caller-file": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/get-caller-file/-/get-caller-file-1.0.2.tgz", @@ -3144,7 +3326,6 @@ "version": "2.0.0", "resolved": "https://registry.npmjs.org/has-ansi/-/has-ansi-2.0.0.tgz", "integrity": "sha1-NPUEnOHs3ysGSa8+8k5F7TVBbZE=", - "dev": true, "requires": { "ansi-regex": "2.1.1" } @@ -3227,6 +3408,55 @@ "resolved": "https://registry.npmjs.org/html-entities/-/html-entities-1.2.1.tgz", "integrity": "sha1-DfKTUfByEWNRXfueVUPl9u7VFi8=" }, + "htmlparser2": { + "version": "3.8.3", + "resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-3.8.3.tgz", + "integrity": "sha1-mWwosZFRaovoZQGn15dX5ccMEGg=", + "requires": { + "domelementtype": "1.3.0", + "domhandler": "2.3.0", + "domutils": "1.5.1", + "entities": "1.0.0", + "readable-stream": "1.1.14" + }, + "dependencies": { + "domutils": { + "version": "1.5.1", + "resolved": "https://registry.npmjs.org/domutils/-/domutils-1.5.1.tgz", + "integrity": "sha1-3NhIiib1Y9YQeeSMn3t+Mjc2gs8=", + "requires": { + "dom-serializer": "0.1.0", + "domelementtype": "1.3.0" + } + }, + "entities": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/entities/-/entities-1.0.0.tgz", + "integrity": "sha1-sph6o4ITR/zeZCsk/fyeT7cSvyY=" + }, + "isarray": { + "version": "0.0.1", + "resolved": "https://registry.npmjs.org/isarray/-/isarray-0.0.1.tgz", + "integrity": "sha1-ihis/Kmo9Bd+Cav8YDiTmwXR7t8=" + }, + "readable-stream": { + "version": "1.1.14", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-1.1.14.tgz", + "integrity": "sha1-fPTFTvZI44EwhMY23SB54WbAgdk=", + "requires": { + "core-util-is": "1.0.2", + "inherits": "2.0.3", + "isarray": "0.0.1", + "string_decoder": "0.10.31" + } + }, + "string_decoder": { + "version": "0.10.31", + "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-0.10.31.tgz", + "integrity": "sha1-YuIDvEF2bGwoyfyEMB2rHFMQ+pQ=" + } + } + }, "http-errors": { "version": "1.6.2", "resolved": "https://registry.npmjs.org/http-errors/-/http-errors-1.6.2.tgz", @@ -3507,6 +3737,17 @@ "is-extglob": "1.0.0" } }, + "is-my-json-valid": { + "version": "2.17.1", + "resolved": "https://registry.npmjs.org/is-my-json-valid/-/is-my-json-valid-2.17.1.tgz", + "integrity": "sha512-Q2khNw+oBlWuaYvEEHtKSw/pCxD2L5Rc1C+UQme9X6JdRDh7m5D7HkozA0qa3DUkQ6VzCnEm8mVIQPyIRkI5sQ==", + "requires": { + "generate-function": "2.0.0", + "generate-object-property": "1.2.0", + "jsonpointer": "4.0.1", + "xtend": "4.0.1" + } + }, "is-number": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/is-number/-/is-number-2.1.0.tgz", @@ -3561,6 +3802,11 @@ "integrity": "sha1-eaKp7OfwlugPNtKy87wWwf9L8/o=", "dev": true }, + "is-property": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/is-property/-/is-property-1.0.2.tgz", + "integrity": "sha1-V/4cTkhHTt1lsJkR8msc1Ald2oQ=" + }, "is-regex": { "version": "1.0.4", "resolved": "https://registry.npmjs.org/is-regex/-/is-regex-1.0.4.tgz", @@ -3635,8 +3881,7 @@ "isstream": { "version": "0.1.2", "resolved": "https://registry.npmjs.org/isstream/-/isstream-0.1.2.tgz", - "integrity": "sha1-R+Y/evVa+m+S4VAOaQ64uFKcCZo=", - "dev": true + "integrity": "sha1-R+Y/evVa+m+S4VAOaQ64uFKcCZo=" }, "items": { "version": "2.1.1", @@ -3723,8 +3968,7 @@ "json-stringify-safe": { "version": "5.0.1", "resolved": "https://registry.npmjs.org/json-stringify-safe/-/json-stringify-safe-5.0.1.tgz", - "integrity": "sha1-Epai1Y/UXxmg9s4B1lcB4sc1tus=", - "dev": true + "integrity": "sha1-Epai1Y/UXxmg9s4B1lcB4sc1tus=" }, "json5": { "version": "0.5.1", @@ -3732,10 +3976,9 @@ "integrity": "sha1-Hq3nrMASA0rYTiOWdn6tn6VJWCE=" }, "jsonfile": { - "version": "2.4.0", - "resolved": "https://registry.npmjs.org/jsonfile/-/jsonfile-2.4.0.tgz", - "integrity": "sha1-NzaitCi4e72gzIO1P6PWM6NcKug=", - "dev": true, + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/jsonfile/-/jsonfile-4.0.0.tgz", + "integrity": "sha1-h3Gq4HmbZAdrdmQPygWPnBDjPss=", "requires": { "graceful-fs": "4.1.11" } @@ -3746,6 +3989,11 @@ "integrity": "sha1-LHS27kHZPKUbe1qu6PUDYx0lKnM=", "dev": true }, + "jsonpointer": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/jsonpointer/-/jsonpointer-4.0.1.tgz", + "integrity": "sha1-T9kss04OnbPInIYi7PUfm5eMbLk=" + }, "jsprim": { "version": "1.4.1", "resolved": "https://registry.npmjs.org/jsprim/-/jsprim-1.4.1.tgz", @@ -3910,6 +4158,14 @@ "integrity": "sha1-5WqpTEyAVaFkBKBnS3jyFffI4ZQ=", "dev": true }, + "matcher-collection": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/matcher-collection/-/matcher-collection-1.0.5.tgz", + "integrity": "sha512-nUCmzKipcJEwYsBVAFh5P+d7JBuhJaW1xs85Hara9xuMLqtCVUrW6DSC0JVIkluxEH2W45nPBM/wjHtBXa/tYA==", + "requires": { + "minimatch": "3.0.4" + } + }, "math-expression-evaluator": { "version": "1.2.17", "resolved": "https://registry.npmjs.org/math-expression-evaluator/-/math-expression-evaluator-1.2.17.tgz", @@ -4114,14 +4370,12 @@ "mime-db": { "version": "1.30.0", "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.30.0.tgz", - "integrity": "sha1-dMZD2i3Z1qRTmZY0ZbJtXKfXHwE=", - "dev": true + "integrity": "sha1-dMZD2i3Z1qRTmZY0ZbJtXKfXHwE=" }, "mime-types": { "version": "2.1.17", "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.17.tgz", "integrity": "sha1-Cdejk/A+mVp5+K+Fe3Cp4KsWVXo=", - "dev": true, "requires": { "mime-db": "1.30.0" } @@ -4220,8 +4474,7 @@ "ms": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", - "integrity": "sha1-VgiurfwAvmwpAd9fmGF4jeDVl8g=", - "dev": true + "integrity": "sha1-VgiurfwAvmwpAd9fmGF4jeDVl8g=" }, "mute-stream": { "version": "0.0.7", @@ -4244,8 +4497,7 @@ "negotiator": { "version": "0.6.1", "resolved": "https://registry.npmjs.org/negotiator/-/negotiator-0.6.1.tgz", - "integrity": "sha1-KzJxhOiZIQEXeyhWP7XnECrNDKk=", - "dev": true + "integrity": "sha1-KzJxhOiZIQEXeyhWP7XnECrNDKk=" }, "nightmare": { "version": "2.10.0", @@ -4319,6 +4571,171 @@ "vm-browserify": "0.0.4" } }, + "node-spider": { + "version": "1.4.1", + "resolved": "https://registry.npmjs.org/node-spider/-/node-spider-1.4.1.tgz", + "integrity": "sha1-osLYKkQhjAXe6W/KCo+P7bxO0n4=", + "requires": { + "cheerio": "0.19.0", + "request": "2.61.0" + }, + "dependencies": { + "ansi-styles": { + "version": "2.2.1", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-2.2.1.tgz", + "integrity": "sha1-tDLdM1i2NM914eRmQ2gkBTPB3b4=" + }, + "asn1": { + "version": "0.1.11", + "resolved": "https://registry.npmjs.org/asn1/-/asn1-0.1.11.tgz", + "integrity": "sha1-VZvhg3bQik7E2+gId9J4GGObLfc=" + }, + "assert-plus": { + "version": "0.1.5", + "resolved": "https://registry.npmjs.org/assert-plus/-/assert-plus-0.1.5.tgz", + "integrity": "sha1-7nQAlBMALYTOxyGcasgRgS5yMWA=" + }, + "aws-sign2": { + "version": "0.5.0", + "resolved": "https://registry.npmjs.org/aws-sign2/-/aws-sign2-0.5.0.tgz", + "integrity": "sha1-xXED96F/wDfwLXwuZLYC6iI/fWM=" + }, + "boom": { + "version": "2.10.1", + "resolved": "https://registry.npmjs.org/boom/-/boom-2.10.1.tgz", + "integrity": "sha1-OciRjO/1eZ+D+UkqhI9iWt0Mdm8=", + "requires": { + "hoek": "2.16.3" + } + }, + "caseless": { + "version": "0.11.0", + "resolved": "https://registry.npmjs.org/caseless/-/caseless-0.11.0.tgz", + "integrity": "sha1-cVuW6phBWTzDMGeSP17GDr2k99c=" + }, + "chalk": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/chalk/-/chalk-1.1.3.tgz", + "integrity": "sha1-qBFcVeSnAv5NFQq9OHKCKn4J/Jg=", + "requires": { + "ansi-styles": "2.2.1", + "escape-string-regexp": "1.0.5", + "has-ansi": "2.0.0", + "strip-ansi": "3.0.1", + "supports-color": "2.0.0" + } + }, + "cryptiles": { + "version": "2.0.5", + "resolved": "https://registry.npmjs.org/cryptiles/-/cryptiles-2.0.5.tgz", + "integrity": "sha1-O9/s3GCBR8HGcgL6KR59ylnqo7g=", + "requires": { + "boom": "2.10.1" + } + }, + "form-data": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/form-data/-/form-data-1.0.1.tgz", + "integrity": "sha1-rjFduaSQf6BlUCMEpm13M0de43w=", + "requires": { + "async": "2.6.0", + "combined-stream": "1.0.5", + "mime-types": "2.1.17" + } + }, + "har-validator": { + "version": "1.8.0", + "resolved": "https://registry.npmjs.org/har-validator/-/har-validator-1.8.0.tgz", + "integrity": "sha1-2DhCsOtMQ1lgrrEIoGejqpTA7rI=", + "requires": { + "bluebird": "2.11.0", + "chalk": "1.1.3", + "commander": "2.11.0", + "is-my-json-valid": "2.17.1" + } + }, + "hawk": { + "version": "3.1.3", + "resolved": "https://registry.npmjs.org/hawk/-/hawk-3.1.3.tgz", + "integrity": "sha1-B4REvXwWQLD+VA0sm3PVlnjo4cQ=", + "requires": { + "boom": "2.10.1", + "cryptiles": "2.0.5", + "hoek": "2.16.3", + "sntp": "1.0.9" + } + }, + "hoek": { + "version": "2.16.3", + "resolved": "https://registry.npmjs.org/hoek/-/hoek-2.16.3.tgz", + "integrity": "sha1-ILt0A9POo5jpHcRxCo/xuCdKJe0=" + }, + "http-signature": { + "version": "0.11.0", + "resolved": "https://registry.npmjs.org/http-signature/-/http-signature-0.11.0.tgz", + "integrity": "sha1-F5bPZ6ABrVzWhJ3KCZFIXwkIn+Y=", + "requires": { + "asn1": "0.1.11", + "assert-plus": "0.1.5", + "ctype": "0.5.3" + } + }, + "node-uuid": { + "version": "1.4.8", + "resolved": "https://registry.npmjs.org/node-uuid/-/node-uuid-1.4.8.tgz", + "integrity": "sha1-sEDrCSOWivq/jTL7HxfxFn/auQc=" + }, + "qs": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/qs/-/qs-4.0.0.tgz", + "integrity": "sha1-wx2bdOwn33XlQ6hseHKO2NRiNgc=" + }, + "request": { + "version": "2.61.0", + "resolved": "https://registry.npmjs.org/request/-/request-2.61.0.tgz", + "integrity": "sha1-aXPLKslIhfAmk/VU7sZEgdYBP58=", + "requires": { + "aws-sign2": "0.5.0", + "bl": "1.0.3", + "caseless": "0.11.0", + "combined-stream": "1.0.5", + "extend": "3.0.1", + "forever-agent": "0.6.1", + "form-data": "1.0.1", + "har-validator": "1.8.0", + "hawk": "3.1.3", + "http-signature": "0.11.0", + "isstream": "0.1.2", + "json-stringify-safe": "5.0.1", + "mime-types": "2.1.17", + "node-uuid": "1.4.8", + "oauth-sign": "0.8.2", + "qs": "4.0.0", + "stringstream": "0.0.5", + "tough-cookie": "2.3.3", + "tunnel-agent": "0.4.3" + } + }, + "sntp": { + "version": "1.0.9", + "resolved": "https://registry.npmjs.org/sntp/-/sntp-1.0.9.tgz", + "integrity": "sha1-ZUEYTMkK7qbG57NeJlkIJEPGYZg=", + "requires": { + "hoek": "2.16.3" + } + }, + "supports-color": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-2.0.0.tgz", + "integrity": "sha1-U10EXOa2Nj+kARcIRimZXp3zJMc=" + }, + "tunnel-agent": { + "version": "0.4.3", + "resolved": "https://registry.npmjs.org/tunnel-agent/-/tunnel-agent-0.4.3.tgz", + "integrity": "sha1-Y3PbdpCf5XDgjXNYM2Xtgop07us=" + } + } + }, "normalize-package-data": { "version": "2.4.0", "resolved": "https://registry.npmjs.org/normalize-package-data/-/normalize-package-data-2.4.0.tgz", @@ -4431,6 +4848,14 @@ "path-key": "2.0.1" } }, + "nth-check": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/nth-check/-/nth-check-1.0.1.tgz", + "integrity": "sha1-mSms32KPwsQQmN6rgqxYDPFJquQ=", + "requires": { + "boolbase": "1.0.0" + } + }, "nugget": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/nugget/-/nugget-2.0.1.tgz", @@ -4477,8 +4902,7 @@ "oauth-sign": { "version": "0.8.2", "resolved": "https://registry.npmjs.org/oauth-sign/-/oauth-sign-0.8.2.tgz", - "integrity": "sha1-Rqarfwrq2N6unsBWV4C31O/rnUM=", - "dev": true + "integrity": "sha1-Rqarfwrq2N6unsBWV4C31O/rnUM=" }, "object-assign": { "version": "4.1.1", @@ -4509,6 +4933,11 @@ "ee-first": "1.1.1" } }, + "on-headers": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/on-headers/-/on-headers-1.0.1.tgz", + "integrity": "sha1-ko9dD0cNSTQmUepnlLCFfBAGk/c=" + }, "once": { "version": "1.4.0", "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", @@ -6147,8 +6576,7 @@ "stringstream": { "version": "0.0.5", "resolved": "https://registry.npmjs.org/stringstream/-/stringstream-0.0.5.tgz", - "integrity": "sha1-TkhM1N5aC7vuGORjB3EKioFiGHg=", - "dev": true + "integrity": "sha1-TkhM1N5aC7vuGORjB3EKioFiGHg=" }, "strip-ansi": { "version": "3.0.1", @@ -6387,7 +6815,6 @@ "version": "2.3.3", "resolved": "https://registry.npmjs.org/tough-cookie/-/tough-cookie-2.3.3.tgz", "integrity": "sha1-C2GKVWW23qkL80JdBNVe3EdadWE=", - "dev": true, "requires": { "punycode": "1.4.1" } @@ -6516,6 +6943,11 @@ "integrity": "sha1-/+3ks2slKQaW5uFl1KWe25mOawI=", "dev": true }, + "universalify": { + "version": "0.1.1", + "resolved": "https://registry.npmjs.org/universalify/-/universalify-0.1.1.tgz", + "integrity": "sha1-+nG63UQ3r0wUiEHjs7Fl+enlkLc=" + }, "unpipe": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/unpipe/-/unpipe-1.0.0.tgz", @@ -6582,8 +7014,7 @@ "vary": { "version": "1.1.2", "resolved": "https://registry.npmjs.org/vary/-/vary-1.1.2.tgz", - "integrity": "sha1-IpnwLG3tMNSllhsLn3RSShj2NPw=", - "dev": true + "integrity": "sha1-IpnwLG3tMNSllhsLn3RSShj2NPw=" }, "vendors": { "version": "1.0.1", @@ -6631,6 +7062,15 @@ } } }, + "walk-sync": { + "version": "0.3.2", + "resolved": "https://registry.npmjs.org/walk-sync/-/walk-sync-0.3.2.tgz", + "integrity": "sha512-FMB5VqpLqOCcqrzA9okZFc0wq0Qbmdm396qJxvQZhDpyu0W95G9JCmp74tx7iyYnyOcBtUuKJsgIKAqjozvmmQ==", + "requires": { + "ensure-posix-path": "1.0.2", + "matcher-collection": "1.0.5" + } + }, "watchpack": { "version": "1.4.0", "resolved": "https://registry.npmjs.org/watchpack/-/watchpack-1.4.0.tgz", diff --git a/package.json b/package.json index 5b4236b..d924350 100644 --- a/package.json +++ b/package.json @@ -19,11 +19,15 @@ "dependencies": { "chalk": "^2.3.0", "chokidar": "^1.7.0", + "compression": "^1.7.1", "escape-html": "^1.0.3", + "fs-extra": "^5.0.0", "mkdirp": "^0.5.1", + "node-spider": "^1.4.1", "relative": "^3.0.2", "require-relative": "^0.8.7", "rimraf": "^2.6.2", + "walk-sync": "^0.3.2", "webpack": "^3.10.0", "webpack-hot-middleware": "^2.21.0" }, diff --git a/test/app/.gitignore b/test/app/.gitignore index 81c2c19..9f3fcfb 100644 --- a/test/app/.gitignore +++ b/test/app/.gitignore @@ -3,4 +3,5 @@ node_modules .sapper yarn.lock cypress/screenshots -templates/.* \ No newline at end of file +templates/.* +dist diff --git a/test/common/test.js b/test/common/test.js index 56e28f6..5870a53 100644 --- a/test/common/test.js +++ b/test/common/test.js @@ -5,6 +5,7 @@ const serve = require('serve-static'); const Nightmare = require('nightmare'); const getPort = require('get-port'); const fetch = require('node-fetch'); +const walkSync = require('walk-sync'); run('production'); run('development'); @@ -70,6 +71,7 @@ function run(env) { if (env === 'production') { const cli = path.resolve(__dirname, '../../cli/index.js'); await exec(`${cli} build`); + await exec(`${cli} extract`); } const resolved = require.resolve('../..'); @@ -306,6 +308,57 @@ function run(env) { ); }); }); + + if (env === 'production') { + describe('extract', () => { + it('extract all pages', () => { + const dest = path.resolve(__dirname, '../app/dist'); + + // Pages that should show up in the extraction directory. + const expectedPages = [ + 'index.html', + 'api/index.html', + + 'about/index.html', + 'api/about/index.html', + + 'blog/index.html', + 'api/blog/index.html', + + 'blog/a-very-long-post/index.html', + 'api/blog/a-very-long-post/index.html', + + 'blog/how-can-i-get-involved/index.html', + 'api/blog/how-can-i-get-involved/index.html', + + 'blog/how-is-sapper-different-from-next/index.html', + 'api/blog/how-is-sapper-different-from-next/index.html', + + 'blog/how-to-use-sapper/index.html', + 'api/blog/how-to-use-sapper/index.html', + + 'blog/what-is-sapper/index.html', + 'api/blog/what-is-sapper/index.html', + + 'blog/why-the-name/index.html', + 'api/blog/why-the-name/index.html', + + 'favicon.png', + 'global.css', + 'great-success.png', + 'manifest.json', + 'service-worker.js', + 'svelte-logo-192.png', + 'svelte-logo-512.png', + ]; + const allPages = walkSync(dest); + + expectedPages.forEach((expectedPage) => { + assert.ok(allPages.includes(expectedPage)); + }); + }); + }); + } }); } @@ -322,4 +375,4 @@ function exec(cmd) { fulfil(); }); }); -} \ No newline at end of file +} From d08f9eb5a495ca9ee3a133fddf043833497fd504 Mon Sep 17 00:00:00 2001 From: freedmand Date: Fri, 5 Jan 2018 14:29:46 -0800 Subject: [PATCH 2/6] Fixes funky indentation in extraction unit test --- test/common/test.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/common/test.js b/test/common/test.js index 5870a53..c3e290d 100644 --- a/test/common/test.js +++ b/test/common/test.js @@ -332,7 +332,7 @@ function run(env) { 'api/blog/how-can-i-get-involved/index.html', 'blog/how-is-sapper-different-from-next/index.html', - 'api/blog/how-is-sapper-different-from-next/index.html', + 'api/blog/how-is-sapper-different-from-next/index.html', 'blog/how-to-use-sapper/index.html', 'api/blog/how-to-use-sapper/index.html', From fc8280adeaf9ddaf80fe1bb8f37de3bb13008fe6 Mon Sep 17 00:00:00 2001 From: freedmand Date: Fri, 5 Jan 2018 14:42:04 -0800 Subject: [PATCH 3/6] Fixes small issue with reading chunk files --- lib/utils/extract.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/utils/extract.js b/lib/utils/extract.js index 36579da..d77a39e 100644 --- a/lib/utils/extract.js +++ b/lib/utils/extract.js @@ -7,7 +7,7 @@ const Spider = require('node-spider'); const path = require('path'); const { PORT = 3000, OUTPUT_DIR = 'dist' } = process.env; -const { dest = sapperDest } = require('../config.js'); +const { dest } = require('../config.js'); const prefix = `http://localhost:${PORT}`; @@ -81,7 +81,7 @@ function filter(url) { */ function getChunkFiles() { const clientInfo = - fs.readJsonSync(path.join(sapperDest, 'stats.client.json')); + fs.readJsonSync(path.join(dest, 'stats.client.json')); const chunkFiles = clientInfo.assets.map(chunk => `/client/${chunk.name}`); return chunkFiles; } From 7588911108531b145021586e4f90106d6590c6c0 Mon Sep 17 00:00:00 2001 From: freedmand Date: Fri, 5 Jan 2018 14:56:58 -0800 Subject: [PATCH 4/6] Removes all async/await from the extraction pipeline, and adds unit tests for extracted client pages that match a regular expression --- lib/utils/extract.js | 167 ++++++++++++++++++++++--------------------- test/common/test.js | 25 +++++++ 2 files changed, 109 insertions(+), 83 deletions(-) diff --git a/lib/utils/extract.js b/lib/utils/extract.js index d77a39e..e41e252 100644 --- a/lib/utils/extract.js +++ b/lib/utils/extract.js @@ -98,7 +98,7 @@ function getChunkFiles() { * @param {number=} extractionDir The directory in which to place the extracted * output. */ -module.exports = async function(includeUrls = null, excludeUrls = null, +module.exports = function(includeUrls = null, excludeUrls = null, extractionDir = OUTPUT_DIR) { // Set up the server. @@ -127,97 +127,98 @@ module.exports = async function(includeUrls = null, excludeUrls = null, // scraper. The program automatically exits after all the static pages have // been scraped from the server that are accessible from the root page (`/`). const extractedFiles = []; // keep track of extracted files. - const server = await app.listen(PORT); - console.log(`listening on port ${PORT} and beginning extraction`); + const server = app.listen(PORT, () => { + console.log(`listening on port ${PORT} and beginning extraction`); + return new Promise((resolve, reject) => { + const spider = new Spider({ + concurrent: 5, + delay: 0, + logs: process.stderr, + allowDuplicates: false, + catchErrors: true, + addReferrer: false, + xhr: false, + keepAlive: false, + error: (err, url) => { + console.error(`ERROR ${err} at ${url}`); + reject(); + }, + // Called when there are no more requests + done: () => { + server.close(() => { + console.log('Done!'); + resolve(); + }); + }, - return new Promise((resolve, reject) => { - const spider = new Spider({ - concurrent: 5, - delay: 0, - logs: process.stderr, - allowDuplicates: false, - catchErrors: true, - addReferrer: false, - xhr: false, - keepAlive: false, - error: (err, url) => { - console.error(`ERROR ${err} at ${url}`); - reject(); - }, - // Called when there are no more requests - done: async () => { - await server.close(); - console.log('Done!'); - resolve(); - }, + headers: { 'user-agent': 'node-spider' }, + // Use a binary encoding to preserve image files. + encoding: 'binary' + }); - headers: { 'user-agent': 'node-spider' }, - // Use a binary encoding to preserve image files. - encoding: 'binary' - }); + // The primary logic to handle a scraped page. + const handleRequest = (doc) => { + // Only deal with the page if it is on the server, i.e. it is not an + // external link. + if (!filter(doc.url)) return; + // Skip URL if it is in the exclude list. + if (excludeUrls.includes(getFullUrl(doc.url))) return; - // The primary logic to handle a scraped page. - const handleRequest = (doc) => { - // Only deal with the page if it is on the server, i.e. it is not an - // external link. - if (!filter(doc.url)) return; - // Skip URL if it is in the exclude list. - if (excludeUrls.includes(getFullUrl(doc.url))) return; + // Grab the page's relative path and write the page contents to a local + // file. + const relPath = relativePath(doc.url); + extractedFiles.push(relPath); + console.log(`GOT ${relPath}`); // static page url + fs.outputFileSync(path.join(extractionDir, relPath), doc.res.body, + {encoding: 'binary'}); - // Grab the page's relative path and write the page contents to a local - // file. - const relPath = relativePath(doc.url); - extractedFiles.push(relPath); - console.log(`GOT ${relPath}`); // static page url - fs.outputFileSync(path.join(extractionDir, relPath), doc.res.body, - {encoding: 'binary'}); + /** + * Resolves and checks if a given URL is local; if so, adds it to the + * scraping queue. + * @param {string} url The URL to process. + */ + const process = (url) => { + // Remove trailing hash if relevant. + url = url.split('#')[0]; + // Resolve URL relative to server root. + url = doc.resolve(url); + // Crawl more if the URL is on the server. + if (filter(url)) spider.queue(url, handleRequest); + }; - /** - * Resolves and checks if a given URL is local; if so, adds it to the - * scraping queue. - * @param {string} url The URL to process. - */ - const process = (url) => { - // Remove trailing hash if relevant. - url = url.split('#')[0]; - // Resolve URL relative to server root. - url = doc.resolve(url); - // Crawl more if the URL is on the server. - if (filter(url)) spider.queue(url, handleRequest); + const extension = getExtension(relPath); + if (extension == 'html') { + // Grab src and href attributes from html pages. + doc.$('[src]').each((i, elem) => { + process(doc.$(elem).attr('src')); + }); + doc.$('[href]').each((i, elem) => { + process(doc.$(elem).attr('href')); + }); + } + + if (doc.url.endsWith('/service-worker.js')) { + // Grab additional routes. + const chunkFiles = getChunkFiles(); + chunkFiles.forEach( + (url) => spider.queue(getFullUrl(url), handleRequest)); + } + + if (relPath.endsWith('/index.html') && !relPath.startsWith('/api/')) { + // Attempt to grab the /api/ version of a page that seems to be a + // basic route. + spider.queue(apiPath(doc.url), handleRequest); + } }; - const extension = getExtension(relPath); - if (extension == 'html') { - // Grab src and href attributes from html pages. - doc.$('[src]').each((i, elem) => { - process(doc.$(elem).attr('src')); - }); - doc.$('[href]').each((i, elem) => { - process(doc.$(elem).attr('href')); - }); - } + // Start crawling with the document root and the service worker. + spider.queue(getFullUrl('/'), handleRequest); + spider.queue(getFullUrl('/service-worker.js'), handleRequest); - if (doc.url.endsWith('/service-worker.js')) { - // Grab additional routes. - const chunkFiles = getChunkFiles(); - chunkFiles.forEach( + if (includeUrls !== null) { + includeUrls.forEach( (url) => spider.queue(getFullUrl(url), handleRequest)); } - - if (relPath.endsWith('/index.html') && !relPath.startsWith('/api/')) { - // Attempt to grab the /api/ version of a page that seems to be a - // basic route. - spider.queue(apiPath(doc.url), handleRequest); - } - }; - - // Start crawling with the document root and the service worker. - spider.queue(getFullUrl('/'), handleRequest); - spider.queue(getFullUrl('/service-worker.js'), handleRequest); - - if (includeUrls !== null) { - includeUrls.forEach( - (url) => spider.queue(getFullUrl(url), handleRequest)); - } + }); }); } diff --git a/test/common/test.js b/test/common/test.js index c3e290d..1c6f332 100644 --- a/test/common/test.js +++ b/test/common/test.js @@ -322,6 +322,9 @@ function run(env) { 'about/index.html', 'api/about/index.html', + 'slow-preload/index.html', + 'api/slow-preload/index.html', + 'blog/index.html', 'api/blog/index.html', @@ -351,11 +354,33 @@ function run(env) { 'svelte-logo-192.png', 'svelte-logo-512.png', ]; + // Client scripts that should show up in the extraction directory. + const expectedClientRegexes = [ + /client\/_\..*?\.js/, + /client\/about\..*?\.js/, + /client\/blog_\$slug\$\..*?\.js/, + /client\/blog\..*?\.js/, + /client\/main\..*?\.js/, + /client\/show_url\..*?\.js/, + /client\/slow_preload\..*?\.js/, + ]; const allPages = walkSync(dest); expectedPages.forEach((expectedPage) => { assert.ok(allPages.includes(expectedPage)); }); + expectedClientRegexes.forEach((expectedRegex) => { + // Ensure each client page regular expression matches at least one + // generated page. + let matched = false; + for (const page of allPages) { + if (expectedRegex.test(page)) { + matched = true; + break; + } + } + assert.ok(matched); + }); }); }); } From 9ea4137b875b0f25c00adfb63e4f209f6dc34986 Mon Sep 17 00:00:00 2001 From: freedmand Date: Fri, 5 Jan 2018 19:21:25 -0800 Subject: [PATCH 5/6] Add option to extract server-side routes at directories other than /api. Also clarifies some texts and documentation. --- lib/utils/extract.js | 27 +++++++++++++++++---------- test/common/test.js | 6 ++++-- 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/lib/utils/extract.js b/lib/utils/extract.js index e41e252..0e0dd8f 100644 --- a/lib/utils/extract.js +++ b/lib/utils/extract.js @@ -56,13 +56,14 @@ function relativePath(url) { /** * Returns the Sapper API route for the specified URL path. * @param {string} url The absolute or relative URL. + * @param {string=} apiPrefix The prefix for Sapper server-side routes. * @return {string} The URL with /api/ in front. */ -function apiPath(url) { +function apiPath(url, apiPrefix = '/api') { if (url.startsWith(prefix)) { - return `${prefix}/api${url.substr(prefix.length)}`; + return `${prefix}${apiPrefix}${url.substr(prefix.length)}`; } - return `/api${url}`; + return `${apiPrefix}${url}`; } /** @@ -88,18 +89,23 @@ function getChunkFiles() { /** * Exports the Sapper app as a static website by starting at the root and - * crawling pages that are linked, their /api/ pages, and webpack routes, as - * well as copying assets. + * crawling pages that are linked, extracting server and client routes, and + * copying assets. * @param {?Array=} includeUrls If non-null, a set of additional URLs to * scrape in the extraction. This should only be set if there are routes * that cannot be reached from the root. * @param {?Array=} excludeUrls If non-null, a set of URLs to avoid * scraping in the extraction. + * @param {string=} apiPrefix The path in which all server-side Sapper routes + * are defined. The Sapper template application uses '/api' -- if you + * diverge from the template app structure, you will want to change this. If + * your server-side Sapper routes span multiple directories, you will have + * to specify each file manually with the `includeUrls` param. * @param {number=} extractionDir The directory in which to place the extracted * output. */ module.exports = function(includeUrls = null, excludeUrls = null, - extractionDir = OUTPUT_DIR) { + apiPrefix = '/api', extractionDir = OUTPUT_DIR) { // Set up the server. // this allows us to do e.g. `fetch('/api/blog')` on the server @@ -204,10 +210,11 @@ module.exports = function(includeUrls = null, excludeUrls = null, (url) => spider.queue(getFullUrl(url), handleRequest)); } - if (relPath.endsWith('/index.html') && !relPath.startsWith('/api/')) { - // Attempt to grab the /api/ version of a page that seems to be a - // basic route. - spider.queue(apiPath(doc.url), handleRequest); + if (relPath.endsWith('/index.html') && + !relPath.startsWith(`${apiPrefix}/`)) { + // Attempt to grab the server-side route corresponding to a page that + // seems to be a basic route. + spider.queue(apiPath(doc.url, apiPrefix), handleRequest); } }; diff --git a/test/common/test.js b/test/common/test.js index 1c6f332..af95b07 100644 --- a/test/common/test.js +++ b/test/common/test.js @@ -367,7 +367,8 @@ function run(env) { const allPages = walkSync(dest); expectedPages.forEach((expectedPage) => { - assert.ok(allPages.includes(expectedPage)); + assert.ok(allPages.includes(expectedPage), + `Could not find page matching ${expectedPage}`); }); expectedClientRegexes.forEach((expectedRegex) => { // Ensure each client page regular expression matches at least one @@ -379,7 +380,8 @@ function run(env) { break; } } - assert.ok(matched); + assert.ok(matched, + `Could not find client page matching ${expectedRegex}`); }); }); }); From d6dda371ca4a6ca6f67ff44995f86c5f27e12d76 Mon Sep 17 00:00:00 2001 From: Rich Harris Date: Sun, 14 Jan 2018 12:22:41 -0500 Subject: [PATCH 6/6] typo --- test/common/test.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/common/test.js b/test/common/test.js index 266604b..ab5a8bd 100644 --- a/test/common/test.js +++ b/test/common/test.js @@ -79,7 +79,7 @@ function run(env) { if (env === 'production') { const cli = path.resolve(__dirname, '../../cli/index.js'); - exec_promise = exec(`${cli} build`).then(() => exec(`${cli} extract)); + exec_promise = exec(`${cli} build`).then(() => exec(`${cli} extract`)); } return exec_promise.then(() => {