merge master -> crawl-queue

This commit is contained in:
Rich Harris
2018-08-22 18:39:28 -04:00
31 changed files with 846 additions and 361 deletions

View File

@@ -3,7 +3,7 @@ import * as path from 'path';
import mkdirp from 'mkdirp';
import rimraf from 'rimraf';
import { EventEmitter } from 'events';
import { minify_html } from './utils/minify_html';
import minify_html from './utils/minify_html';
import { create_compilers, create_main_manifests, create_routes, create_serviceworker_manifest } from '../core';
import * as events from './interfaces';

View File

@@ -119,21 +119,30 @@ class Watcher extends EventEmitter {
this.dev_server = new DevServer(dev_port);
this.filewatchers.push(
watch_files(locations.routes(), ['add', 'unlink'], () => {
const routes = create_routes();
create_main_manifests({ routes, dev_port });
try {
watch_dir(
locations.routes(),
({ path: file, stats }) => {
if (stats.isDirectory()) {
return path.basename(file)[0] !== '_';
}
return true;
},
() => {
const routes = create_routes();
create_main_manifests({ routes, dev_port });
} catch (err) {
this.emit('error', <events.ErrorEvent>{
message: err.message
});
}
}),
watch_files(`${locations.app()}/template.html`, ['change'], () => {
try {
const routes = create_routes();
create_main_manifests({ routes, dev_port });
} catch (err) {
this.emit('error', <events.ErrorEvent>{
message: err.message
});
}
}
),
fs.watch(`${locations.app()}/template.html`, () => {
this.dev_server.send({
action: 'reload'
});
@@ -453,20 +462,32 @@ class DevServer {
function noop() {}
function watch_files(pattern: string, events: string[], callback: () => void) {
const chokidar = require('chokidar');
function watch_dir(
dir: string,
filter: ({ path, stats }: { path: string, stats: fs.Stats }) => boolean,
callback: () => void
) {
let watch;
let closed = false;
const watcher = chokidar.watch(pattern, {
persistent: true,
ignoreInitial: true,
disableGlobbing: true
});
import('cheap-watch').then(CheapWatch => {
if (closed) return;
events.forEach(event => {
watcher.on(event, callback);
watch = new CheapWatch({ dir, filter, debounce: 50 });
watch.on('+', ({ isNew }) => {
if (isNew) callback();
});
watch.on('-', callback);
watch.init();
});
return {
close: () => watcher.close()
close: () => {
if (watch) watch.close();
closed = true;
}
};
}
}

View File

@@ -1,13 +1,13 @@
import * as child_process from 'child_process';
import * as path from 'path';
import * as sander from 'sander';
import cheerio from 'cheerio';
import URL from 'url-parse';
import fetch from 'node-fetch';
import * as yootils from 'yootils';
import * as ports from 'port-authority';
import { EventEmitter } from 'events';
import { minify_html } from './utils/minify_html';
import clean_html from './utils/clean_html';
import minify_html from './utils/minify_html';
import Deferred from './utils/Deferred';
import * as events from './interfaces';
@@ -124,20 +124,31 @@ async function execute(emitter: EventEmitter, {
if (range === 2) {
if (r.headers.get('Content-Type') === 'text/html') {
const body = await r.text();
const $ = cheerio.load(body);
const urls: URL[] = [];
const base = new URL($('base').attr('href') || '/', url.href);
const cleaned = clean_html(body);
const q = yootils.queue(8);
let promise;
$('a[href]').each((i: number, $a) => {
const url = new URL($a.attribs.href, base.href);
if (url.origin === origin) {
promise = q.add(() => handle(url));
const base_match = /<base ([\s\S]+?)>/m.exec(cleaned);
const base_href = base_match && get_href(base_match[1]);
const base = new URL(base_href || '/', url.href);
let match;
let pattern = /<a ([\s\S]+?)>/gm;
while (match = pattern.exec(cleaned)) {
const attrs = match[1];
const href = get_href(attrs);
if (href) {
const url = new URL(href, base.href);
if (url.origin === origin) {
promise = q.add(() => handle(url));
}
}
});
}
await promise;
}
@@ -153,3 +164,8 @@ async function execute(emitter: EventEmitter, {
})
.then(() => proc.kill());
}
/**
 * Extracts the value of the `href` attribute from the raw attribute text of
 * an HTML tag (everything between the tag name and the closing `>`).
 *
 * Supports double-quoted, single-quoted and unquoted attribute values.
 * The value is returned exactly as written in the markup; HTML entities
 * such as `&amp;` are not decoded.
 *
 * @param attrs raw attribute text, e.g. `class="nav" href="/about"`
 * @returns the href value, or `undefined` when the tag has no `href`
 *   attribute or its value is empty
 */
function get_href(attrs: string) {
	// `href` must start the string or follow whitespace so that attributes
	// which merely end in "href" (e.g. `data-href`) are not picked up.
	// Quoted alternatives use negated character classes so an empty value
	// (`href=""`) cannot run on into the following attribute.
	const match = /(?:^|\s)href\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s>]+))/.exec(attrs);

	// exec() yields null when there is no href at all (e.g. `<a name="top">`);
	// callers treat a falsy result as "nothing to crawl".
	if (!match) return undefined;

	return match[1] || match[2] || match[3] || undefined;
}

View File

@@ -1,4 +1,3 @@
import * as glob from 'glob';
import { locations } from '../config';
import { create_routes } from '../core';

View File

@@ -0,0 +1,7 @@
/**
 * Strips the parts of an HTML document whose text content is not markup, so
 * that later regex-based scanning does not see tag-like text inside them.
 *
 * Passes run in this order:
 *  1. CDATA sections are removed entirely.
 *  2. `<script>` bodies are emptied (the opening tag and its attributes stay).
 *  3. `<style>` bodies are emptied (the opening tag and its attributes stay).
 *  4. HTML comments are removed entirely.
 *
 * @param html the raw HTML source
 * @returns the HTML with those regions blanked out
 */
export default function clean_html(html: string) {
	const without_cdata = html.replace(/<!\[CDATA\[[\s\S]*?\]\]>/gm, '');

	// The closing tags are assembled from two pieces so that the literal
	// end-tag text never appears in this source file.
	const empty_script = '$1</' + 'script>';
	const empty_style = '$1</' + 'style>';

	const without_scripts = without_cdata.replace(
		/(<script[\s\S]*?>)[\s\S]*?<\/script>/gm,
		empty_script
	);

	const without_styles = without_scripts.replace(
		/(<style[\s\S]*?>)[\s\S]*?<\/style>/gm,
		empty_style
	);

	return without_styles.replace(/<!--[\s\S]*?-->/gm, '');
}

View File

@@ -1,6 +1,6 @@
import { minify } from 'html-minifier';
export function minify_html(html: string) {
export default function minify_html(html: string) {
return minify(html, {
collapseBooleanAttributes: true,
collapseWhitespace: true,