remove glob and cheerio from dependencies

This commit is contained in:
Rich Harris
2018-08-19 15:20:14 -04:00
parent 14e5c8e761
commit dba83641e4
23 changed files with 1471 additions and 1317 deletions

View File

@@ -3,7 +3,7 @@ import * as path from 'path';
import mkdirp from 'mkdirp';
import rimraf from 'rimraf';
import { EventEmitter } from 'events';
import { minify_html } from './utils/minify_html';
import minify_html from './utils/minify_html';
import { create_compilers, create_main_manifests, create_routes, create_serviceworker_manifest } from '../core';
import * as events from './interfaces';

View File

@@ -1,12 +1,12 @@
import * as child_process from 'child_process';
import * as path from 'path';
import * as sander from 'sander';
import cheerio from 'cheerio';
import URL from 'url-parse';
import fetch from 'node-fetch';
import * as ports from 'port-authority';
import { EventEmitter } from 'events';
import { minify_html } from './utils/minify_html';
import clean_html from './utils/clean_html';
import minify_html from './utils/minify_html';
import Deferred from './utils/Deferred';
import * as events from './interfaces';
@@ -123,15 +123,26 @@ async function execute(emitter: EventEmitter, {
if (range === 2) {
if (r.headers.get('Content-Type') === 'text/html') {
const body = await r.text();
const $ = cheerio.load(body);
const urls: URL[] = [];
const base = new URL($('base').attr('href') || '/', url.href);
const cleaned = clean_html(body);
$('a[href]').each((i: number, $a) => {
const url = new URL($a.attribs.href, base.href);
if (url.origin === origin) urls.push(url);
});
const base_match = /<base ([\s\S]+?)>/m.exec(cleaned);
const base_href = base_match && get_href(base_match[1]);
const base = new URL(base_href || '/', url.href);
let match;
let pattern = /<a ([\s\S]+?)>/gm;
while (match = pattern.exec(cleaned)) {
const attrs = match[1];
const href = get_href(attrs);
if (href) {
const url = new URL(href, base.href);
if (url.origin === origin) urls.push(url);
}
}
await Promise.all(urls.map(handle));
}
@@ -147,3 +158,8 @@ async function execute(emitter: EventEmitter, {
})
.then(() => proc.kill());
}
function get_href(attrs: string) {
const match = /href\s*=\s*(?:"(.+?)"|'(.+?)'|([^\s>]+))/.exec(attrs);
return match[1] || match[2] || match[3];
}

View File

@@ -1,4 +1,3 @@
import * as glob from 'glob';
import { locations } from '../config';
import { create_routes } from '../core';

View File

@@ -0,0 +1,7 @@
export default function clean_html(html: string) {
return html
.replace(/<!\[CDATA\[[\s\S]*?\]\]>/gm, '')
.replace(/(<script[\s\S]*?>)[\s\S]*?<\/script>/gm, '$1</' + 'script>')
.replace(/(<style[\s\S]*?>)[\s\S]*?<\/style>/gm, '$1</' + 'style>')
.replace(/<!--[\s\S]*?-->/gm, '');
}

View File

@@ -1,6 +1,6 @@
import { minify } from 'html-minifier';
export function minify_html(html: string) {
export default function minify_html(html: string) {
return minify(html, {
collapseBooleanAttributes: true,
collapseWhitespace: true,