| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294 |
- /*
- * Copyright 2018 Gildas Lormeau
- * contact : gildas.lormeau <at> gmail.com
- *
- * This file is part of SingleFile.
- *
- * SingleFile is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * SingleFile is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with SingleFile. If not, see <http://www.gnu.org/licenses/>.
- */
- /* global SingleFileCore, DOMParser, URL, setTimeout, TextDecoder, Blob, fetch, base64, superFetch, srcsetParser, cssMinifier, htmlMinifier, cssRulesMinifier, fontsMinifier, serializer, docHelper, mediasMinifier, TextEncoder, crypto, matchedRules, imagesMinifier, FontFace, cssTree */
- this.SingleFile = this.SingleFile || (() => {
// When true, Download.getContent traces the start and end of each download through log().
const DEBUG = false;
// Number of bytes in one mebibyte; options.maxResourceSize is multiplied by it.
const ONE_MB = 2 ** 20;
// Content types starting with this prefix are accepted when options.validateTextContentType is set.
const PREFIX_CONTENT_TYPE_TEXT = "text/";
// Delay passed to setTimeout (milliseconds) after which DOM.validFont stops waiting for a font to load.
const FONT_FACE_TEST_MAX_DELAY = 1000;
- // --------
- // Download
- // --------
// Fetch implementation used by Download.getContent; it is assigned on the first download.
let fetchResource;
// Install a minimal serializer on the enclosing `this` only when none has been defined yet.
if (this.serializer === undefined) {
  this.serializer = {
    /**
     * Serializes a document as its doctype declaration (when it has one)
     * followed by the outer HTML of its root element.
     *
     * @param {Document} doc - the document to serialize
     * @returns {string} the serialized markup
     */
    process(doc) {
      const { doctype } = doc;
      const fragments = [];
      if (doctype) {
        fragments.push("<!DOCTYPE " + doctype.nodeName);
        if (doctype.publicId) {
          fragments.push(" PUBLIC \"" + doctype.publicId + "\"");
          // The system identifier directly follows the public one, without a keyword.
          if (doctype.systemId) {
            fragments.push(" \"" + doctype.systemId + "\"");
          }
        } else if (doctype.systemId) {
          fragments.push(" SYSTEM \"" + doctype.systemId + "\"");
        }
        if (doctype.internalSubset) {
          fragments.push(" [" + doctype.internalSubset + "]");
        }
        fragments.push("> ");
      }
      return fragments.join("") + doc.documentElement.outerHTML;
    }
  };
}
/**
 * Splits a Content-Type header value into its media type and charset.
 *
 * The charset is looked up in every parameter of the header (not only the
 * first one) and spaces around "=" are tolerated.
 *
 * @param {?string} headerValue - raw header value, may be null or undefined
 * @returns {{contentType: ?string, charSet: (string|undefined)}} lower-cased
 *   media type (null when absent or without a "/") and charset (undefined
 *   when the header does not declare one)
 */
function parseContentTypeHeader(headerValue) {
  const parsed = { contentType: null, charSet: undefined };
  if (!headerValue) {
    return parsed;
  }
  const [mediaType, ...parameters] = headerValue.toLowerCase().split(";");
  const trimmedMediaType = mediaType.trim();
  // A media type needs a non-empty type before the "/".
  if (trimmedMediaType.indexOf("/") > 0) {
    parsed.contentType = trimmedMediaType;
  }
  for (const parameter of parameters) {
    const matchCharSet = parameter.trim().match(/^charset\s*=\s*(.*)$/);
    if (matchCharSet) {
      parsed.charSet = docHelper.removeQuotes(matchCharSet[1]);
      break;
    }
  }
  return parsed;
}

class Download {
  /**
   * Downloads a resource and returns it either as text or as a base64 data URI.
   *
   * The download is best effort: a network failure, an HTTP status >= 400, an
   * unreadable body or a resource exceeding the size limit all produce the
   * empty value ("" for text, "data:base64," for data URIs) instead of an
   * exception.
   *
   * @param {string} resourceURL - URL of the resource to fetch
   * @param {Object} [options] - optional settings
   * @param {boolean} [options.asDataURI] - return a data URI instead of text
   * @param {boolean} [options.maxResourceSizeEnabled] - enforce options.maxResourceSize
   * @param {number} [options.maxResourceSize] - size limit, multiplied by ONE_MB
   * @param {boolean} [options.validateTextContentType] - in text mode, return ""
   *   when the declared content type does not start with "text/"
   * @param {string} [options.charSet] - charset used when the response does not declare one
   * @returns {Promise<string>} the text content or the data URI
   */
  static async getContent(resourceURL, options) {
    // Callers may omit options entirely; normalising once avoids guarding every access.
    const settings = options || {};
    const emptyContent = settings.asDataURI ? "data:base64," : "";
    let startTime;
    if (DEBUG) {
      startTime = Date.now();
      log(" // STARTED download url =", resourceURL, "asDataURI =", settings.asDataURI);
    }
    if (!fetchResource) {
      // Use superFetch when it is defined, the native fetch otherwise.
      fetchResource = typeof superFetch === "undefined" ? fetch : superFetch.fetch;
    }
    let resourceContent;
    try {
      resourceContent = await fetchResource(resourceURL);
    } catch (error) {
      return emptyContent;
    }
    if (resourceContent.status >= 400) {
      return emptyContent;
    }
    const headerValue = resourceContent.headers && resourceContent.headers.get("content-type");
    const { contentType, charSet: declaredCharSet } = parseContentTypeHeader(headerValue);
    if (settings.asDataURI) {
      try {
        const buffer = await resourceContent.arrayBuffer();
        if (DEBUG) {
          log(" // ENDED download url =", resourceURL, "delay =", Date.now() - startTime);
        }
        // Check the size first so that oversized resources are never base64-encoded.
        if (settings.maxResourceSizeEnabled && buffer.byteLength > settings.maxResourceSize * ONE_MB) {
          return "data:base64,";
        }
        return "data:" + (contentType || "") + ";" + "base64," + base64.fromByteArray(new Uint8Array(buffer));
      } catch (error) {
        return "data:base64,";
      }
    }
    if (settings.validateTextContentType && contentType && !contentType.startsWith(PREFIX_CONTENT_TYPE_TEXT)) {
      return "";
    }
    const charSet = declaredCharSet || settings.charSet || "utf-8";
    let arrayBuffer;
    try {
      arrayBuffer = await resourceContent.arrayBuffer();
    } catch (error) {
      // Same best-effort policy as the data URI branch: an unreadable body is an empty resource.
      return "";
    }
    if (DEBUG) {
      log(" // ENDED download url =", resourceURL, "delay =", Date.now() - startTime);
    }
    try {
      return getTextContent(charSet, arrayBuffer, settings);
    } catch (error) {
      // The charset label may be unknown to TextDecoder: retry with UTF-8.
      try {
        return getTextContent("utf-8", arrayBuffer, settings);
      } catch (error) {
        return "";
      }
    }
  }
}
/**
 * Decodes a downloaded buffer to text, enforcing the optional size limit.
 *
 * The limit is compared with the byte size of the buffer, like the data URI
 * branch of Download.getContent, and is checked before decoding so that
 * oversized resources are never decoded.
 *
 * @param {string} charSet - encoding label passed to TextDecoder
 * @param {ArrayBuffer} arrayBuffer - raw content of the resource
 * @param {Object} [options] - optional settings
 * @param {boolean} [options.maxResourceSizeEnabled] - enforce options.maxResourceSize
 * @param {number} [options.maxResourceSize] - size limit, multiplied by ONE_MB
 * @returns {string} the decoded text, or "" when the resource exceeds the limit
 * @throws {RangeError} when charSet is not a label supported by TextDecoder
 */
function getTextContent(charSet, arrayBuffer, options) {
  const sizeLimitEnabled = Boolean(options && options.maxResourceSizeEnabled);
  if (sizeLimitEnabled && arrayBuffer.byteLength > options.maxResourceSize * ONE_MB) {
    return "";
  }
  return new TextDecoder(charSet).decode(arrayBuffer);
}
- // https://developer.mozilla.org/en-US/docs/Web/API/SubtleCrypto/digest
/**
 * Converts a binary buffer to its lower-case hexadecimal representation.
 *
 * Bytes are converted one at a time, so buffers whose length is not a
 * multiple of 4 are supported; each byte yields exactly two hex digits.
 *
 * @param {ArrayBuffer} buffer - the bytes to convert
 * @returns {string} two hexadecimal digits per byte, in buffer order
 */
function hex(buffer) {
  const bytes = new Uint8Array(buffer);
  return Array.from(bytes, byte => byte.toString(16).padStart(2, "0")).join("");
}
- // ---
- // DOM
- // ---
/* global clearTimeout */
/**
 * Browser implementation of the DOM-related services handed to
 * SingleFileCore.getClass. Most methods delegate to the helper objects
 * declared as globals of this file.
 */
class DOM {
  /**
   * Parses HTML into a document and makes sure it has a <base> element with
   * an href attribute, so that relative URLs resolve against baseURI.
   *
   * @param {string} pageContent - HTML source of the page
   * @param {string} baseURI - value given to the inserted <base> element
   * @returns {Document} the parsed document
   */
  static createDoc(pageContent, baseURI) {
    const doc = (new DOMParser()).parseFromString(pageContent, "text/html");
    let baseElement = doc.querySelector("base");
    if (!baseElement || !baseElement.getAttribute("href")) {
      // A <base> element without href is replaced by one pointing to baseURI.
      if (baseElement) {
        baseElement.remove();
      }
      baseElement = doc.createElement("base");
      baseElement.setAttribute("href", baseURI);
      doc.head.insertBefore(baseElement, doc.head.firstChild);
    }
    return doc;
  }

  /**
   * Lists the properties of a <div> element whose name starts with "on".
   *
   * @param {Document} doc - document used to create the probe element
   * @returns {string[]} the matching property names
   */
  static getOnEventAttributeNames(doc) {
    const element = doc.createElement("div");
    const attributeNames = [];
    // for...in is intended here: the properties are inherited from the prototype chain.
    for (const propertyName in element) {
      if (propertyName.startsWith("on")) {
        attributeNames.push(propertyName);
      }
    }
    return attributeNames;
  }

  /**
   * @returns {Function} the DOMParser constructor
   */
  static getParser() {
    return DOMParser;
  }

  /**
   * Hashes a text with crypto.subtle.digest.
   *
   * @param {string} algo - algorithm name passed to crypto.subtle.digest
   * @param {string} text - text to hash (encoded with TextEncoder)
   * @returns {Promise<string>} the digest converted with hex()
   */
  static async digest(algo, text) {
    const hash = await crypto.subtle.digest(algo, new TextEncoder().encode(text));
    return hex(hash);
  }

  /**
   * @param {string} content - the content to measure
   * @returns {number} size of a Blob wrapping the content
   */
  static getContentSize(content) {
    return new Blob([content]).size;
  }

  /**
   * Tests whether a font source can be loaded.
   *
   * The font is considered valid when it loads, and also when it has not
   * settled after FONT_FACE_TEST_MAX_DELAY. The pending timer is always
   * cleared before returning.
   *
   * @param {string} urlFunction - source given to the FontFace constructor
   * @returns {Promise<boolean>} false when the font cannot be created or loaded
   */
  static async validFont(urlFunction) {
    let timeoutId;
    try {
      const font = new FontFace("font-test", urlFunction);
      const timeout = new Promise(resolve => {
        timeoutId = setTimeout(() => resolve(true), FONT_FACE_TEST_MAX_DELAY);
      });
      await Promise.race([font.load(), timeout]);
      return true;
    } catch (error) {
      return false;
    } finally {
      // timeoutId is undefined when the FontFace constructor threw; clearTimeout ignores it.
      clearTimeout(timeoutId);
    }
  }

  /** Delegates to htmlMinifier.process. */
  static minifyHTML(doc, options) {
    return htmlMinifier.process(doc, options);
  }

  /** Delegates to htmlMinifier.postProcess. */
  static postMinifyHTML(doc) {
    return htmlMinifier.postProcess(doc);
  }

  /** Delegates to cssRulesMinifier.process. */
  static minifyCSSRules(stylesheets, styles, mediaAllInfo) {
    return cssRulesMinifier.process(stylesheets, styles, mediaAllInfo);
  }

  /** Delegates to fontsMinifier.removeUnusedFonts. */
  static removeUnusedFonts(doc, stylesheets, styles, options) {
    return fontsMinifier.removeUnusedFonts(doc, stylesheets, styles, options);
  }

  /** Delegates to fontsMinifier.removeAlternativeFonts. */
  static removeAlternativeFonts(doc, stylesheets) {
    return fontsMinifier.removeAlternativeFonts(doc, stylesheets);
  }

  /** Delegates to matchedRules.getMediaAllInfo. */
  static getMediaAllInfo(doc, docStyle) {
    return matchedRules.getMediaAllInfo(doc, docStyle);
  }

  /** Delegates to cssMinifier.processString. */
  static compressCSS(content, options) {
    return cssMinifier.processString(content, options);
  }

  /** Delegates to mediasMinifier.process. */
  static minifyMedias(stylesheets) {
    return mediasMinifier.process(stylesheets);
  }

  /** Delegates to imagesMinifier.process. */
  static removeAlternativeImages(doc, options) {
    return imagesMinifier.process(doc, options);
  }

  /** Delegates to srcsetParser.process. */
  static parseSrcset(srcset) {
    return srcsetParser.process(srcset);
  }

  /** Delegates to docHelper.preProcessDoc. */
  static preProcessDoc(doc, win, options) {
    return docHelper.preProcessDoc(doc, win, options);
  }

  /** Delegates to docHelper.postProcessDoc; nothing is returned. */
  static postProcessDoc(doc, options) {
    docHelper.postProcessDoc(doc, options);
  }

  /** Delegates to serializer.process. */
  static serialize(doc, compressHTML) {
    return serializer.process(doc, compressHTML);
  }

  /** Delegates to docHelper.removeQuotes. */
  static removeQuotes(string) {
    return docHelper.removeQuotes(string);
  }

  /** Delegates to docHelper.windowIdAttributeName. */
  static windowIdAttributeName(sessionId) {
    return docHelper.windowIdAttributeName(sessionId);
  }

  /** Delegates to docHelper.preservedSpaceAttributeName. */
  static preservedSpaceAttributeName(sessionId) {
    return docHelper.preservedSpaceAttributeName(sessionId);
  }

  /** Delegates to docHelper.removedContentAttributeName. */
  static removedContentAttributeName(sessionId) {
    return docHelper.removedContentAttributeName(sessionId);
  }

  /** Delegates to docHelper.imagesAttributeName. */
  static imagesAttributeName(sessionId) {
    return docHelper.imagesAttributeName(sessionId);
  }

  /** Delegates to docHelper.inputValueAttributeName. */
  static inputValueAttributeName(sessionId) {
    return docHelper.inputValueAttributeName(sessionId);
  }
}
/**
 * Writes a trace line to the console, prefixed with the tag of this build.
 *
 * @param {...*} args - values forwarded to console.log after the tag
 */
function log(...args) {
  const tag = "S-File <browser>";
  console.log(tag, ...args); // eslint-disable-line no-console
}
- return { getClass: () => SingleFileCore.getClass(Download, DOM, URL, cssTree) };
- })();
|