/* * Copyright 2018 Gildas Lormeau * contact : gildas.lormeau gmail.com * * This file is part of SingleFile. * * The code in this file is free software: you can redistribute it and/or * modify it under the terms of the GNU Affero General Public License * (GNU AGPL) as published by the Free Software Foundation, either version 3 * of the License, or (at your option) any later version. * * The code in this file is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero * General Public License for more details. * * As additional permission under GNU AGPL version 3 section 7, you may * distribute UNMODIFIED VERSIONS OF THIS file without the copy of the GNU * AGPL normally required by section 4, provided you include this license * notice and a URL through which recipients can access the Corresponding * Source. */ /* global CSSRule */ this.SingleFileCore = this.SingleFileCore || (() => { const SELECTED_CONTENT_ATTRIBUTE_NAME = "data-single-file-selected-content"; const SELECTED_CONTENT_ROOT_ATTRIBUTE_NAME = "data-single-file-selected-content-root"; const DEBUG = false; let Download, DOM, URL, sessionId = 0; function getClass(...args) { [Download, DOM, URL] = args; return SingleFileClass; } class SingleFileClass { constructor(options) { this.options = options; if (options.sessionId === undefined) { options.sessionId = sessionId; sessionId++; } } async initialize() { this.processor = new PageProcessor(this.options); await this.processor.loadPage(); await this.processor.initialize(); } async run() { await this.processor.run(); } async getPageData() { return this.processor.getPageData(); } } SingleFileClass.SELECTED_CONTENT_ATTRIBUTE_NAME = SELECTED_CONTENT_ATTRIBUTE_NAME; SingleFileClass.SELECTED_CONTENT_ROOT_ATTRIBUTE_NAME = SELECTED_CONTENT_ROOT_ATTRIBUTE_NAME; // ------------- // ProgressEvent // ------------- const PAGE_LOADING = "page-loading"; const PAGE_LOADED = "page-loaded"; const RESOURCES_INITIALIZING = "resource-initializing"; const RESOURCES_INITIALIZED = "resources-initialized"; const RESOURCE_LOADED = "resource-loaded"; const PAGE_ENDED = "page-ended"; class ProgressEvent { constructor(type, details) { return { type, details, PAGE_LOADING, PAGE_LOADED, RESOURCES_INITIALIZING, RESOURCES_INITIALIZED, RESOURCE_LOADED, PAGE_ENDED }; } } // ------------- // PageProcessor // ------------- const RESOLVE_URLS_STAGE = 0; const REPLACE_DATA_STAGE = 1; const REPLACE_DOCS_STAGE = 2; const POST_PROCESS_STAGE = 3; const STAGES = [{ sequential: [ { action: "preProcessPage" }, { action: "replaceStyleContents" }, { option: "selected", action: "removeUnselectedElements" }, { option: "removeVideoSrc", action: "insertVideoPosters" }, { option: "removeFrames", action: "removeFrames" }, { option: "removeImports", action: "removeImports" }, { option: "removeScripts", action: "removeScripts" }, { action: "removeDiscardedResources" }, { action: "resetCharsetMeta" }, { action: "setInputValues" }, { option: "insertFaviconLink", action: "insertFaviconLink" }, { action: "resolveHrefs" }, { action: "replaceCanvasElements" }, { action: "insertFonts" }, { option: "removeHiddenElements", action: "removeHiddenElements" }, { action: "resolveStyleAttributeURLs" } ], parallel: [ { action: "resolveStylesheetURLs" }, { action: "resolveLinkedStylesheetURLs" }, { option: "!removeFrames", action: "resolveFrameURLs" }, { option: "!removeImports", action: "resolveHtmlImportURLs" } ] }, { sequential: [ { option: "removeUnusedStyles", action: "removeUnusedStyles" }, { option: "removeAlternativeMedias", action: "removeAlternativeMedias" }, { option: "removeUnusedStyles", action: "removeUnusedFonts" } ], parallel: [ { action: "processStylesheets" }, { action: "processStyleAttributes" }, { action: "processPageResources" }, { option: "!removeScripts", action: "processScripts" } ] }, { sequential: [ { option: "removeAlternativeImages", action: "removeAlternativeImages" }, { option: "removeAlternativeFonts", action: "removeAlternativeFonts" }, { option: "compressCSS", action: "compressCSS" } ], parallel: [ { option: "!removeFrames", action: "processFrames" }, { option: "!removeImports", action: "processHtmlImports" }, ] }, { sequential: [ { option: "compressHTML", action: "compressHTML" } ] }]; class PageProcessor { constructor(options) { this.options = options; this.options.url = this.options.url || this.options.doc.location.href; this.options.baseURI = this.options.doc && this.options.doc.baseURI; this.batchRequest = new BatchRequest(); this.processor = new DOMProcessor(options, this.batchRequest); if (this.options.doc) { const docData = DOM.preProcessDoc(this.options.doc, this.options.win, this.options); this.options.canvasData = docData.canvasData; this.options.fontsData = docData.fontsData; this.options.stylesheetContents = docData.stylesheetContents; this.options.imageData = docData.imageData; this.options.postersData = docData.postersData; this.options.usedFonts = docData.usedFonts; } this.options.content = this.options.content || (this.options.doc ? DOM.serialize(this.options.doc, false) : null); this.onprogress = options.onprogress || (() => { }); } async loadPage() { this.onprogress(new ProgressEvent(PAGE_LOADING, { pageURL: this.options.url })); await this.processor.loadPage(this.options.content); this.onprogress(new ProgressEvent(PAGE_LOADED, { pageURL: this.options.url })); } async initialize() { this.onprogress(new ProgressEvent(RESOURCES_INITIALIZING, { pageURL: this.options.url })); await this.executeStage(RESOLVE_URLS_STAGE); this.pendingPromises = this.executeStage(REPLACE_DATA_STAGE); if (this.options.doc) { DOM.postProcessDoc(this.options.doc, this.options); this.options.doc = null; this.options.win = null; } } async run() { if (!this.options.windowId) { this.processor.initialize(this.batchRequest); this.onprogress(new ProgressEvent(RESOURCES_INITIALIZED, { pageURL: this.options.url, index: 0, max: this.processor.maxResources })); } await this.batchRequest.run(details => { details.pageURL = this.options.url; this.onprogress(new ProgressEvent(RESOURCE_LOADED, details)); }, this.options); await this.pendingPromises; await this.executeStage(REPLACE_DOCS_STAGE); await this.executeStage(POST_PROCESS_STAGE); await this.processor.end(); } async getPageData() { if (!this.options.windowId) { this.onprogress(new ProgressEvent(PAGE_ENDED, { pageURL: this.options.url })); } return this.processor.getPageData(); } async executeStage(step) { if (DEBUG) { log("**** STARTED STAGE", step, "****"); } STAGES[step].sequential.forEach(task => { let startTime; if (DEBUG) { startTime = Date.now(); log(" -- STARTED task =", task.action); } this.executeTask(task); if (DEBUG) { log(" -- ENDED task =", task.action, "delay =", Date.now() - startTime); } }); if (STAGES[step].parallel) { return await Promise.all(STAGES[step].parallel.map(task => { let startTime; if (DEBUG) { startTime = Date.now(); log(" // STARTED task =", task.action); } const promise = this.executeTask(task); if (DEBUG) { promise.then(() => log(" // ENDED task =", task.action, "delay =", Date.now() - startTime)); } return promise; })); } if (DEBUG) { log("**** ENDED STAGE", step, "****"); } } executeTask(task) { if (!task.option || ((task.option.startsWith("!") && !this.options[task.option]) || this.options[task.option])) { return this.processor[task.action](); } } } // -------- // BatchRequest // -------- class BatchRequest { constructor() { this.requests = new Map(); } async addURL(resourceURL, asDataURI = true) { return new Promise((resolve, reject) => { const requestKey = JSON.stringify([resourceURL, asDataURI]); const resourceRequests = this.requests.get(requestKey); if (resourceRequests) { resourceRequests.push({ resolve, reject }); } else { this.requests.set(requestKey, [{ resolve, reject }]); } }); } getMaxResources() { return Array.from(this.requests.keys()).length; } async run(onloadListener, options) { const resourceURLs = Array.from(this.requests.keys()); let indexResource = 0; return Promise.all(resourceURLs.map(async requestKey => { const [resourceURL, asDataURI] = JSON.parse(requestKey); const resourceRequests = this.requests.get(requestKey); try { const resourceContent = await Download.getContent(resourceURL, { asDataURI, maxResourceSize: options.maxResourceSize, maxResourceSizeEnabled: options.maxResourceSizeEnabled }); indexResource = indexResource + 1; onloadListener({ index: indexResource, url: resourceURL }); resourceRequests.forEach(resourceRequest => resourceRequest.resolve({ content: resourceContent, indexResource, duplicate: Boolean(resourceRequests.length > 1) })); } catch (error) { indexResource = indexResource + 1; onloadListener({ index: indexResource, url: resourceURL }); resourceRequests.forEach(resourceRequest => resourceRequest.reject(error)); } this.requests.delete(requestKey); })); } } // ------------ // DOMProcessor // ------------ const EMPTY_DATA_URI = "data:base64,"; const EMPTY_IMAGE = ""; const SCRIPT_TAG_FOUND = /