/* * Copyright 2018 Gildas Lormeau * contact : gildas.lormeau gmail.com * * This file is part of SingleFile. * * The code in this file is free software: you can redistribute it and/or * modify it under the terms of the GNU Affero General Public License * (GNU AGPL) as published by the Free Software Foundation, either version 3 * of the License, or (at your option) any later version. * * The code in this file is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero * General Public License for more details. * * As additional permission under GNU AGPL version 3 section 7, you may * distribute UNMODIFIED VERSIONS OF THIS file without the copy of the GNU * AGPL normally required by section 4, provided you include this license * notice and a URL through which recipients can access the Corresponding * Source. */ this.SingleFileCore = this.SingleFileCore || (() => { const SELECTED_CONTENT_ATTRIBUTE_NAME = "data-single-file-selected-content"; const SELECTED_CONTENT_ROOT_ATTRIBUTE_NAME = "data-single-file-selected-content-root"; const DEBUG = false; let DocUtil, cssTree, sessionId = 0; function getClass(...args) { [DocUtil, cssTree] = args; return SingleFileClass; } class SingleFileClass { constructor(options) { this.options = options; if (options.sessionId === undefined) { options.sessionId = sessionId; sessionId++; } } async initialize() { this.runner = new Runner(this.options, true); await this.runner.loadPage(); await this.runner.initialize(); } async run() { await this.runner.run(); } async getPageData() { return this.runner.getPageData(); } } SingleFileClass.SELECTED_CONTENT_ATTRIBUTE_NAME = SELECTED_CONTENT_ATTRIBUTE_NAME; SingleFileClass.SELECTED_CONTENT_ROOT_ATTRIBUTE_NAME = SELECTED_CONTENT_ROOT_ATTRIBUTE_NAME; // ------------- // ProgressEvent // ------------- const PAGE_LOADING = "page-loading"; const PAGE_LOADED = "page-loaded"; const RESOURCES_INITIALIZING = "resource-initializing"; const RESOURCES_INITIALIZED = "resources-initialized"; const RESOURCE_LOADED = "resource-loaded"; const PAGE_ENDED = "page-ended"; const STAGE_STARTED = "stage-started"; const STAGE_ENDED = "stage-ended"; const STAGE_TASK_STARTED = "stage-task-started"; const STAGE_TASK_ENDED = "stage-task-ended"; class ProgressEvent { constructor(type, detail) { return { type, detail, PAGE_LOADING, PAGE_LOADED, RESOURCES_INITIALIZING, RESOURCES_INITIALIZED, RESOURCE_LOADED, PAGE_ENDED, STAGE_STARTED, STAGE_ENDED, STAGE_TASK_STARTED, STAGE_TASK_ENDED }; } } // ------ // Runner // ------ const RESOLVE_URLS_STAGE = 0; const REPLACE_DATA_STAGE = 1; const REPLACE_DOCS_STAGE = 2; const POST_PROCESS_STAGE = 3; const STAGES = [{ sequential: [ { action: "preProcessPage" }, { action: "insertShadowRootContents" }, { action: "replaceStyleContents" }, { option: "selected", action: "removeUnselectedElements" }, { option: "removeVideoSrc", action: "insertVideoPosters" }, { option: "removeFrames", action: "removeFrames" }, { option: "removeImports", action: "removeImports" }, { option: "removeScripts", action: "removeScripts" }, { action: "removeDiscardedResources" }, { action: "resetCharsetMeta" }, { action: "setInputValues" }, { option: "insertFaviconLink", action: "insertFaviconLink" }, { action: "replaceCanvasElements" }, { action: "insertFonts" }, { option: "removeHiddenElements", action: "removeHiddenElements" }, { action: "resolveHrefs" }, { action: "resolveStyleAttributeURLs" } ], parallel: [ { action: "resolveStylesheetURLs" }, { option: "!removeFrames", action: "resolveFrameURLs" }, { option: "!removeImports", action: "resolveHtmlImportURLs" } ] }, { sequential: [ { option: "removeUnusedStyles", action: "removeUnusedStyles" }, { option: "removeAlternativeMedias", action: "removeAlternativeMedias" }, { option: "removeUnusedFonts", action: "removeUnusedFonts" } ], parallel: [ { action: "processStylesheets" }, { action: "processStyleAttributes" }, { action: "processPageResources" }, { option: "!removeScripts", action: "processScripts" } ] }, { sequential: [ { option: "removeAlternativeImages", action: "removeAlternativeImages" }, { option: "removeAlternativeFonts", action: "removeAlternativeFonts" } ], parallel: [ { option: "!removeFrames", action: "processFrames" }, { option: "!removeImports", action: "processHtmlImports" }, ] }, { sequential: [ { action: "replaceStylesheets" }, { action: "replaceStyleAttributes" }, { action: "insertVariables" }, { option: "compressHTML", action: "compressHTML" } ] }]; class Runner { constructor(options, root) { this.root = root; this.options = options; this.options.url = this.options.url || this.options.doc.location.href; this.options.baseURI = this.options.doc && this.options.doc.baseURI; this.batchRequest = new BatchRequest(); this.processor = new Processor(options, this.batchRequest); if (this.options.doc) { const docData = DocUtil.preProcessDoc(this.options.doc, this.options.win, this.options); this.options.canvasData = docData.canvasData; this.options.fontsData = docData.fontsData; this.options.stylesheetContents = docData.stylesheetContents; this.options.imageData = docData.imageData; this.options.postersData = docData.postersData; this.options.usedFonts = docData.usedFonts; this.options.shadowRootContents = docData.shadowRootContents; } this.options.content = this.options.content || (this.options.doc ? DocUtil.serialize(this.options.doc, false) : null); this.onprogress = options.onprogress || (() => { }); } async loadPage() { this.onprogress(new ProgressEvent(PAGE_LOADING, { pageURL: this.options.url, frame: !this.root })); await this.processor.loadPage(this.options.content); this.onprogress(new ProgressEvent(PAGE_LOADED, { pageURL: this.options.url, frame: !this.root })); } async initialize() { this.onprogress(new ProgressEvent(RESOURCES_INITIALIZING, { pageURL: this.options.url })); await this.executeStage(RESOLVE_URLS_STAGE); this.pendingPromises = this.executeStage(REPLACE_DATA_STAGE); if (this.options.doc) { DocUtil.postProcessDoc(this.options.doc, this.options); this.options.doc = null; this.options.win = null; } } async run() { if (this.root) { this.processor.initialize(this.batchRequest); this.onprogress(new ProgressEvent(RESOURCES_INITIALIZED, { pageURL: this.options.url, index: 0, max: this.processor.maxResources })); } await this.batchRequest.run(detail => { detail.pageURL = this.options.url; this.onprogress(new ProgressEvent(RESOURCE_LOADED, detail)); }, this.options); await this.pendingPromises; await this.executeStage(REPLACE_DOCS_STAGE); await this.executeStage(POST_PROCESS_STAGE); await this.processor.end(); } async getPageData() { if (this.root) { this.onprogress(new ProgressEvent(PAGE_ENDED, { pageURL: this.options.url })); } return this.processor.getPageData(); } async executeStage(step) { if (DEBUG) { log("**** STARTED STAGE", step, "****"); } const frame = !this.root; this.onprogress(new ProgressEvent(STAGE_STARTED, { pageURL: this.options.url, step, frame })); STAGES[step].sequential.forEach(task => { let startTime; if (DEBUG) { startTime = Date.now(); log(" -- STARTED task =", task.action); } this.onprogress(new ProgressEvent(STAGE_TASK_STARTED, { pageURL: this.options.url, step, task: task.action, frame })); this.executeTask(task); this.onprogress(new ProgressEvent(STAGE_TASK_ENDED, { pageURL: this.options.url, step, task: task.action, frame })); if (DEBUG) { log(" -- ENDED task =", task.action, "delay =", Date.now() - startTime); } }); let parallelTasksPromise; if (STAGES[step].parallel) { parallelTasksPromise = await Promise.all(STAGES[step].parallel.map(async task => { let startTime; if (DEBUG) { startTime = Date.now(); log(" // STARTED task =", task.action); } this.onprogress(new ProgressEvent(STAGE_TASK_STARTED, { pageURL: this.options.url, step, task: task.action, frame })); await this.executeTask(task); this.onprogress(new ProgressEvent(STAGE_TASK_ENDED, { pageURL: this.options.url, step, task: task.action, frame })); if (DEBUG) { log(" // ENDED task =", task.action, "delay =", Date.now() - startTime); } })); } else { parallelTasksPromise = Promise.resolve(); } this.onprogress(new ProgressEvent(STAGE_ENDED, { pageURL: this.options.url, step, frame })); if (DEBUG) { log("**** ENDED STAGE", step, "****"); } return parallelTasksPromise; } executeTask(task) { if (!task.option || ((task.option.startsWith("!") && !this.options[task.option]) || this.options[task.option])) { return this.processor[task.action](); } } } // ------------ // BatchRequest // ------------ class BatchRequest { constructor() { this.requests = new Map(); this.duplicates = new Map(); } async addURL(resourceURL, asDataURI, groupDuplicates) { return new Promise((resolve, reject) => { const requestKey = JSON.stringify([resourceURL, asDataURI]); let resourceRequests = this.requests.get(requestKey); if (!resourceRequests) { resourceRequests = []; this.requests.set(requestKey, resourceRequests); } const callbacks = { resolve, reject }; resourceRequests.push(callbacks); if (groupDuplicates) { let duplicateRequests = this.duplicates.get(requestKey); if (!duplicateRequests) { duplicateRequests = []; this.duplicates.set(requestKey, duplicateRequests); } duplicateRequests.push(callbacks); } }); } getMaxResources() { return Array.from(this.requests.keys()).length; } async run(onloadListener, options) { const resourceURLs = Array.from(this.requests.keys()); let indexResource = 0; return Promise.all(resourceURLs.map(async requestKey => { const [resourceURL, asDataURI] = JSON.parse(requestKey); const resourceRequests = this.requests.get(requestKey); try { const content = await DocUtil.getContent(resourceURL, { asDataURI, maxResourceSize: options.maxResourceSize, maxResourceSizeEnabled: options.maxResourceSizeEnabled }); indexResource = indexResource + 1; onloadListener({ index: indexResource, url: resourceURL }); resourceRequests.forEach(callbacks => { const duplicateCallbacks = this.duplicates.get(requestKey); const duplicate = duplicateCallbacks && duplicateCallbacks.length > 1 && duplicateCallbacks.includes(callbacks); callbacks.resolve({ content: content.data, indexResource, duplicate }); }); } catch (error) { indexResource = indexResource + 1; onloadListener({ index: indexResource, url: resourceURL }); resourceRequests.forEach(resourceRequest => resourceRequest.reject(error)); } this.requests.delete(requestKey); })); } } // --------- // Processor // --------- const EMPTY_DATA_URI = "data:base64,"; const EMPTY_IMAGE = "data:image/gif;base64,R0lGODlhAQABAAAAACH5BAEKAAEALAAAAAABAAEAAAICTAEAOw=="; const SCRIPT_TAG_FOUND = /