/* * Copyright 2010-2020 Gildas Lormeau * contact : gildas.lormeau gmail.com * * This file is part of SingleFile. * * The code in this file is free software: you can redistribute it and/or * modify it under the terms of the GNU Affero General Public License * (GNU AGPL) as published by the Free Software Foundation, either version 3 * of the License, or (at your option) any later version. * * The code in this file is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero * General Public License for more details. * * As additional permission under GNU AGPL version 3 section 7, you may * distribute UNMODIFIED VERSIONS OF THIS file without the copy of the GNU * AGPL normally required by section 4, provided you include this license * notice and a URL through which recipients can access the Corresponding * Source. */ this.singlefile.lib.core = this.singlefile.lib.core || (() => { const DEBUG = false; let util, cssTree; function getClass(...args) { [util, cssTree] = args; return SingleFileClass; } class SingleFileClass { constructor(options) { this.options = options; } async run() { if (this.options.userScriptEnabled) { await util.waitForUserScript(util.ON_BEFORE_CAPTURE_EVENT_NAME); } this.runner = new Runner(this.options, true); await this.runner.loadPage(); await this.runner.initialize(); if (this.options.userScriptEnabled) { await util.waitForUserScript(util.ON_AFTER_CAPTURE_EVENT_NAME); } await this.runner.run(); } cancel() { this.cancelled = true; if (this.runner) { this.runner.cancel(); } } getPageData() { return this.runner.getPageData(); } } // ------------- // ProgressEvent // ------------- const PAGE_LOADING = "page-loading"; const PAGE_LOADED = "page-loaded"; const RESOURCES_INITIALIZING = "resource-initializing"; const RESOURCES_INITIALIZED = "resources-initialized"; const RESOURCE_LOADED = "resource-loaded"; const PAGE_ENDED = "page-ended"; const STAGE_STARTED = "stage-started"; const STAGE_ENDED = "stage-ended"; const STAGE_TASK_STARTED = "stage-task-started"; const STAGE_TASK_ENDED = "stage-task-ended"; class ProgressEvent { constructor(type, detail) { return { type, detail, PAGE_LOADING, PAGE_LOADED, RESOURCES_INITIALIZING, RESOURCES_INITIALIZED, RESOURCE_LOADED, PAGE_ENDED, STAGE_STARTED, STAGE_ENDED, STAGE_TASK_STARTED, STAGE_TASK_ENDED }; } } // ------ // Runner // ------ const RESOLVE_URLS_STAGE = 0; const REPLACE_DATA_STAGE = 1; const REPLACE_DOCS_STAGE = 2; const POST_PROCESS_STAGE = 3; const STAGES = [{ sequential: [ { action: "preProcessPage" }, { option: "loadDeferredImagesKeepZoomLevel", action: "resetZoomLevel" }, { action: "replaceStyleContents" }, { action: "resetCharsetMeta" }, { option: "saveFavicon", action: "saveFavicon" }, { action: "replaceCanvasElements" }, { action: "insertFonts" }, { action: "insertShadowRootContents" }, { action: "setInputValues" }, { option: "removeScripts", action: "removeScripts" }, { option: "selected", action: "removeUnselectedElements" }, { option: "removeVideoSrc", action: "insertVideoPosters" }, { option: "removeFrames", action: "removeFrames" }, { option: "removeVideoSrc", action: "removeVideoSources" }, { option: "removeAudioSrc", action: "removeAudioSources" }, { action: "removeDiscardedResources" }, { option: "removeHiddenElements", action: "removeHiddenElements" }, { action: "resolveHrefs" }, { action: "resolveStyleAttributeURLs" } ], parallel: [ { action: "resolveStylesheetURLs" }, { option: "!removeFrames", action: "resolveFrameURLs" }, { action: "resolveHtmlImportURLs" } ] }, { sequential: [ { option: "removeUnusedStyles", action: "removeUnusedStyles" }, { option: "removeAlternativeMedias", action: "removeAlternativeMedias" }, { option: "removeUnusedFonts", action: "removeUnusedFonts" } ], parallel: [ { action: "processStylesheets" }, { action: "processStyleAttributes" }, { action: "processPageResources" }, { option: "!removeScripts", action: "processScripts" } ] }, { sequential: [ { option: "removeAlternativeImages", action: "removeAlternativeImages" } ], parallel: [ { option: "removeAlternativeFonts", action: "removeAlternativeFonts" }, { option: "!removeFrames", action: "processFrames" }, { option: "!removeImports", action: "processHtmlImports" }, ] }, { sequential: [ { action: "replaceStylesheets" }, { action: "replaceStyleAttributes" }, { action: "insertVariables" }, { option: "compressHTML", action: "compressHTML" }, { action: "cleanupPage" } ], parallel: [ { option: "enableMaff", action: "insertMAFFMetaData" }, { action: "setDocInfo" } ] }]; class Runner { constructor(options, root) { const rootDocDefined = root && options.doc; this.root = root; this.options = options; this.options.url = this.options.url || (rootDocDefined && this.options.doc.location.href); const matchResourceReferrer = this.options.url.match(/^.*\//); this.options.resourceReferrer = this.options.passReferrerOnError && matchResourceReferrer && matchResourceReferrer[0]; this.options.baseURI = rootDocDefined && this.options.doc.baseURI; this.options.rootDocument = root; this.options.updatedResources = this.options.updatedResources || {}; this.options.fontTests = new Map(); this.batchRequest = new BatchRequest(); this.processor = new Processor(options, this.batchRequest); if (rootDocDefined) { const docData = util.preProcessDoc(this.options.doc, this.options.win, this.options); this.options.canvases = docData.canvases; this.options.fonts = docData.fonts; this.options.stylesheets = docData.stylesheets; this.options.images = docData.images; this.options.posters = docData.posters; this.options.usedFonts = docData.usedFonts; this.options.shadowRoots = docData.shadowRoots; this.options.imports = docData.imports; this.options.referrer = docData.referrer; this.markedElements = docData.markedElements; } if (this.options.saveRawPage) { this.options.removeFrames = true; } this.options.content = this.options.content || (rootDocDefined ? util.serialize(this.options.doc) : null); this.onprogress = options.onprogress || (() => { }); } async loadPage() { this.onprogress(new ProgressEvent(PAGE_LOADING, { pageURL: this.options.url, frame: !this.root })); await this.processor.loadPage(this.options.content); this.onprogress(new ProgressEvent(PAGE_LOADED, { pageURL: this.options.url, frame: !this.root })); } async initialize() { this.onprogress(new ProgressEvent(RESOURCES_INITIALIZING, { pageURL: this.options.url })); await this.executeStage(RESOLVE_URLS_STAGE); this.pendingPromises = this.executeStage(REPLACE_DATA_STAGE); if (this.root && this.options.doc) { util.postProcessDoc(this.options.doc, this.markedElements); } } cancel() { this.cancelled = true; this.batchRequest.cancel(); if (this.root) { if (this.options.frames) { this.options.frames.forEach(cancelRunner); } if (this.options.imports) { this.options.imports.forEach(cancelRunner); } } function cancelRunner(resourceData) { if (resourceData.runner) { resourceData.runner.cancel(); } } } async run() { if (this.root) { this.processor.initialize(this.batchRequest); this.onprogress(new ProgressEvent(RESOURCES_INITIALIZED, { pageURL: this.options.url, max: this.processor.maxResources })); } await this.batchRequest.run(detail => { detail.pageURL = this.options.url; this.onprogress(new ProgressEvent(RESOURCE_LOADED, detail)); }, this.options); await this.pendingPromises; this.options.doc = null; this.options.win = null; await this.executeStage(REPLACE_DOCS_STAGE); await this.executeStage(POST_PROCESS_STAGE); this.processor.finalize(); } getDocument() { return this.processor.doc; } getStyleSheets() { return this.processor.stylesheets; } getPageData() { if (this.root) { this.onprogress(new ProgressEvent(PAGE_ENDED, { pageURL: this.options.url })); } return this.processor.getPageData(); } async executeStage(step) { if (DEBUG) { log("**** STARTED STAGE", step, "****"); } const frame = !this.root; this.onprogress(new ProgressEvent(STAGE_STARTED, { pageURL: this.options.url, step, frame })); STAGES[step].sequential.forEach(task => { let startTime; if (DEBUG) { startTime = Date.now(); log(" -- STARTED task =", task.action); } this.onprogress(new ProgressEvent(STAGE_TASK_STARTED, { pageURL: this.options.url, step, task: task.action, frame })); if (!this.cancelled) { this.executeTask(task); } this.onprogress(new ProgressEvent(STAGE_TASK_ENDED, { pageURL: this.options.url, step, task: task.action, frame })); if (DEBUG) { log(" -- ENDED task =", task.action, "delay =", Date.now() - startTime); } }); let parallelTasksPromise; if (STAGES[step].parallel) { parallelTasksPromise = await Promise.all(STAGES[step].parallel.map(async task => { let startTime; if (DEBUG) { startTime = Date.now(); log(" // STARTED task =", task.action); } this.onprogress(new ProgressEvent(STAGE_TASK_STARTED, { pageURL: this.options.url, step, task: task.action, frame })); if (!this.cancelled) { await this.executeTask(task); } this.onprogress(new ProgressEvent(STAGE_TASK_ENDED, { pageURL: this.options.url, step, task: task.action, frame })); if (DEBUG) { log(" // ENDED task =", task.action, "delay =", Date.now() - startTime); } })); } else { parallelTasksPromise = Promise.resolve(); } this.onprogress(new ProgressEvent(STAGE_ENDED, { pageURL: this.options.url, step, frame })); if (DEBUG) { log("**** ENDED STAGE", step, "****"); } return parallelTasksPromise; } executeTask(task) { if (!task.option || ((task.option.startsWith("!") && !this.options[task.option]) || this.options[task.option])) { return this.processor[task.action](); } } } // ------------ // BatchRequest // ------------ class BatchRequest { constructor() { this.requests = new Map(); this.duplicates = new Map(); } addURL(resourceURL, asBinary, expectedType, groupDuplicates) { return new Promise((resolve, reject) => { const requestKey = JSON.stringify([resourceURL, asBinary, expectedType]); let resourceRequests = this.requests.get(requestKey); if (!resourceRequests) { resourceRequests = []; this.requests.set(requestKey, resourceRequests); } const callbacks = { resolve, reject }; resourceRequests.push(callbacks); if (groupDuplicates) { let duplicateRequests = this.duplicates.get(requestKey); if (!duplicateRequests) { duplicateRequests = []; this.duplicates.set(requestKey, duplicateRequests); } duplicateRequests.push(callbacks); } }); } getMaxResources() { return this.requests.size; } run(onloadListener, options) { const resourceURLs = [...this.requests.keys()]; let indexResource = 0; return Promise.all(resourceURLs.map(async requestKey => { const [resourceURL, asBinary, expectedType] = JSON.parse(requestKey); const resourceRequests = this.requests.get(requestKey); try { const currentIndexResource = indexResource; indexResource = indexResource + 1; const content = await util.getContent(resourceURL, { asBinary, expectedType, maxResourceSize: options.maxResourceSize, maxResourceSizeEnabled: options.maxResourceSizeEnabled, frameId: options.windowId, resourceReferrer: options.resourceReferrer }); onloadListener({ url: resourceURL }); if (!this.cancelled) { resourceRequests.forEach(callbacks => { const duplicateCallbacks = this.duplicates.get(requestKey); const duplicate = duplicateCallbacks && duplicateCallbacks.length > 1 && duplicateCallbacks.includes(callbacks); callbacks.resolve({ content: content.data, indexResource: currentIndexResource, duplicate }); }); } } catch (error) { indexResource = indexResource + 1; onloadListener({ url: resourceURL }); resourceRequests.forEach(resourceRequest => resourceRequest.reject(error)); } this.requests.delete(requestKey); })); } cancel() { this.cancelled = true; const resourceURLs = [...this.requests.keys()]; resourceURLs.forEach(requestKey => { const resourceRequests = this.requests.get(requestKey); resourceRequests.forEach(callbacks => callbacks.reject()); this.requests.delete(requestKey); }); } } // --------- // Processor // --------- const PREFIXES_FORBIDDEN_DATA_URI = ["data:text/"]; const PREFIX_DATA_URI_IMAGE_SVG = "data:image/svg+xml"; const EMPTY_IMAGE = "data:image/gif;base64,R0lGODlhAQABAAAAACH5BAEKAAEALAAAAAABAAEAAAICTAEAOw=="; const SCRIPT_TAG_FOUND = /