Sfoglia il codice sorgente

improved saving of lazy-loaded images with puppeteer and webdriver

Gildas 7 anni fa
parent
commit
088e121b3e

+ 18 - 7
cli/back-ends/puppeteer.js

@@ -21,7 +21,7 @@
  *   Source.
  */
 
-/* global require, exports, SingleFileBrowser, frameTree, document, window */
+/* global require, exports, SingleFileBrowser, frameTree, lazyLoader, document, window */
 
 const fs = require("fs");
 
@@ -30,6 +30,7 @@ const puppeteer = require("puppeteer-core");
 const SCRIPTS = [
 	"../../lib/hooks/hooks-frame.js",
 	"../../lib/frame-tree/frame-tree.js",
+	"../../lib/lazy/content/content-lazy-loader.js",
 	"../../lib/single-file/util/doc-util.js",
 	"../../lib/single-file/util/doc-helper.js",
 	"../../lib/single-file/util/timeout.js",
@@ -51,6 +52,10 @@ const SCRIPTS = [
 ];
 
 exports.getPageData = async options => {
+	const RESOLVED_CONTENTS = {
+		"lib/lazy/web/web-lazy-loader-before.js": fs.readFileSync(require.resolve("../../lib/lazy/web/web-lazy-loader-before.js")).toString(),
+		"lib/lazy/web/web-lazy-loader-after.js": fs.readFileSync(require.resolve("../../lib/lazy/web/web-lazy-loader-after.js")).toString()
+	};
 	const browserOptions = {};
 	if (options.browserHeadless !== undefined) {
 		browserOptions.headless = options.browserHeadless;
@@ -77,19 +82,25 @@ exports.getPageData = async options => {
 		if (options.browserBypassCSP === undefined || options.browserBypassCSP) {
 			await page.setBypassCSP(true);
 		}
-		if (options.loadDeferredImages) {
-			SCRIPTS.unshift("../../lib/lazy/web/web-lazy-loader-before");
-		}
-		await Promise.all(SCRIPTS.map(scriptPath => page.evaluateOnNewDocument(fs.readFileSync(require.resolve(scriptPath)).toString())));
+		let scripts = SCRIPTS.map(scriptPath => fs.readFileSync(require.resolve(scriptPath)).toString()).join("\n");
+		scripts += "\nlazyLoader.getScriptContent = " + (function (path) { return (RESOLVED_CONTENTS)[path]; }).toString().replace("RESOLVED_CONTENTS", JSON.stringify(RESOLVED_CONTENTS)) + ";";
+		await page.evaluateOnNewDocument(scripts);
 		await page.goto(options.url, {
 			waitUntil: "networkidle0"
 		});
 		return await page.evaluate(async options => {
 			options.insertSingleFileComment = true;
 			options.insertFaviconLink = true;
-			if (!options.saveRawPage && !options.removeFrames) {
-				options.framesData = await frameTree.getAsync(options);
+			const preInitializationPromises = [];
+			if (!options.saveRawPage) {
+				if (!options.removeFrames) {
+					preInitializationPromises.push(frameTree.getAsync(options));
+				}
+				if (options.loadDeferredImages) {
+					preInitializationPromises.push(lazyLoader.process(options));
+				}
 			}
+			[options.framesData] = await Promise.all(preInitializationPromises);
 			options.doc = document;
 			options.win = window;
 			const SingleFile = SingleFileBrowser.getClass();

+ 16 - 6
cli/back-ends/webdriver.js

@@ -29,6 +29,7 @@ const { Builder } = require("selenium-webdriver");
 
 const SCRIPTS = [
 	"../../lib/frame-tree/frame-tree.js",
+	"../../lib/lazy/content/content-lazy-loader.js",
 	"../../lib/single-file/util/doc-util.js",
 	"../../lib/single-file/util/doc-helper.js",
 	"../../lib/single-file/util/timeout.js",
@@ -50,6 +51,10 @@ const SCRIPTS = [
 ];
 
 exports.getPageData = async options => {
+	const RESOLVED_CONTENTS = {
+		"lib/lazy/web/web-lazy-loader-before.js": fs.readFileSync(require.resolve("../../lib/lazy/web/web-lazy-loader-before.js")).toString(),
+		"lib/lazy/web/web-lazy-loader-after.js": fs.readFileSync(require.resolve("../../lib/lazy/web/web-lazy-loader-after.js")).toString()
+	};
 	let driver;
 	try {
 		const builder = new Builder();
@@ -75,9 +80,7 @@ exports.getPageData = async options => {
 		}
 		await driver.get(options.url);
 		let scripts = (await Promise.all(SCRIPTS.map(scriptPath => fs.readFileSync(require.resolve(scriptPath)).toString()))).join("\n");
-		if (options.loadDeferredImages) {
-			scripts += "\ntry {\n" + fs.readFileSync(require.resolve("../../lib/lazy/web/web-lazy-loader-before")) + "\n} catch (error) {}";
-		}
+		scripts += "\nlazyLoader.getScriptContent = " + (function (path) { return (RESOLVED_CONTENTS)[path]; }).toString().replace("RESOLVED_CONTENTS", JSON.stringify(RESOLVED_CONTENTS)) + ";";
 		const mainWindowHandle = driver.getWindowHandle();
 		const windowHandles = await driver.getAllWindowHandles();
 		await Promise.all(windowHandles.map(async windowHandle => {
@@ -104,9 +107,16 @@ function getPageDataScript() {
 	async function getPageData() {
 		options.insertSingleFileComment = true;
 		options.insertFaviconLink = true;
-		if (!options.saveRawPage && !options.removeFrames) {
-			options.framesData = await frameTree.getAsync(options);
-		}	
+		const preInitializationPromises = [];
+		if (!options.saveRawPage) {
+			if (!options.removeFrames) {
+				preInitializationPromises.push(frameTree.getAsync(options));
+			}
+			if (options.loadDeferredImages) {
+				preInitializationPromises.push(lazyLoader.process(options));
+			}
+		}
+		[options.framesData] = await Promise.all(preInitializationPromises);
 		options.doc = document;
 		options.win = window;
 		const SingleFile = SingleFileBrowser.getClass();

+ 8 - 3
extension/core/bg/core.js

@@ -58,7 +58,8 @@ singlefile.core = (() => {
 			"/lib/single-file/vendor/css-minifier.js"
 		],
 		loadDeferredImages: [
-			"/lib/lazy/content/content-lazy-loader.js"
+			"/lib/lazy/content/content-lazy-loader.js",
+			() => this.lazyLoader.getScriptPath = path => browser.runtime.getURL(path)
 		],
 		removeAlternativeImages: [
 			"/lib/single-file/modules/html-images-alt-minifier.js"
@@ -115,8 +116,12 @@ singlefile.core = (() => {
 	}
 
 	async function executeContentScripts(tabId, scriptFiles, allFrames, runAt) {
-		for (const file of scriptFiles) {
-			await browser.tabs.executeScript(tabId, { file, allFrames, runAt });
+		for (const script of scriptFiles) {
+			if (typeof script == "function") {
+				await browser.tabs.executeScript(tabId, { code: "(" + script.toString() + ")()", allFrames, runAt });
+			} else {
+				await browser.tabs.executeScript(tabId, { file: script, allFrames, runAt });
+			}
 		}
 	}
 

+ 15 - 15
extension/core/content/content-bootstrap.js

@@ -97,21 +97,21 @@ this.singlefile.bootstrap = this.singlefile.bootstrap || (async () => {
 		if (!singlefile.pageAutoSaved || options.autoSaveUnload) {
 			options.sessionId = 0;
 			const docData = docHelper.preProcessDoc(document, window, options);
-			const framesData = this.frameTree && !options.removeFrames && frameTree.getSync(options);
-			browser.runtime.sendMessage({
-				autoSaveContent: true,
-				content: docHelper.serialize(document),
-				canvasData: docData.canvasData,
-				fontsData: docData.fontsData,
-				stylesheetContents: docData.stylesheetContents,
-				imageData: docData.imageData,
-				postersData: docData.postersData,
-				usedFonts: docData.usedFonts,
-				shadowRootContents: docData.shadowRootContents,
-				framesData,
-				url: location.href
-			});
+			if (this.frameTree && !options.removeFrames) {
+				browser.runtime.sendMessage({
+					autoSaveContent: true,
+					content: docHelper.serialize(document),
+					canvasData: docData.canvasData,
+					fontsData: docData.fontsData,
+					stylesheetContents: docData.stylesheetContents,
+					imageData: docData.imageData,
+					postersData: docData.postersData,
+					usedFonts: docData.usedFonts,
+					shadowRootContents: docData.shadowRootContents,
+					framesData: frameTree.getSync(options),
+					url: location.href
+				});
+			}
 		}
 	}
-
 })();

+ 35 - 22
lib/lazy/content/content-lazy-loader.js

@@ -18,7 +18,7 @@
  *   along with SingleFile.  If not, see <http://www.gnu.org/licenses/>.
  */
 
-/* global browser, document, MutationObserver */
+/* global browser, document, MutationObserver, setTimeout, clearTimeout, lazyLoader */
 
 this.lazyLoader = this.lazyLoader || (() => {
 
@@ -34,8 +34,8 @@ this.lazyLoader = this.lazyLoader || (() => {
 	function process(options) {
 		return new Promise(async resolve => {
 			let timeoutId, srcAttributeChanged;
-			setTimeout(() => {
-				clearTimeout(timeoutId);
+			setAsyncTimeout(() => {
+				clearAsyncTimeout(timeoutId);
 				lazyLoadEnd(idleTimeoutId, observer, resolve);
 			}, options.loadDeferredImagesMaxIdleTime * 5);
 			const observer = new MutationObserver(async mutations => {
@@ -56,9 +56,9 @@ this.lazyLoader = this.lazyLoader || (() => {
 				}
 			});
 			observer.observe(document, { subtree: true, childList: true, attributes: true });
-			const idleTimeoutId = await setTimeout(() => {
+			const idleTimeoutId = await setAsyncTimeout(() => {
 				if (!srcAttributeChanged) {
-					clearTimeout(timeoutId);
+					clearAsyncTimeout(timeoutId);
 					lazyLoadEnd(idleTimeoutId, observer, resolve);
 				}
 			}, options.loadDeferredImagesMaxIdleTime * 1.2);
@@ -67,38 +67,51 @@ this.lazyLoader = this.lazyLoader || (() => {
 	}
 
 	async function deferLazyLoadEnd(timeoutId, idleTimeoutId, observer, options, resolve) {
-		await clearTimeout(timeoutId);
-		return setTimeout(async () => await lazyLoadEnd(idleTimeoutId, observer, resolve), options.loadDeferredImagesMaxIdleTime);
+		await clearAsyncTimeout(timeoutId);
+		return setAsyncTimeout(async () => await lazyLoadEnd(idleTimeoutId, observer, resolve), options.loadDeferredImagesMaxIdleTime);
 	}
 
 	function lazyLoadEnd(idleTimeoutId, observer, resolve) {
-		clearTimeout(idleTimeoutId);
+		clearAsyncTimeout(idleTimeoutId);
 		injectScript(SCRIPT_AFTER_PATH);
-		setTimeout(resolve, 100);
+		setAsyncTimeout(resolve, 100);
 		observer.disconnect();
 	}
 
 	function injectScript(path) {
 		const scriptElement = document.createElement(SCRIPT_TAG_NAME);
-		scriptElement.src = browser.runtime.getURL(path);
+		if (lazyLoader.getScriptPath) {
+			scriptElement.src = lazyLoader.getScriptPath(path);
+		} else {
+			debugger
+			scriptElement.textContent = lazyLoader.getScriptContent(path);
+		}
 		(document.documentElement || document).appendChild(scriptElement);
 		scriptElement.onload = () => scriptElement.remove();
 	}
 
-	async function setTimeout(callback, delay) {
-		const timeoutId = await browser.runtime.sendMessage({ setTimeoutRequest: true, delay });
-		const timeoutCallback = message => {
-			if (message.onTimeout && message.id == timeoutId) {
-				browser.runtime.onMessage.removeListener(timeoutCallback);
-				callback();
-			}
-		};
-		browser.runtime.onMessage.addListener(timeoutCallback);
-		return timeoutId;
+	async function setAsyncTimeout(callback, delay) {
+		if (this.browser && browser.runtime && browser.runtime.sendMessage) {
+			const timeoutId = await browser.runtime.sendMessage({ setTimeoutRequest: true, delay });
+			const timeoutCallback = message => {
+				if (message.onTimeout && message.id == timeoutId) {
+					browser.runtime.onMessage.removeListener(timeoutCallback);
+					callback();
+				}
+			};
+			browser.runtime.onMessage.addListener(timeoutCallback);
+			return timeoutId;
+		} else {
+			return setTimeout(callback, delay);
+		}
 	}
 
-	async function clearTimeout(timeoutId) {
-		await browser.runtime.sendMessage({ clearTimeout: true, id: timeoutId });
+	async function clearAsyncTimeout(timeoutId) {
+		if (this.browser && browser && browser.runtime && browser.runtime.sendMessage) {
+			await browser.runtime.sendMessage({ clearTimeout: true, id: timeoutId });
+		} else {
+			return clearTimeout(timeoutId);
+		}
 	}
 
 })();