Prechádzať zdrojové kódy

added support of webdriver/firefox

Gildas 7 rokov pred
rodič
commit
c2c1963ad7

+ 1 - 1
cli/back-ends/puppeteer.js

@@ -64,7 +64,7 @@ exports.getPageData = async options => {
 		browserOptions.args = ["--disable-web-security"];
 	}
 	if (options.browserExecutablePath) {
-		browserOptions.executablePath = options.browserExecutablePath;
+		browserOptions.executablePath = options.browserExecutablePath || "chrome";
 	}
 	let browser;
 	try {

+ 3 - 0
cli/back-ends/webdriver.js → cli/back-ends/webdriver-chrome.js

@@ -69,6 +69,9 @@ exports.getPageData = async options => {
 		if (options.userAgent) {
 			await chromeOptions.addArguments("--user-agent=" + JSON.stringify(options.userAgent));
 		}
+		if (options.browserExecutablePath) {
+			chromeOptions.setChromeBinaryPath(options.browserExecutablePath);
+		}
 		builder.setChromeOptions(chromeOptions);
 		driver = await builder.forBrowser("chrome").build();
 		if (options.browserWidth && options.browserHeight) {

+ 138 - 0
cli/back-ends/webdriver-firefox.js

@@ -0,0 +1,138 @@
+/*
+ * Copyright 2010-2019 Gildas Lormeau
+ * contact : gildas.lormeau <at> gmail.com
+ * 
+ * This file is part of SingleFile.
+ *
+ *   The code in this file is free software: you can redistribute it and/or 
+ *   modify it under the terms of the GNU Affero General Public License 
+ *   (GNU AGPL) as published by the Free Software Foundation, either version 3
+ *   of the License, or (at your option) any later version.
+ * 
+ *   The code in this file is distributed in the hope that it will be useful, 
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of 
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero 
+ *   General Public License for more details.
+ *
+ *   As additional permission under GNU AGPL version 3 section 7, you may 
+ *   distribute UNMODIFIED VERSIONS OF THIS file without the copy of the GNU 
+ *   AGPL normally required by section 4, provided you include this license 
+ *   notice and a URL through which recipients can access the Corresponding 
+ *   Source.
+ */
+
+/* global require, exports */
+const fs = require("fs");
+
+const firefox = require("selenium-webdriver/firefox");
+const { Builder } = require("selenium-webdriver");
+
+// Paths (relative to this file) of the SingleFile scripts that getPageData reads,
+// concatenates in this order, and injects into the browser windows via executeScript.
+const SCRIPTS = [
+	"../../lib/hooks/hooks-frame.js",
+	"../../lib/frame-tree/frame-tree.js",
+	"../../lib/lazy/content/content-lazy-loader.js",
+	"../../lib/single-file/util/doc-util.js",
+	"../../lib/single-file/util/doc-helper.js",
+	"../../lib/single-file/util/timeout.js",
+	"../../lib/single-file/vendor/css-tree.js",
+	"../../lib/single-file/vendor/html-srcset-parser.js",
+	"../../lib/single-file/vendor/css-minifier.js",
+	"../../lib/single-file/vendor/css-font-property-parser.js",
+	"../../lib/single-file/vendor/css-media-query-parser.js",
+	"../../lib/single-file/modules/html-minifier.js",
+	"../../lib/single-file/modules/css-fonts-minifier.js",
+	"../../lib/single-file/modules/css-fonts-alt-minifier.js",
+	"../../lib/single-file/modules/css-matched-rules.js",
+	"../../lib/single-file/modules/css-medias-alt-minifier.js",
+	"../../lib/single-file/modules/css-rules-minifier.js",
+	"../../lib/single-file/modules/html-images-alt-minifier.js",
+	"../../lib/single-file/modules/html-serializer.js",
+	"../../lib/single-file/single-file-core.js",
+	"../../lib/single-file/single-file-browser.js"
+];
+
+exports.getPageData = async options => {
+	const RESOLVED_CONTENTS = {
+		"lib/lazy/web/web-lazy-loader-before.js": fs.readFileSync(require.resolve("../../lib/lazy/web/web-lazy-loader-before.js")).toString(),
+		"lib/lazy/web/web-lazy-loader-after.js": fs.readFileSync(require.resolve("../../lib/lazy/web/web-lazy-loader-after.js")).toString()
+	};
+	let driver;
+	try {
+		const builder = new Builder();
+		const firefoxOptions = new firefox.Options();
+		if (options.browserHeadless === undefined || options.browserHeadless) {
+			firefoxOptions.headless();
+		}
+		if (options.browserExecutablePath) {
+			firefoxOptions.setBinary(options.browserExecutablePath);
+		}
+		if (options.browserDisableWebSecurity === undefined || options.browserDisableWebSecurity) {
+			// not supported
+		}
+		if (options.userAgent) {
+			const profile = new firefox.Profile();
+			profile.setPreference("general.useragent.override", options.userAgent);
+			firefoxOptions.setProfile(profile);
+		}
+		builder.setFirefoxOptions(firefoxOptions);
+		driver = await builder.forBrowser("firefox").build();
+		if (options.browserWidth && options.browserHeight) {
+			const window = driver.manage().window();
+			if (window.setRect) {
+				window.setRect(options.browserHeight, options.browserWidth);
+			} else if (window.setSize) {
+				window.setSize(options.browserWidth, options.browserHeight);
+			}
+		}
+		await driver.get(options.url);
+		let scripts = SCRIPTS.map(scriptPath => fs.readFileSync(require.resolve(scriptPath)).toString().replace(/\n(this)\.([^ ]+) = (this)\.([^ ]+) \|\|/g, "\nwindow.$2 = window.$4 ||")).join("\n");
+		scripts += "\nlazyLoader.getScriptContent = " + (function (path) { return (RESOLVED_CONTENTS)[path]; }).toString().replace("RESOLVED_CONTENTS", JSON.stringify(RESOLVED_CONTENTS)) + ";";
+		const mainWindowHandle = driver.getWindowHandle();
+		const windowHandles = await driver.getAllWindowHandles();
+		await Promise.all(windowHandles.map(async windowHandle => {
+			await driver.switchTo().window(windowHandle);
+			driver.executeScript(scripts);
+		}));
+		await driver.switchTo().window(mainWindowHandle);
+		driver.executeScript(scripts);
+		const result = await driver.executeAsyncScript(getPageDataScript(), options);
+		if (result.error) {
+			throw result.error;
+		} else {
+			return result.pageData;
+		}
+	} finally {
+		if (driver) {
+			driver.quit();
+		}
+	}
+};
+
+/**
+ * Builds the source of the script that getPageData passes to driver.executeAsyncScript.
+ *
+ * The script reads `options` and the completion `callback` from `arguments`, then calls
+ * the callback with either `{ pageData }` on success or `{ error }` (the error converted
+ * to a string) on failure.
+ *
+ * Inside the page it forces `options.insertSingleFileComment` to true and, unless
+ * `options.saveRawPage` is set, waits for `frameTree.getAsync(options)` (skipped when
+ * `options.removeFrames` is set) and for `lazyLoader.process(options)` (only when
+ * `options.loadDeferredImages` is set). It then runs the class returned by
+ * `SingleFileBrowser.getClass()` against the current `document` and `window`.
+ *
+ * NOTE(review): `options.framesData` takes the first resolved pre-initialization value;
+ * when frames are removed but deferred images are loaded, that is the result of
+ * `lazyLoader.process` rather than frame data — confirm this is harmless.
+ *
+ * @returns {string} JavaScript source text to execute in the browser
+ */
+function getPageDataScript() {
+	return `
+	const [options, callback] = arguments;
+	getPageData()
+		.then(pageData => callback({ pageData }))
+		.catch(error => callback({ error: error.toString() }));
+
+	async function getPageData() {
+		options.insertSingleFileComment = true;
+		const preInitializationPromises = [];
+		if (!options.saveRawPage) {
+			if (!options.removeFrames) {
+				preInitializationPromises.push(frameTree.getAsync(options));
+			}
+			if (options.loadDeferredImages) {
+				preInitializationPromises.push(lazyLoader.process(options));
+			}
+		}
+		[options.framesData] = await Promise.all(preInitializationPromises);
+		options.doc = document;
+		options.win = window;
+		const SingleFile = SingleFileBrowser.getClass();
+		const singleFile = new SingleFile(options);
+		await singleFile.run();
+		return await singleFile.getPageData();
+	}
+	`;
+}

+ 4 - 3
cli/single-file.js

@@ -34,7 +34,7 @@ const args = require("yargs")
 	.default({
 		"back-end": "puppeteer",
 		"browser-headless": true,
-		"browser-executable-path": "chrome",
+		"browser-executable-path": "",
 		"browser-width": 1280,
 		"browser-height": 720,
 		"browser-wait-until": "networkidle0",
@@ -59,7 +59,7 @@ const args = require("yargs")
 		"save-raw-page": false
 	})
 	.options("back-end", { description: "Back-end to use" })
-	.choices("back-end", ["jsdom", "puppeteer", "webdriver"])
+	.choices("back-end", ["jsdom", "puppeteer", "webdriver-chrome", "webdriver-firefox"])
 	.options("browser-headless", { description: "Run the browser in headless mode (puppeteer, webdriver)" })
 	.boolean("browser-headless")
 	.options("browser-executable-path", { description: "Path to chrome/chromium executable (puppeteer)" })
@@ -113,7 +113,8 @@ const args = require("yargs")
 const backEnds = {
 	jsdom: "./back-ends/jsdom.js",
 	puppeteer: "./back-ends/puppeteer.js",
-	webdriver: "./back-ends/webdriver.js"
+	"webdriver-chrome": "./back-ends/webdriver-chrome.js",
+	"webdriver-firefox": "./back-ends/webdriver-firefox.js"
 };
 require(backEnds[args.backEnd]).getPageData(args).then(pageData => {
 	if (args.output) {