Prechádzať zdrojové kódy

added implementation for selenium webdriver

Gildas 7 rokov pred
rodič
commit
de425062e7

+ 2 - 1
node-index.js

@@ -24,4 +24,5 @@
 /* global require, exports */
 
 exports.puppeteer = require("./node-puppeteer/single-file-puppeteer.js");
-exports.jsdom = require("./node-jsdom/single-file-jsdom.js");
+exports.jsdom = require("./node-jsdom/single-file-jsdom.js");
+exports.webdriver = require("./node-webdriver/single-file-webdriver.js");

+ 49 - 0
node-webdriver/example-webdriver.js

@@ -0,0 +1,49 @@
+/*
+ * Copyright 2010-2019 Gildas Lormeau
+ * contact : gildas.lormeau <at> gmail.com
+ * 
+ * This file is part of SingleFile.
+ *
+ *   The code in this file is free software: you can redistribute it and/or 
+ *   modify it under the terms of the GNU Affero General Public License 
+ *   (GNU AGPL) as published by the Free Software Foundation, either version 3
+ *   of the License, or (at your option) any later version.
+ * 
+ *   The code in this file is distributed in the hope that it will be useful, 
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of 
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero 
+ *   General Public License for more details.
+ *
+ *   As additional permission under GNU AGPL version 3 section 7, you may 
+ *   distribute UNMODIFIED VERSIONS OF THIS file without the copy of the GNU 
+ *   AGPL normally required by section 4, provided you include this license 
+ *   notice and a URL through which recipients can access the Corresponding 
+ *   Source.
+ */
+
+/* global require */
+
+const fs = require("fs");
+
+const SingleFile = require("../node-index.js").webdriver;
+
+SingleFile.getPageData({
+	url: "https://github.com/gildas-lormeau/SingleFile",
+	// webdriverBrowser: "Chrome",
+	// userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) Gecko Firefox AppleWebKit (KHTML, like Gecko) Chrome Safari",
+	removeHiddenElements: true,
+	removeUnusedStyles: true,
+	removeUnusedFonts: true,
+	removeFrames: true,
+	removeImports: true,
+	removeScripts: true,
+	compressHTML: true,
+	compressCSS: true,
+	filenameTemplate: "{page-title} ({date-iso} {time-locale}).html",
+	removeAudioSrc: true,
+	removeVideoSrc: true,
+	removeAlternativeFonts: true,
+	removeAlternativeMedias: true,
+	removeAlternativeImages: true,
+	groupDuplicateImages: true
+}).then(pageData => fs.writeFileSync(pageData.filename, pageData.content));

+ 79 - 0
node-webdriver/single-file-webdriver.js

@@ -0,0 +1,79 @@
+/*
+ * Copyright 2010-2019 Gildas Lormeau
+ * contact : gildas.lormeau <at> gmail.com
+ * 
+ * This file is part of SingleFile.
+ *
+ *   The code in this file is free software: you can redistribute it and/or 
+ *   modify it under the terms of the GNU Affero General Public License 
+ *   (GNU AGPL) as published by the Free Software Foundation, either version 3
+ *   of the License, or (at your option) any later version.
+ * 
+ *   The code in this file is distributed in the hope that it will be useful, 
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of 
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero 
+ *   General Public License for more details.
+ *
+ *   As additional permission under GNU AGPL version 3 section 7, you may 
+ *   distribute UNMODIFIED VERSIONS OF THIS file without the copy of the GNU 
+ *   AGPL normally required by section 4, provided you include this license 
+ *   notice and a URL through which recipients can access the Corresponding 
+ *   Source.
+ */
+
+/* global require, exports */
+const fs = require("fs");
+
+const { Builder } = require("selenium-webdriver");
+
+const SCRIPTS = [ // paths, relative to this file, of the scripts that getPageData reads from disk and executes in the loaded page
+	"../lib/single-file/util/doc-util.js", // util/ entries are listed first
+	"../lib/single-file/util/doc-helper.js",
+	"../lib/single-file/vendor/css-tree.js", // vendor/ entries follow
+	"../lib/single-file/vendor/html-srcset-parser.js",
+	"../lib/single-file/vendor/css-minifier.js",
+	"../lib/single-file/vendor/css-font-property-parser.js",
+	"../lib/single-file/vendor/css-media-query-parser.js",
+	"../lib/single-file/modules/html-minifier.js", // modules/ entries follow
+	"../lib/single-file/modules/css-fonts-minifier.js",
+	"../lib/single-file/modules/css-fonts-alt-minifier.js",
+	"../lib/single-file/modules/css-matched-rules.js",
+	"../lib/single-file/modules/css-medias-alt-minifier.js",
+	"../lib/single-file/modules/css-rules-minifier.js",
+	"../lib/single-file/modules/html-images-alt-minifier.js",
+	"../lib/single-file/modules/html-serializer.js",
+	"../lib/single-file/single-file-core.js", // NOTE(review): presumably relies on the entries above - confirm before reordering
+	"../lib/single-file/single-file-browser.js" // NOTE(review): presumably defines the SingleFileBrowser global used by getPageDataScript - confirm
+];
+
+exports.getPageData = async options => {
+	let driver;
+	try {
+		driver = await new Builder().forBrowser(options.webdriverBrowser).build();
+		await driver.get(options.url);
+		await Promise.all(SCRIPTS.map(scriptPath => driver.executeScript(fs.readFileSync(require.resolve(scriptPath)).toString())));
+		const pageData = await driver.executeAsyncScript(getPageDataScript(), options);
+		return pageData;
+	} finally {
+		if (driver) {
+			driver.quit();
+		}
+	}
+};
+
+function getPageDataScript() {
+	return `
+	const [options, callback] = arguments;
+	getPageData().then(pageData => callback(pageData))
+
+	async function getPageData() {
+		options.removeFrames = true;
+		const SingleFile = SingleFileBrowser.getClass();
+		const singleFile = new SingleFile(options);
+		await singleFile.initialize();
+		await singleFile.run();
+		const pageData = await singleFile.getPageData();
+		return pageData;
+	}
+	`;
+}