Răsfoiți Sursa

added puppeteer integration example

Gildas 7 ani în urmă
părinte
comite
cc38fc36d8

+ 1 - 1
node-jsdom/example.js → node-jsdom/example.-jsdom.js

@@ -25,7 +25,7 @@
 
 const fs = require("fs");
 
-const SingleFile = require("./single-file-jsdom.js");
+const SingleFile = require("./single-file-puppeteer.js");
 
 SingleFile.getPageData({
 	url: "https://github.com/gildas-lormeau/SingleFile",

+ 1 - 1
node-jsdom/single-file-jsdom.js

@@ -91,7 +91,7 @@ exports.getPageData = async options => {
 	options.win = dom.window;
 	options.doc = dom.window.document;
 	options.saveRawPage = true;
-	options.loadDeferredImages = false;
+	options.removeFrames = true;
 	const singleFile = new SingleFile(options);
 	await singleFile.initialize();
 	await singleFile.run();

+ 51 - 0
node-puppeteer/example-puppeteer.js

@@ -0,0 +1,51 @@
+/*
+ * Copyright 2010-2019 Gildas Lormeau
+ * contact : gildas.lormeau <at> gmail.com
+ * 
+ * This file is part of SingleFile.
+ *
+ *   The code in this file is free software: you can redistribute it and/or 
+ *   modify it under the terms of the GNU Affero General Public License 
+ *   (GNU AGPL) as published by the Free Software Foundation, either version 3
+ *   of the License, or (at your option) any later version.
+ * 
+ *   The code in this file is distributed in the hope that it will be useful, 
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of 
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero 
+ *   General Public License for more details.
+ *
+ *   As additional permission under GNU AGPL version 3 section 7, you may 
+ *   distribute UNMODIFIED VERSIONS OF THIS file without the copy of the GNU 
+ *   AGPL normally required by section 4, provided you include this license 
+ *   notice and a URL through which recipients can access the Corresponding 
+ *   Source.
+ */
+
+/* global require, console, process */
+
+const fs = require("fs");
+
+const SingleFile = require("./single-file-puppeteer.js");
+
+// Example: save a single page with the puppeteer back-end and write the
+// result to disk under the file name computed from `filenameTemplate`.
+// The commented-out `puppeteer*` and `userAgent` entries are optional
+// settings read by getPageData in single-file-puppeteer.js.
+SingleFile.getPageData({
+	url: "https://fr.wikipedia.org/wiki/Data_URI_scheme",
+	// puppeteerExecutablePath: "C:/Users/Gildas/AppData/Local/Google/Chrome SxS/Application/chrome.exe",
+	// puppeteerWaitUntil: "networkidle0",
+	// puppeteerHeadless: false,
+	// userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) Gecko Firefox AppleWebKit (KHTML, like Gecko) Chrome Safari",
+	removeHiddenElements: true,
+	removeUnusedStyles: true,
+	removeUnusedFonts: true,
+	removeFrames: true,
+	removeImports: true,
+	removeScripts: true,
+	compressHTML: true,
+	compressCSS: true,
+	filenameTemplate: "{page-title} ({date-iso} {time-locale}).html",
+	removeAudioSrc: true,
+	removeVideoSrc: true,
+	removeAlternativeFonts: true,
+	removeAlternativeMedias: true,
+	removeAlternativeImages: true,
+	groupDuplicateImages: true
+})
+	.then(pageData => fs.writeFileSync(pageData.filename, pageData.content))
+	.catch(error => {
+		// Surface launch/navigation/write failures instead of leaving an
+		// unhandled promise rejection, and report failure to the caller.
+		console.error(error);
+		process.exitCode = 1;
+	});

+ 74 - 0
node-puppeteer/single-file-puppeteer.js

@@ -0,0 +1,74 @@
+/*
+ * Copyright 2010-2019 Gildas Lormeau
+ * contact : gildas.lormeau <at> gmail.com
+ * 
+ * This file is part of SingleFile.
+ *
+ *   The code in this file is free software: you can redistribute it and/or 
+ *   modify it under the terms of the GNU Affero General Public License 
+ *   (GNU AGPL) as published by the Free Software Foundation, either version 3
+ *   of the License, or (at your option) any later version.
+ * 
+ *   The code in this file is distributed in the hope that it will be useful, 
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of 
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero 
+ *   General Public License for more details.
+ *
+ *   As additional permission under GNU AGPL version 3 section 7, you may 
+ *   distribute UNMODIFIED VERSIONS OF THIS file without the copy of the GNU 
+ *   AGPL normally required by section 4, provided you include this license 
+ *   notice and a URL through which recipients can access the Corresponding 
+ *   Source.
+ */
+
+/* global require, exports, SingleFileBrowser */
+const fs = require("fs");
+
+const puppeteer = require("puppeteer-core");
+
+// Source files that getPageData reads with fs.readFileSync and evaluates
+// inside the loaded page before it uses SingleFileBrowser there.
+// The paths are handed to fs.readFileSync unchanged, so these relative
+// paths resolve against the process working directory, not this file's
+// directory.
+// NOTE(review): the list looks dependency-ordered (utilities and vendor
+// code first, single-file-core/browser last) - confirm before reordering.
+const SCRIPTS = [
+	"./lib/single-file/util/doc-util.js",
+	"./lib/single-file/util/doc-helper.js",
+	"./lib/single-file/vendor/css-tree.js",
+	"./lib/single-file/vendor/html-srcset-parser.js",
+	"./lib/single-file/vendor/css-minifier.js",
+	"./lib/single-file/vendor/css-font-property-parser.js",
+	"./lib/single-file/vendor/css-media-query-parser.js",
+	"./lib/single-file/modules/html-minifier.js",
+	"./lib/single-file/modules/css-fonts-minifier.js",
+	"./lib/single-file/modules/css-fonts-alt-minifier.js",
+	"./lib/single-file/modules/css-matched-rules.js",
+	"./lib/single-file/modules/css-medias-alt-minifier.js",
+	"./lib/single-file/modules/css-rules-minifier.js",
+	"./lib/single-file/modules/html-images-alt-minifier.js",
+	"./lib/single-file/modules/html-serializer.js",
+	"./lib/single-file/single-file-core.js",
+	"./lib/single-file/single-file-browser.js"
+];
+
+/**
+ * Opens `options.url` in a browser driven by puppeteer, injects the
+ * SingleFile scripts listed in SCRIPTS into the page, runs SingleFile there
+ * and returns the resulting page data.
+ *
+ * Options read by this function (the whole object is also forwarded to the
+ * SingleFile instance created inside the page):
+ * @param {Object} options
+ * @param {string} options.url - address of the page to save
+ * @param {boolean} [options.puppeteerHeadless=true] - run the browser headless
+ * @param {string} [options.puppeteerExecutablePath] - browser executable to launch
+ * @param {string} [options.userAgent] - user agent set on the page before navigation
+ * @param {string} [options.puppeteerWaitUntil="networkidle0"] - `waitUntil` value passed to page.goto
+ * @returns {Promise<Object>} the value returned by `singleFile.getPageData()`
+ *   in the page (the bundled example uses its `filename` and `content`)
+ */
+exports.getPageData = async options => {
+	const browserOptions = {
+		headless: options.puppeteerHeadless === undefined ? true : options.puppeteerHeadless
+	};
+	if (options.puppeteerExecutablePath) {
+		browserOptions.executablePath = options.puppeteerExecutablePath;
+	}
+	const browser = await puppeteer.launch(browserOptions);
+	try {
+		const page = await browser.newPage();
+		if (options.userAgent) {
+			await page.setUserAgent(options.userAgent);
+		}
+		await page.goto(options.url, {
+			waitUntil: options.puppeteerWaitUntil || "networkidle0"
+		});
+		// Inject the scripts one after another so that each one is fully
+		// evaluated, in list order, before the next one runs.
+		for (const scriptPath of SCRIPTS) {
+			await page.evaluate(fs.readFileSync(scriptPath).toString());
+		}
+		// This callback is serialized and executed in the page: `options`
+		// below is the copy passed as second argument, so forcing
+		// removeFrames does not modify the caller's object.
+		return await page.evaluate(async options => {
+			options.removeFrames = true;
+			const SingleFile = SingleFileBrowser.getClass();
+			const singleFile = new SingleFile(options);
+			await singleFile.initialize();
+			await singleFile.run();
+			return singleFile.getPageData();
+		}, options);
+	} finally {
+		// Always shut the browser down, on success or failure; otherwise
+		// the browser process is leaked and keeps the Node process alive.
+		await browser.close();
+	}
+};

+ 5 - 4
package.json

@@ -9,9 +9,10 @@
 	"author": "Gildas Lormeau",
 	"license": "AGPL-3.0-or-later",
 	"dependencies": {
-		"jsdom": "*",
-		"strong-data-uri": "*",
 		"iconv-lite": "*",
-		"request-promise-native": "*"
+		"jsdom": "*",
+		"puppeteer-core": "*",
+		"request-promise-native": "*",
+		"strong-data-uri": "*"
 	}
-}
+}