Parcourir la source

implemented CLI (WIP)

Gildas il y a 7 ans
Parent
commit
b4b8dca8c0

+ 115 - 0
cli/single-file-cli.js

@@ -0,0 +1,115 @@
+#!/usr/bin/env node
+
+/*
+ * Copyright 2010-2019 Gildas Lormeau
+ * contact : gildas.lormeau <at> gmail.com
+ *
+ * This file is part of SingleFile.
+ *
+ *   The code in this file is free software: you can redistribute it and/or
+ *   modify it under the terms of the GNU Affero General Public License
+ *   (GNU AGPL) as published by the Free Software Foundation, either version 3
+ *   of the License, or (at your option) any later version.
+ *
+ *   The code in this file is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero
+ *   General Public License for more details.
+ *
+ *   As additional permission under GNU AGPL version 3 section 7, you may
+ *   distribute UNMODIFIED VERSIONS OF THIS file without the copy of the GNU
+ *   AGPL normally required by section 4, provided you include this license
+ *   notice and a URL through which recipients can access the Corresponding
+ *   Source.
+ */
+
+/* global require */
+
+const args = require("yargs")
+	.wrap(null)
+	.command("$0 <url> [output]", "Save a page into a single HTML file.", yargs => {
+		yargs.positional("url", { description: "URL of the page to save", type: "string" });
+		yargs.positional("output", { description: "Output filename", type: "string" });
+	})
+	.default({
+		"back-end": "puppeteer",
+		"browser-headless": true,
+		"browser-executable-path": "chrome",
+		"compress-CSS": true,
+		"compress-HTML": true,
+		"group-duplicate-images": true,
+		"load-deferred-images": true,
+		"load-deferred-images-max-idle-time": 1500,
+		"max-resource-size-enabled": false,
+		"max-resource-size": 10,
+		"remove-hidden-elements": true,
+		"remove-unused-styles": true,
+		"remove-unused-fonts": true,
+		"remove-frames": false,
+		"remove-imports": true,
+		"remove-scripts": true,
+		"remove-audio-src": true,
+		"remove-video-src": true,
+		"remove-alternative-fonts": true,
+		"remove-alternative-medias": true,
+		"remove-alternative-images": true,
+		"save-raw-page": false
+	})
+	.options("back-end", { description: "Back-end to use" })
+	.choices("back-end", ["jsdom", "puppeteer", "webdriver"])
+	.options("browser-headless", { description: "Run the browser in headless mode" })
+	.boolean("browser-headless")
+	.options("browser-executable-path", { description: "Path to chrome/chromium executable" })
+	.string("browser-executable-path")
+	.options("compress-CSS", { description: "Compress CSS stylesheets" })
+	.boolean("compress-CSS")
+	.options("compress-HTML", { description: "Compress HTML content" })
+	.boolean("compress-HTML")
+	.options("group-duplicate-images", { description: "Group duplicate images into CSS custom properties" })
+	.boolean("compress-HTML")
+	.options("load-deferred-images", { description: "Load deferred (aka lazy-loaded) images" })
+	.boolean("load-deferred-images")
+	.options("load-deferred-images-max-idle-time", { description: "Maximum delay of time to wait for deferred images" })
+	.number("load-deferred-images")
+	.options("max-resource-size-enabled", { description: "Enable removal of embedded resources exceeding a given size" })
+	.boolean("max-resource-size-enabled")
+	.options("max-resource-size", { description: "Maximum size of embedded resources (i.e. images, stylesheets, scripts and iframes)" })
+	.number("max-resource-size")
+	.options("remove-hidden-elements", { description: "Remove HTML elements which are not displayed" })
+	.number("remove-hidden-elements")
+	.options("remove-unused-styles", { description: "Remove unused CSS rules and unneeded declarations" })
+	.number("remove-unused-styles")
+	.options("remove-unused-fonts", { description: "Remove unused CSS font rules" })
+	.number("remove-unused-fonts")
+	.options("remove-frames", { description: "Remove frames" })
+	.number("remove-frames")
+	.options("remove-imports", { description: "Remove HTML imports" })
+	.number("remove-imports")
+	.options("remove-scripts", { description: "Remove JavaScript scripts" })
+	.number("remove-scripts")
+	.options("remove-audio-src", { description: "Remove source of audio elements" })
+	.number("remove-audio-src")
+	.options("remove-video-src", { description: "Remove source of video elements" })
+	.number("remove-video-src")
+	.options("remove-alternative-fonts", { description: "Remove alternative fonts to the ones displayed" })
+	.number("remove-alternative-fonts")
+	.options("remove-alternative-medias", { description: "Remove alternative CSS stylesheets" })
+	.number("remove-alternative-medias")
+	.options("remove-alternative-images", { description: "Remove images for alternative sizes of screen" })
+	.number("remove-alternative-images")
+	.options("save-raw-page", { description: "Save the original page without interpreting it into the browser" })
+	.number("save-raw-page")
+	.argv;
+
+const backEnds = {
+	jsdom: "./single-file-jsdom.js",
+	puppeteer: "./single-file-puppeteer.js",
+	webdriver: "./single-file-webdriver.js"
+};
+require(backEnds[args.backEnd]).getPageData(args).then(pageData => {
+	if (args.output) {
+		require("fs").writeFileSync(args.output, pageData.content);
+	} else {
+		console.log(pageData.content); // eslint-disable-line no-console
+	}
+});

+ 0 - 0
node-jsdom/single-file-jsdom.js → cli/single-file-jsdom.js


+ 0 - 0
node-puppeteer/single-file-puppeteer.js → cli/single-file-puppeteer.js


+ 0 - 0
node-webdriver/single-file-webdriver.js → cli/single-file-webdriver.js


+ 0 - 48
node-jsdom/example.-jsdom.js

@@ -1,48 +0,0 @@
-/*
- * Copyright 2010-2019 Gildas Lormeau
- * contact : gildas.lormeau <at> gmail.com
- * 
- * This file is part of SingleFile.
- *
- *   The code in this file is free software: you can redistribute it and/or 
- *   modify it under the terms of the GNU Affero General Public License 
- *   (GNU AGPL) as published by the Free Software Foundation, either version 3
- *   of the License, or (at your option) any later version.
- * 
- *   The code in this file is distributed in the hope that it will be useful, 
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of 
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero 
- *   General Public License for more details.
- *
- *   As additional permission under GNU AGPL version 3 section 7, you may 
- *   distribute UNMODIFIED VERSIONS OF THIS file without the copy of the GNU 
- *   AGPL normally required by section 4, provided you include this license 
- *   notice and a URL through which recipients can access the Corresponding 
- *   Source.
- */
-
-/* global require */
-
-const fs = require("fs");
-
-const SingleFile = require("../node-index.js").jsdom;
-
-SingleFile.getPageData({
-	url: "https://github.com/gildas-lormeau/SingleFile",
-	userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:65.0) Gecko Firefox AppleWebKit (KHTML, like Gecko) Chrome Safari",
-	removeHiddenElements: true,
-	removeUnusedStyles: true,
-	removeUnusedFonts: true,
-	removeFrames: true,
-	removeImports: true,
-	removeScripts: true,
-	compressHTML: true,
-	compressCSS: true,
-	filenameTemplate: "{page-title} ({date-iso} {time-locale}).html",
-	removeAudioSrc: true,
-	removeVideoSrc: true,
-	removeAlternativeFonts: true,
-	removeAlternativeMedias: true,
-	removeAlternativeImages: true,
-	groupDuplicateImages: true
-}).then(pageData => fs.writeFileSync(pageData.filename, pageData.content));

+ 0 - 51
node-puppeteer/example-puppeteer.js

@@ -1,51 +0,0 @@
-/*
- * Copyright 2010-2019 Gildas Lormeau
- * contact : gildas.lormeau <at> gmail.com
- * 
- * This file is part of SingleFile.
- *
- *   The code in this file is free software: you can redistribute it and/or 
- *   modify it under the terms of the GNU Affero General Public License 
- *   (GNU AGPL) as published by the Free Software Foundation, either version 3
- *   of the License, or (at your option) any later version.
- * 
- *   The code in this file is distributed in the hope that it will be useful, 
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of 
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero 
- *   General Public License for more details.
- *
- *   As additional permission under GNU AGPL version 3 section 7, you may 
- *   distribute UNMODIFIED VERSIONS OF THIS file without the copy of the GNU 
- *   AGPL normally required by section 4, provided you include this license 
- *   notice and a URL through which recipients can access the Corresponding 
- *   Source.
- */
-
-/* global require */
-
-const fs = require("fs");
-
-const SingleFile = require("../node-index.js").puppeteer;
-
-SingleFile.getPageData({
-	url: "https://github.com/gildas-lormeau/SingleFile",
-	// browserExecutablePath: "C:/Users/Gildas/AppData/Local/Google/Chrome SxS/Application/chrome.exe",
-	// userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) Gecko Firefox AppleWebKit (KHTML, like Gecko) Chrome Safari",
-	// browserHeadless: false,
-	removeHiddenElements: true,
-	removeUnusedStyles: true,
-	removeUnusedFonts: true,
-	removeFrames: false,
-	loadDeferredImages: true,
-	removeImports: true,
-	removeScripts: true,
-	compressHTML: true,
-	compressCSS: true,
-	filenameTemplate: "{page-title} ({date-iso} {time-locale}).html",
-	removeAudioSrc: true,
-	removeVideoSrc: true,
-	removeAlternativeFonts: true,
-	removeAlternativeMedias: true,
-	removeAlternativeImages: true,
-	groupDuplicateImages: true
-}).then(pageData => fs.writeFileSync(pageData.filename, pageData.content));

+ 0 - 49
node-webdriver/example-webdriver.js

@@ -1,49 +0,0 @@
-/*
- * Copyright 2010-2019 Gildas Lormeau
- * contact : gildas.lormeau <at> gmail.com
- * 
- * This file is part of SingleFile.
- *
- *   The code in this file is free software: you can redistribute it and/or 
- *   modify it under the terms of the GNU Affero General Public License 
- *   (GNU AGPL) as published by the Free Software Foundation, either version 3
- *   of the License, or (at your option) any later version.
- * 
- *   The code in this file is distributed in the hope that it will be useful, 
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of 
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero 
- *   General Public License for more details.
- *
- *   As additional permission under GNU AGPL version 3 section 7, you may 
- *   distribute UNMODIFIED VERSIONS OF THIS file without the copy of the GNU 
- *   AGPL normally required by section 4, provided you include this license 
- *   notice and a URL through which recipients can access the Corresponding 
- *   Source.
- */
-
-/* global require */
-
-const fs = require("fs");
-
-const SingleFile = require("../node-index.js").webdriver;
-
-SingleFile.getPageData({
-	url: "https://github.com/gildas-lormeau/SingleFile",
-	// userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) Gecko Firefox AppleWebKit (KHTML, like Gecko) Chrome Safari",
-	// browserHeadless: false,
-	removeHiddenElements: true,
-	removeUnusedStyles: true,
-	removeUnusedFonts: true,
-	loadDeferredImages: true,
-	removeImports: true,
-	removeScripts: true,
-	compressHTML: true,
-	compressCSS: true,
-	filenameTemplate: "{page-title} ({date-iso} {time-locale}).html",
-	removeAudioSrc: true,
-	removeVideoSrc: true,
-	removeAlternativeFonts: true,
-	removeAlternativeMedias: true,
-	removeAlternativeImages: true,
-	groupDuplicateImages: true
-}).then(pageData => fs.writeFileSync(pageData.filename, pageData.content));

+ 3 - 6
package.json

@@ -1,11 +1,7 @@
 {
 	"name": "single-file",
 	"version": "0.0.1",
-	"description": "SingleFile for Node.js",
-	"main": "./node-index.js",
-	"scripts": {
-		"test": "echo \"Error: no test specified\" && exit 1"
-	},
+	"description": "SingleFile",
 	"author": "Gildas Lormeau",
 	"license": "AGPL-3.0-or-later",
 	"dependencies": {
@@ -14,6 +10,7 @@
 		"puppeteer-core": "*",
 		"request-promise-native": "*",
 		"selenium-webdriver": "*",
-		"strong-data-uri": "*"
+		"strong-data-uri": "*",
+		"yargs": "*"
 	}
 }