ソースを参照

use "single-file-cli"

Gildas 3 年 前
コミット
272bab7abe

+ 15 - 15
README.MD

@@ -48,8 +48,8 @@ these instructions:
 
 - Firefox:
   https://extensionworkshop.com/documentation/develop/temporary-installation-in-firefox/
-- Chrome: https://developer.chrome.com/docs/extensions/mv3/getstarted/#unpacked (omit the
-  manifest creation)
+- Chrome: https://developer.chrome.com/docs/extensions/mv3/getstarted/#unpacked
+  (omit the manifest creation)
 - Microsoft Edge:
   https://docs.microsoft.com/en-us/microsoft-edge/extensions-chromium/getting-started/extension-sideloading
 
@@ -167,8 +167,7 @@ Please follow these steps if you find an unknown issue:
 ## Command Line Interface
 
 You can save web pages to HTML from the command line interface. See here for
-more info:
-https://github.com/gildas-lormeau/SingleFile/blob/master/cli/README.MD.
+more info: https://github.com/gildas-lormeau/single-file-cli/blob/master/README.MD.
 
 ## Integration with user scripts
 
@@ -187,17 +186,18 @@ view the page in a browser that would not support pages saved with SingleFileZ.
 More info here: https://github.com/gildas-lormeau/SingleFileZ
 
 ## File format comparison
-|   	                                                                          | HTML (SingleFile)  | HTML (SingleFileZ) | MAFF  | MHTML | Webarchive (Safari) | HTML+folder |
-| ---                                	                                          |       :---:        |       :---:        | :---: | :---: |         :---:       |    :---:    |
-| Pages are saved as a single file                                              | ✓ 	               | ✓ 	                | ✓     | ✓     | ✓                   |             | 
-| HTML and styles are minified                                                  | ✓                  | ✓ 	                |       |       |   	                |             |
-| Unused HTML and styles are removed from files                                 | ✓                  | ✓ 	                |       |       |                     |   	         |
-| Binary resources are not encoded in base 64                                   |                    | ✓ 	                | ✓     |       | ✓                   | ✓ 	         |
-| Files are compressed                                                          |                    | ✓ 	                | ✓     |       |                     |   	         |
-| Files can be viewed without installing any extension                          | ✓                  | ✓¹                 |       | ✓²    | ✓³                  | ✓           |
-| Files can be viewed without running JavaScript                                | ✓                  |         	          | ✓     | ✓     | ✓                   | ✓ 	         |
-| Files can be unzipped to extract resources and view pages                     |                    | ✓ 	                | ✓     |       |                     | n/a         |
-| Files contains the text of the page (plain or formatted) which can be indexed | ✓ 	               | ✓⁴                 |       | ✓     | ✓ 	                | ✓ 	         |
+
+| | HTML (SingleFile) | HTML (SingleFileZ) | MAFF | MHTML | Webarchive (Safari) | HTML+folder |
+| --- | :---: | :---: | :---: | :---: | :---: | :---: |
+| Pages are saved as a single file | ✓ | ✓ | ✓ | ✓ | ✓ | |
+| HTML and styles are minified | ✓ | ✓ | | | | |
+| Unused HTML and styles are removed from files | ✓ | ✓ | | | | |
+| Binary resources are not encoded in base 64 | | ✓ | ✓ | | ✓ | ✓ |
+| Files are compressed | | ✓ | ✓ | | | |
+| Files can be viewed without installing any extension | ✓ | ✓¹ | | ✓² | ✓³ | ✓ |
+| Files can be viewed without running JavaScript | ✓ | | ✓ | ✓ | ✓ | ✓ |
+| Files can be unzipped to extract resources and view pages | | ✓ | ✓ | | | n/a |
+| Files contain the text of the page (plain or formatted) which can be indexed | ✓ | ✓⁴ | | ✓ | ✓ | ✓ |
 
 Footnotes:
 

+ 4 - 127
cli/README.MD

@@ -1,129 +1,6 @@
-# SingleFile (Command Line Interface)
+# SingleFile CLI (Command Line Interface)
 
-## Introduction
+This folder is kept for backward compatibility only; it is an interface to
+[`single-file-cli`](https://github.com/gildas-lormeau/single-file-cli).
 
-SingleFile can be launched from the command line by running it into a (headless) browser. It runs through Node.js as a standalone script injected into the web page instead of being embedded into a WebExtension. To connect to the browser, it can use [Puppeteer](https://github.com/GoogleChrome/puppeteer) or [Selenium WebDriver](https://www.npmjs.com/package/selenium-webdriver). Alternatively, it can also emulate a browser with JavaScript disabled by using [jsdom](https://github.com/jsdom/jsdom).
-  
-## Installation with Docker
-
-- Installation from Docker Hub
-
-  `docker pull capsulecode/singlefile`
-  
-  `docker tag capsulecode/singlefile singlefile`
-  
-- Manual installation
-
-  `git clone --depth 1 --recursive https://github.com/gildas-lormeau/SingleFile.git`
-  
-  `cd SingleFile/cli`
-
-  `docker build --no-cache -t singlefile .`
-
-- Run
-
-  `docker run singlefile "https://www.wikipedia.org"`
-  
-- Run and redirect the result into a file
-
-  `docker run singlefile "https://www.wikipedia.org" > wikipedia.html`
-
-- Run and mount a volume to get the saved file in the current directory
-
-  - Save one page
-
-    `docker run -v %cd%:/usr/src/app/out singlefile "https://www.wikipedia.org" wikipedia.html` (Windows)
-
-    `docker run -v $(pwd):/usr/src/app/out singlefile "https://www.wikipedia.org" wikipedia.html` (Linux/UNIX)
-
-  - Save one or multiple pages by using the filename template (see `--filename-template` option)
-
-    `docker run -v %cd%:/usr/src/app/out singlefile "https://www.wikipedia.org" --dump-content=false` (Windows)
-
-    `docker run -v $(pwd):/usr/src/app/out singlefile "https://www.wikipedia.org" --dump-content=false` (Linux/UNIX)
-
-- An alternative docker file can be found here https://github.com/screenbreak/SingleFile-dockerized. It allows you to save pages from the command line interface or through an HTTP server.
-
-## Manual installation
-
-- Make sure Chrome or Firefox is installed and the executable can be found through the `PATH` environment variable. Otherwise you will need to set the `--browser-executable-path` option to help SingleFile locating it. As an alternative to Chrome and Firefox, you can use jsdom by setting the `--back-end` option to `jsdom`.
-
-- Install [Node.js](https://nodejs.org)
-
-- There are 3 ways to download the code of SingleFile, choose the one you prefer (`npm` is installed with Node.js):
-
-  - Download and install globally with `npm`
-    
-    `npm install -g "gildas-lormeau/SingleFile#master"`
-  
-  - Download and unzip manually the [master archive](https://github.com/gildas-lormeau/SingleFile/archive/master.zip) provided by Github      
-
-    `unzip master.zip .`
-  
-    `cd SingleFile-master`
-  
-    `npm install`
-  
-    `cd cli`    
-    
-  - Download with `git`
-
-    `git clone --depth 1 --recursive https://github.com/gildas-lormeau/SingleFile.git`
-  
-    `cd SingleFile`
-    
-    `npm install`
-  
-    `cd cli`           
-  
-- Make `single-file` executable (Linux/Unix/BSD etc.) if SingleFile is not installed globally.
-
-  `chmod +x single-file`
-
-- To use Firefox instead of Chrome, you must download the [Selenium WebDriver](https://www.npmjs.com/package/selenium-webdriver) component (i.e. `geckodriver` for Firefox).  Make sure it can be found through the `PATH` environment variable or the `cli` folder. Otherwise you will need to set the `--web-driver-executable-path` option to help WebDriver locating the executable.
-
-## Run
-
-- Syntax
- 
-  `single-file <url> [output] [options ...]`
-
-- Display help
-
-  `single-file --help`
-
-- Examples
-
-  - Dump the processed content of https://www.wikipedia.org into the console
-
-  `single-file https://www.wikipedia.org --dump-content`
-
-  - Save https://www.wikipedia.org into `wikipedia.html` in the current folder
-
-  `single-file https://www.wikipedia.org wikipedia.html`
-
-  - Save https://www.wikipedia.org into `wikipedia.html` in the current folder with Firefox instead of Chrome
-
-  `single-file https://www.wikipedia.org wikipedia.html --back-end=webdriver-gecko`
-
-  - Save a list of URLs stored into `list-urls.txt` in the current folder
-
-  `single-file --urls-file=list-urls.txt`
-
-  - Save https://www.wikipedia.org and crawl its internal links with the query parameters removed from the URL
-
-  `single-file https://www.wikipedia.org --crawl-links=true --crawl-inner-links-only=true --crawl-max-depth=1 --crawl-rewrite-rule="^(.*)\\?.*$ $1"`
-
-  - Save https://www.wikipedia.org and external links only
-
-  `single-file https://www.wikipedia.org --crawl-links=true --crawl-inner-links-only=false --crawl-external-links-max-depth=1 --crawl-rewrite-rule="^.*wikipedia.*$"`
-  
-## Troubleshooting
-
- - If the error message `UnhandledPromiseRejectionWarning: Error: Browser is not downloaded. Run "npm install" or "yarn install" at ChromeLauncher.launch` is displayed, it probably means that `single-file` was not able to find the executable of the browser. Using the option `--browser-executable-path` to pass to `single-file` the complete path of the executable fixes this issue.
- 
-  - If saving a page takes an unusually long time, this may be due to a timeout error that was automatically recovered. Setting `--browser-wait-until` to a lower value (e.g. `networkidle0` or `load` instead of `networkidle2`) fixes this issue.
-
-## License
-
-SingleFile is licensed under AGPL. Code derived from third-party projects is licensed under MIT. Please contact me at gildas.lormeau &lt;at&gt; gmail.com if you are interested in licensing the SingleFile code for a commercial service or product.
+It is now recommended to use `single-file-cli` directly.

+ 0 - 305
cli/args.js

@@ -1,305 +0,0 @@
-/*
- * Copyright 2010-2020 Gildas Lormeau
- * contact : gildas.lormeau <at> gmail.com
- *
- * This file is part of SingleFile.
- *
- *   The code in this file is free software: you can redistribute it and/or
- *   modify it under the terms of the GNU Affero General Public License
- *   (GNU AGPL) as published by the Free Software Foundation, either version 3
- *   of the License, or (at your option) any later version.
- *
- *   The code in this file is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero
- *   General Public License for more details.
- *
- *   As additional permission under GNU AGPL version 3 section 7, you may
- *   distribute UNMODIFIED VERSIONS OF THIS file without the copy of the GNU
- *   AGPL normally required by section 4, provided you include this license
- *   notice and a URL through which recipients can access the Corresponding
- *   Source.
- */
-
-/* global require, module */
-
-const args = require("yargs")
-	.wrap(null)
-	.command("$0 [url] [output]", "Save a page into a single HTML file.", yargs => {
-		yargs.positional("url", { description: "URL or path on the filesystem of the page to save", type: "string" });
-		yargs.positional("output", { description: "Output filename", type: "string" });
-	})
-	.default({
-		"accept-headers": {
-			"font": "application/font-woff2;q=1.0,application/font-woff;q=0.9,*/*;q=0.8",
-			"image": "image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8",
-			"stylesheet": "text/css,*/*;q=0.1",
-			"script": "*/*",
-			"document": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
-		},
-		"back-end": "puppeteer",
-		"block-mixed-content": false,
-		"browser-server": "",
-		"browser-headless": true,
-		"browser-executable-path": "",
-		"browser-width": 1280,
-		"browser-height": 720,
-		"browser-load-max-time": 60000,
-		"browser-wait-delay": 0,
-		"browser-wait-until": "networkidle0",
-		"browser-wait-until-fallback": true,
-		"browser-debug": false,
-		"browser-script": [],
-		"browser-stylesheet": [],
-		"browser-args": "",
-		"browser-start-minimized": false,
-		"browser-cookie": [],
-		"browser-cookies-file": "",
-		"compress-CSS": false,
-		"compress-HTML": true,
-		"dump-content": false,
-		"emulateMediaFeature": [],
-		"filename-template": "{page-title} ({date-iso} {time-locale}).html",
-		"filename-conflict-action": "uniquify",
-		"filename-replacement-character": "_",
-		"filename-max-length": 192,
-		"filename-max-length-unit": "bytes",
-		"group-duplicate-images": true,
-		"http-header": [],
-		"include-infobar": false,
-		"insert-meta-csp": true,
-		"load-deferred-images": true,
-		"load-deferred-images-dispatch-scroll-event": false,
-		"load-deferred-images-max-idle-time": 1500,
-		"load-deferred-images-keep-zoom-level": false,
-		"max-parallel-workers": 8,
-		"max-resource-size-enabled": false,
-		"max-resource-size": 10,
-		"move-styles-in-head": false,
-		"output-directory": "",
-		"remove-hidden-elements": true,
-		"remove-unused-styles": true,
-		"remove-unused-fonts": true,
-		"remove-frames": false,
-		"remove-imports": true,
-		"block-scripts": true,
-		"block-audios": true,
-		"block-videos": true,
-		"remove-alternative-fonts": true,
-		"remove-alternative-medias": true,
-		"remove-alternative-images": true,
-		"save-original-urls": false,
-		"save-raw-page": false,
-		"web-driver-executable-path": "",
-		"user-script-enabled": true,
-		"include-BOM": false,
-		"crawl-links": false,
-		"crawl-inner-links-only": true,
-		"crawl-remove-url-fragment": true,
-		"crawl-max-depth": 1,
-		"crawl-external-links-max-depth": 1,
-		"crawl-replace-urls": false,
-		"crawl-rewrite-rule": []
-	})
-	.options("back-end", { description: "Back-end to use" })
-	.choices("back-end", ["jsdom", "puppeteer", "webdriver-chromium", "webdriver-gecko", "puppeteer-firefox", "playwright-firefox", "playwright-chromium"])
-	.options("block-mixed-content", { description: "Block mixed contents" })
-	.boolean("block-mixed-content")
-	.options("browser-server", { description: "Server to connect to (puppeteer only for now)" })
-	.string("browser-server")
-	.options("browser-headless", { description: "Run the browser in headless mode (puppeteer, webdriver-gecko, webdriver-chromium)" })
-	.boolean("browser-headless")
-	.options("browser-executable-path", { description: "Path to chrome/chromium executable (puppeteer, webdriver-gecko, webdriver-chromium)" })
-	.string("browser-executable-path")
-	.options("browser-width", { description: "Width of the browser viewport in pixels" })
-	.number("browser-width")
-	.options("browser-height", { description: "Height of the browser viewport in pixels" })
-	.number("browser-height")
-	.options("browser-load-max-time", { description: "Maximum delay of time to wait for page loading in ms (puppeteer, webdriver-gecko, webdriver-chromium)" })
-	.number("browser-load-max-time")
-	.options("browser-wait-delay", { description: "Time to wait before capturing the page in ms" })
-	.number("browser-wait-delay")
-	.options("browser-wait-until", { description: "When to consider the page is loaded (puppeteer, webdriver-gecko, webdriver-chromium)" })
-	.choices("browser-wait-until", ["networkidle0", "networkidle2", "load", "domcontentloaded"])
-	.options("browser-wait-until-fallback", { description: "Retry with the next value of --browser-wait-until when a timeout error is thrown" })
-	.boolean("browser-wait-until-fallback")
-	.options("browser-debug", { description: "Enable debug mode (puppeteer, webdriver-gecko, webdriver-chromium)" })
-	.boolean("browser-debug")
-	.options("browser-script", { description: "Path of a script executed in the page (and all the frames) before it is loaded" })
-	.array("browser-script")
-	.options("browser-stylesheet", { description: "Path of a stylesheet file inserted into the page (and all the frames) after it is loaded" })
-	.array("browser-stylesheet")
-	.options("browser-args", { description: "Arguments provided as a JSON array and passed to the browser (puppeteer, webdriver-gecko, webdriver-chromium)" })
-	.string("browser-args")
-	.options("browser-start-minimized", { description: "Minimize the browser (puppeteer)" })
-	.boolean("browser-start-minimized")
-	.options("browser-cookie", { description: "Ordered list of cookie parameters separated by a comma: name,value,domain,path,expires,httpOnly,secure,sameSite,url (puppeteer, webdriver-gecko, webdriver-chromium, jsdom)" })
-	.array("browser-cookie")
-	.options("browser-cookies-file", { description: "Path of the cookies file formatted as a JSON file or a Netscape text file (puppeteer, webdriver-gecko, webdriver-chromium, jsdom)" })
-	.string("browser-cookies-file")
-	.options("compress-CSS", { description: "Compress CSS stylesheets" })
-	.boolean("compress-CSS")
-	.options("compress-HTML", { description: "Compress HTML content" })
-	.boolean("compress-HTML")
-	.options("crawl-links", { description: "Crawl and save pages found via inner links" })
-	.boolean("crawl-links")
-	.options("crawl-inner-links-only", { description: "Crawl pages found via inner links only if they are hosted on the same domain" })
-	.boolean("crawl-inner-links-only")
-	.options("crawl-no-parent", { description: "Crawl pages found via inner links only if their URLs are not parent of the URL to crawl" })
-	.boolean("crawl-no-parent")
-	.options("crawl-load-session", { description: "Name of the file of the session to load (previously saved with --crawl-save-session or --crawl-sync-session)" })
-	.string("crawl-load-session")
-	.options("crawl-remove-url-fragment", { description: "Remove URL fragments found in links" })
-	.boolean("crawl-remove-url-fragment")
-	.options("crawl-save-session", { description: "Name of the file where to save the state of the session" })
-	.string("crawl-save-session")
-	.options("crawl-sync-session", { description: "Name of the file where to load and save the state of the session" })
-	.string("crawl-sync-session")
-	.options("crawl-max-depth", { description: "Max depth when crawling pages found in internal and external links (0: infinite)" })
-	.number("crawl-max-depth")
-	.options("crawl-external-links-max-depth", { description: "Max depth when crawling pages found in external links (0: infinite)" })
-	.number("crawl-external-links-max-depth")
-	.options("crawl-replace-urls", { description: "Replace URLs of saved pages with relative paths of saved pages on the filesystem" })
-	.boolean("crawl-replace-urls")
-	.options("crawl-rewrite-rule", { description: "Rewrite rule used to rewrite URLs of crawled pages" })
-	.array("crawl-rewrite-rule")
-	.options("dump-content", { description: "Dump the content of the processed page in the console ('true' when running in Docker)" })
-	.boolean("dump-content")
-	.options("emulate-media-feature", { description: "Emulate a media feature. The syntax is <name>:<value>, e.g. \"prefers-color-scheme:dark\" (puppeteer)" })
-	.array("emulate-media-feature")
-	.options("error-file")
-	.string("error-file")
-	.options("filename-template", { description: "Template used to generate the output filename (see help page of the extension for more info)" })
-	.string("filename-template")
-	.options("filename-conflict-action", { description: "Action when the filename is conflicting with existing one on the filesystem. The possible values are \"uniquify\" (default), \"overwrite\" and \"skip\"" })
-	.string("filename-conflict-action")
-	.options("filename-replacement-character", { description: "The character used for replacing invalid characters in filenames" })
-	.string("filename-replacement-character")
-	.options("filename-max-length", { description: "Specify the maximum length of the filename" })
-	.number("filename-max-length")
-	.options("filename-max-length-unit", { description: "Specify the unit of the maximum length of the filename ('bytes' or 'chars')" })
-	.string("filename-max-length-unit")
-	.options("group-duplicate-images", { description: "Group duplicate images into CSS custom properties" })
-	.boolean("group-duplicate-images")
-	.options("http-header", { description: "Extra HTTP header (puppeteer, jsdom)" })
-	.array("http-header")
-	.options("include-BOM", { description: "Include the UTF-8 BOM into the HTML page" })
-	.boolean("include-BOM")
-	.options("include-infobar", { description: "Include the infobar" })
-	.boolean("include-infobar")
-	.options("insert-meta-csp", { description: "Include a <meta> tag with a CSP to avoid potential requests to internet when viewing a page" })
-	.boolean("insert-meta-csp")
-	.options("load-deferred-images", { description: "Load deferred (a.k.a. lazy-loaded) images (puppeteer, webdriver-gecko, webdriver-chromium)" })
-	.boolean("load-deferred-images")
-	.options("load-deferred-images-dispatch-scroll-event", { description: "Dispatch 'scroll' event when loading deferred images" })
-	.boolean("load-deferred-images-dispatch-scroll-event")
-	.options("load-deferred-images-max-idle-time", { description: "Maximum delay of time to wait for deferred images in ms (puppeteer, webdriver-gecko, webdriver-chromium)" })
-	.number("load-deferred-images-max-idle-time")
-	.options("load-deferred-images-keep-zoom-level", { description: "Load deferred images by keeping zoomed out the page" })
-	.boolean("load-deferred-images-keep-zoom-level")
-	.options("max-parallel-workers", { description: "Maximum number of browsers launched in parallel when processing a list of URLs (cf --urls-file)" })
-	.number("max-parallel-workers")
-	.options("max-resource-size-enabled", { description: "Enable removal of embedded resources exceeding a given size" })
-	.boolean("max-resource-size-enabled")
-	.options("max-resource-size", { description: "Maximum size of embedded resources in MB (i.e. images, stylesheets, scripts and iframes)" })
-	.number("max-resource-size")
-	.options("move-styles-in-head", { description: "Move style elements outside the head element into the head element" })
-	.boolean("move-styles-in-head")
-	.options("remove-frames", { description: "Remove frames (puppeteer, webdriver-gecko, webdriver-chromium)" })
-	.boolean("remove-frames")
-	.options("remove-hidden-elements", { description: "Remove HTML elements which are not displayed" })
-	.boolean("remove-hidden-elements")
-	.options("remove-unused-styles", { description: "Remove unused CSS rules and unneeded declarations" })
-	.boolean("remove-unused-styles")
-	.options("remove-unused-fonts", { description: "Remove unused CSS font rules" })
-	.boolean("remove-unused-fonts")
-	.options("remove-imports", { description: "Remove HTML imports" })
-	.boolean("remove-imports")
-	.options("block-scripts", { description: "Block scripts" })
-	.boolean("block-scripts")
-	.options("block-audios", { description: "Block audio elements" })
-	.boolean("block-audios")
-	.options("block-videos", { description: "Block video elements" })
-	.boolean("block-videos")
-	.options("remove-alternative-fonts", { description: "Remove alternative fonts to the ones displayed" })
-	.boolean("remove-alternative-fonts")
-	.options("remove-alternative-medias", { description: "Remove alternative CSS stylesheets" })
-	.boolean("remove-alternative-medias")
-	.options("remove-alternative-images", { description: "Remove images for alternative sizes of screen" })
-	.boolean("remove-alternative-images")
-	.options("save-original-urls", { description: "Save the original URLS in the embedded contents" })
-	.boolean("save-original-urls")
-	.options("save-raw-page", { description: "Save the original page without interpreting it into the browser (puppeteer, webdriver-gecko, webdriver-chromium)" })
-	.boolean("save-raw-page")
-	.options("urls-file", { description: "Path to a text file containing a list of URLs (separated by a newline) to save" })
-	.string("urls-file")
-	.options("user-agent", { description: "User-agent of the browser (puppeteer, webdriver-gecko, webdriver-chromium)" })
-	.string("user-agent")
-	.options("user-script-enabled", { description: "Enable the event API allowing to execute scripts before the page is saved" })
-	.boolean("user-script-enabled")
-	.options("web-driver-executable-path", { description: "Path to Selenium WebDriver executable (webdriver-gecko, webdriver-chromium)" })
-	.string("web-driver-executable-path")
-	.options("output-directory", { description: "Path to where to save files, this path must exist." })
-	.string("output-directory")
-	.argv;
-args.backgroundSave = true;
-args.compressCSS = args.compressCss;
-args.compressHTML = args.compressHtml;
-args.includeBOM = args.includeBom;
-args.crawlReplaceURLs = args.crawlReplaceUrls;
-args.crawlRemoveURLFragment = args.crawlRemoveUrlFragment;
-args.insertMetaCSP = args.insertMetaCsp;
-if (args.removeScripts) {
-	args.blockScripts = true;
-}
-if (args.removeAudioSrc) {
-	args.blockAudios = true;
-}
-if (args.removeVideoSrc) {
-	args.blockVideos = true;
-}
-const headers = args.httpHeader;
-delete args.httpHeader;
-args.httpHeaders = {};
-headers.forEach(header => {
-	const matchedHeader = header.match(/^(.*?):(.*)$/);
-	if (matchedHeader.length == 3) {
-		args.httpHeaders[matchedHeader[1].trim()] = matchedHeader[2].trimLeft();
-	}
-});
-const cookies = args.browserCookie;
-delete args.browserCookie;
-args.browserCookies = cookies.map(cookieValue => {
-	const value = cookieValue.split(/(?<!\\),/);
-	return {
-		name: value[0],
-		value: value[1],
-		domain: value[2] || undefined,
-		path: value[3] || undefined,
-		expires: value[4] && Number(value[4]) || undefined,
-		httpOnly: value[5] && value[5] == "true" || undefined,
-		secure: value[6] && value[5] == "true" || undefined,
-		sameSite: value[7] || undefined,
-		url: value[8] || undefined
-	};
-});
-args.browserScripts = args.browserScript;
-delete args.browserScript;
-args.browserStylesheets = args.browserStylesheet;
-delete args.browserStylesheet;
-args.crawlRewriteRules = args.crawlRewriteRule;
-delete args.crawlRewriteRule;
-args.emulateMediaFeatures = args.emulateMediaFeature
-	.map(value => {
-		const splitValue = value.match(/^([^:]+):(.*)$/);
-		if (splitValue.length >= 3) {
-			return { name: splitValue[1].trim(), value: splitValue[2].trim() };
-		}
-	})
-	.filter(identity => identity);
-delete args.emulateMediaFeature;
-Object.keys(args).filter(optionName => optionName.includes("-"))
-	.forEach(optionName => delete args[optionName]);
-delete args["$0"];
-delete args["_"];
-module.exports = args;

+ 0 - 66
cli/back-ends/common/scripts.js

@@ -1,66 +0,0 @@
-/*
- * Copyright 2010-2020 Gildas Lormeau
- * contact : gildas.lormeau <at> gmail.com
- * 
- * This file is part of SingleFile.
- *
- *   The code in this file is free software: you can redistribute it and/or 
- *   modify it under the terms of the GNU Affero General Public License 
- *   (GNU AGPL) as published by the Free Software Foundation, either version 3
- *   of the License, or (at your option) any later version.
- * 
- *   The code in this file is distributed in the hope that it will be useful, 
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of 
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero 
- *   General Public License for more details.
- *
- *   As additional permission under GNU AGPL version 3 section 7, you may 
- *   distribute UNMODIFIED VERSIONS OF THIS file without the copy of the GNU 
- *   AGPL normally required by section 4, provided you include this license 
- *   notice and a URL through which recipients can access the Corresponding 
- *   Source.
- */
-
-/* global require, exports */
-
-const fs = require("fs");
-
-const SCRIPTS = [
-	"lib/single-file.js",
-	"lib/single-file-bootstrap.js",
-	"lib/single-file-hooks.js",
-	"lib/single-file-hooks-frames.js"
-];
-
-const basePath = "../../../";
-
-exports.get = async options => {
-	let scripts = "let _singleFileDefine; if (typeof define !== 'undefined') { _singleFileDefine = define; define = null }";
-	scripts += await readScriptFiles(SCRIPTS, basePath);
-	scripts += await readScriptFiles(options && options.browserScripts ? options.browserScripts : [], "");
-	if (options.browserStylesheets && options.browserStylesheets.length) {
-		scripts += "addEventListener(\"load\",()=>{const styleElement=document.createElement(\"style\");styleElement.textContent=" + JSON.stringify(await readScriptFiles(options.browserStylesheets, "")) + ";document.body.appendChild(styleElement);});";
-	}
-	scripts += "if (_singleFileDefine) { define = _singleFileDefine; _singleFileDefine = null }";
-	return scripts;
-};
-
-exports.getInfobarScript = () => {
-	return readScriptFile("lib/single-file-infobar.js", basePath);
-};
-
-async function readScriptFiles(paths, basePath = "../../../") {
-	return (await Promise.all(paths.map(path => readScriptFile(path, basePath)))).join("");
-}
-
-function readScriptFile(path, basePath) {
-	return new Promise((resolve, reject) =>
-		fs.readFile(basePath ? require.resolve(basePath + path) : path, (err, data) => {
-			if (err) {
-				reject(err);
-			} else {
-				resolve(data.toString() + "\n");
-			}
-		})
-	);
-}

+ 0 - 36
cli/back-ends/extensions/bypass-csp/index.js

@@ -1,36 +0,0 @@
-/*
- * Copyright 2010-2020 Gildas Lormeau
- * contact : gildas.lormeau <at> gmail.com
- * 
- * This file is part of SingleFile.
- *
- *   The code in this file is free software: you can redistribute it and/or 
- *   modify it under the terms of the GNU Affero General Public License 
- *   (GNU AGPL) as published by the Free Software Foundation, either version 3
- *   of the License, or (at your option) any later version.
- * 
- *   The code in this file is distributed in the hope that it will be useful, 
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of 
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero 
- *   General Public License for more details.
- *
- *   As additional permission under GNU AGPL version 3 section 7, you may 
- *   distribute UNMODIFIED VERSIONS OF THIS file without the copy of the GNU 
- *   AGPL normally required by section 4, provided you include this license 
- *   notice and a URL through which recipients can access the Corresponding 
- *   Source.
- */
-
-const removedHeaders = ["content-security-policy"];
-
-const browserAPI = this.browser || this.chrome;
-
-browserAPI.webRequest.onHeadersReceived.addListener(
-	function (details) {
-		let responseHeaders = details.responseHeaders;
-		responseHeaders = responseHeaders.filter(responseHeader => !removedHeaders.includes(responseHeader.name.toLowerCase()));
-		return { responseHeaders };
-	},
-	{ urls: ["<all_urls>"] },
-	["blocking", "responseHeaders"]
-);

+ 0 - 20
cli/back-ends/extensions/bypass-csp/manifest.json

@@ -1,20 +0,0 @@
-{
-	"name": "bypass-csp",
-	"version": "0.0.3",
-	"background": {
-		"scripts": [
-			"index.js"
-		]
-	},
-	"permissions": [
-		"webRequest",
-		"webRequestBlocking",
-		"<all_urls>"
-	],
-	"browser_specific_settings": {
-		"gecko": {
-			"id": "{55e2789c-817b-4d75-815b-df6921c84ed8}"
-		}
-	},
-	"manifest_version": 2
-}

+ 0 - 54
cli/back-ends/extensions/disable-web-security/index.js

@@ -1,54 +0,0 @@
-/*
- * Copyright 2010-2020 Gildas Lormeau
- * contact : gildas.lormeau <at> gmail.com
- * 
- * This file is part of SingleFile.
- *
- *   The code in this file is free software: you can redistribute it and/or 
- *   modify it under the terms of the GNU Affero General Public License 
- *   (GNU AGPL) as published by the Free Software Foundation, either version 3
- *   of the License, or (at your option) any later version.
- * 
- *   The code in this file is distributed in the hope that it will be useful, 
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of 
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero 
- *   General Public License for more details.
- *
- *   As additional permission under GNU AGPL version 3 section 7, you may 
- *   distribute UNMODIFIED VERSIONS OF THIS file without the copy of the GNU 
- *   AGPL normally required by section 4, provided you include this license 
- *   notice and a URL through which recipients can access the Corresponding 
- *   Source.
- */
-
-const removedHeaders = ["access-control-allow-methods", "access-control-allow-headers"];
-const updatedHeaders = { "access-control-allow-origin": "*", "access-control-allow-credentials": "true" };
-
-const browserAPI = this.browser || this.chrome;
-
-browserAPI.webRequest.onHeadersReceived.addListener(
-	function (details) {
-		let responseHeaders = details.responseHeaders;
-		let processedHeaders = [];
-		responseHeaders = responseHeaders.filter(responseHeader => !removedHeaders.includes(responseHeader.name.toLowerCase()));
-		responseHeaders.forEach(responseHeader => {
-			const name = responseHeader.name.toLowerCase();
-			const value = updatedHeaders[name];
-			if (value) {
-				responseHeader.value = value;
-				processedHeaders.push(name);
-			}
-		});
-		Object.keys(updatedHeaders).forEach(name => {
-			if (!processedHeaders.includes(name)) {
-				const value = updatedHeaders[name.toLowerCase()];
-				if (value) {
-					responseHeaders.push({ name, value });
-				}
-			}
-		});
-		return { responseHeaders };
-	},
-	{ urls: ["<all_urls>"] },
-	["blocking", "responseHeaders"]
-);

+ 0 - 20
cli/back-ends/extensions/disable-web-security/manifest.json

@@ -1,20 +0,0 @@
-{
-	"name": "disable-web-security",
-	"version": "0.0.3",
-	"background": {
-		"scripts": [
-			"index.js"
-		]
-	},
-	"permissions": [
-		"webRequest",
-		"webRequestBlocking",
-		"<all_urls>"
-	],
-	"browser_specific_settings": {
-		"gecko": {
-			"id": "{588434c2-67c3-4f77-9828-c30c7d63e8f9}"
-		}
-	},
-	"manifest_version": 2
-}

+ 0 - 61
cli/back-ends/extensions/network-idle/bg.js

@@ -1,61 +0,0 @@
-/*
- * Copyright 2010-2020 Gildas Lormeau
- * contact : gildas.lormeau <at> gmail.com
- * 
- * This file is part of SingleFile.
- *
- *   The code in this file is free software: you can redistribute it and/or 
- *   modify it under the terms of the GNU Affero General Public License 
- *   (GNU AGPL) as published by the Free Software Foundation, either version 3
- *   of the License, or (at your option) any later version.
- * 
- *   The code in this file is distributed in the hope that it will be useful, 
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of 
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero 
- *   General Public License for more details.
- *
- *   As additional permission under GNU AGPL version 3 section 7, you may 
- *   distribute UNMODIFIED VERSIONS OF THIS file without the copy of the GNU 
- *   AGPL normally required by section 4, provided you include this license 
- *   notice and a URL through which recipients can access the Corresponding 
- *   Source.
- */
-
-/* global setTimeout, clearTimeout */
-
-const browserAPI = this.browser || this.chrome;
-
-const IDLE_DELAY = 1000;
-const watchDogs = [];
-let pendingRequests = new Set();
-
-browserAPI.webRequest.onSendHeaders.addListener(onRequest, { urls: ["<all_urls>"] }, []);
-browserAPI.webRequest.onResponseStarted.addListener(onResponse, { urls: ["<all_urls>"] }, []);
-browserAPI.webRequest.onErrorOccurred.addListener(onResponse, { urls: ["<all_urls>"] });
-
-function onRequest(details) {
-	if (details.tabId != -1) {
-		pendingRequests.add(details.requestId);
-		if (pendingRequests.size > 2) {
-			clearTimeout(watchDogs[2]);
-		}
-		clearTimeout(watchDogs[0]);
-	}
-}
-
-function onResponse(details) {
-	if (details.tabId != -1) {
-		pendingRequests.delete(details.requestId);
-		if (pendingRequests.size == 2) {
-			maybeIdle(2, details.tabId);
-		}
-		if (pendingRequests.size == 0) {
-			maybeIdle(0, details.tabId);
-		}
-	}
-}
-
-function maybeIdle(idleLevel, tabId) {
-	clearTimeout(watchDogs[idleLevel]);
-	watchDogs[idleLevel] = setTimeout(() => browserAPI.tabs.sendMessage(tabId, "network-idle-" + idleLevel), IDLE_DELAY);
-}

+ 0 - 29
cli/back-ends/extensions/network-idle/content.js

@@ -1,29 +0,0 @@
-/*
- * Copyright 2010-2020 Gildas Lormeau
- * contact : gildas.lormeau <at> gmail.com
- * 
- * This file is part of SingleFile.
- *
- *   The code in this file is free software: you can redistribute it and/or 
- *   modify it under the terms of the GNU Affero General Public License 
- *   (GNU AGPL) as published by the Free Software Foundation, either version 3
- *   of the License, or (at your option) any later version.
- * 
- *   The code in this file is distributed in the hope that it will be useful, 
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of 
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero 
- *   General Public License for more details.
- *
- *   As additional permission under GNU AGPL version 3 section 7, you may 
- *   distribute UNMODIFIED VERSIONS OF THIS file without the copy of the GNU 
- *   AGPL normally required by section 4, provided you include this license 
- *   notice and a URL through which recipients can access the Corresponding 
- *   Source.
- */
-
-/* global window, CustomEvent */
-
-const browserAPI = this.browser || this.chrome;
-const dispatchEvent = event => { try { window.dispatchEvent(event); } catch (error) {  /* ignored */ } };
-
-browserAPI.runtime.onMessage.addListener(message => dispatchEvent(new CustomEvent("single-file-" + message)));

+ 0 - 31
cli/back-ends/extensions/network-idle/manifest.json

@@ -1,31 +0,0 @@
-{
-	"name": "network-idle",
-	"version": "0.0.2",
-	"background": {
-		"scripts": [
-			"bg.js"
-		]
-	},
-	"content_scripts": [
-		{
-			"matches": [
-				"<all_urls>"
-			],
-			"run_at": "document_start",
-			"js": [
-				"content.js"
-			]
-		}
-	],
-	"permissions": [
-		"tabs",
-		"webRequest",
-		"<all_urls>"
-	],
-	"browser_specific_settings": {
-		"gecko": {
-			"id": "{caf9306a-8951-4f0c-beb0-bab690d00caf}"
-		}
-	},
-	"manifest_version": 2
-}

BIN
cli/back-ends/extensions/signed/bypass_csp-0.0.3-an+fx.xpi


BIN
cli/back-ends/extensions/signed/disable_web_security-0.0.3-an+fx.xpi


BIN
cli/back-ends/extensions/signed/network_idle-0.0.2-an+fx.xpi


+ 0 - 175
cli/back-ends/jsdom.js

@@ -1,175 +0,0 @@
-/*
- * Copyright 2010-2020 Gildas Lormeau
- * contact : gildas.lormeau <at> gmail.com
- * 
- * This file is part of SingleFile.
- *
- *   The code in this file is free software: you can redistribute it and/or 
- *   modify it under the terms of the GNU Affero General Public License 
- *   (GNU AGPL) as published by the Free Software Foundation, either version 3
- *   of the License, or (at your option) any later version.
- * 
- *   The code in this file is distributed in the hope that it will be useful, 
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of 
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero 
- *   General Public License for more details.
- *
- *   As additional permission under GNU AGPL version 3 section 7, you may 
- *   distribute UNMODIFIED VERSIONS OF THIS file without the copy of the GNU 
- *   AGPL normally required by section 4, provided you include this license 
- *   notice and a URL through which recipients can access the Corresponding 
- *   Source.
- */
-
-/* global require, exports, Buffer, setTimeout */
-
-const crypto = require("crypto");
-
-const jsdom = require("jsdom");
-const { JSDOM, VirtualConsole } = jsdom;
-const iconv = require("iconv-lite");
-
-exports.initialize = async () => { };
-
-exports.getPageData = async options => {
-	let win;
-	try {
-		const dom = await JSDOM.fromURL(options.url, await getBrowserOptions(options));
-		win = dom.window;
-		return await getPageData(win, options);
-	} finally {
-		if (win) {
-			win.close();
-		}
-	}
-};
-
-exports.closeBrowser = () => { };
-
-async function getPageData(win, options) {
-	const doc = win.document;
-	const scripts = await require("./common/scripts.js").get(options);
-	win.TextDecoder = class {
-		constructor(utfLabel) {
-			this.utfLabel = utfLabel;
-		}
-		decode(buffer) {
-			return iconv.decode(Buffer.from(buffer), this.utfLabel);
-		}
-	};
-	win.crypto = {
-		subtle: {
-			digest: async function digestText(algo, text) {
-				const hash = crypto.createHash(algo.replace("-", "").toLowerCase());
-				hash.update(text, "utf-8");
-				return hash.digest();
-			}
-		}
-	};
-	win.Element.prototype.getBoundingClientRect = undefined;
-	win.getComputedStyle = () => { };
-	win.eval(scripts);
-	if (win.document.readyState == "loading" || win.document.readyState == "interactive") {
-		await new Promise(resolve => win.onload = resolve);
-	}
-	executeFrameScripts(doc, scripts);
-	options.removeHiddenElements = false;
-	options.loadDeferredImages = false;
-	if (options.browserWaitDelay) {
-		await new Promise(resolve => setTimeout(resolve, options.browserWaitDelay));
-	}
-	const pageData = await win.singlefile.getPageData(options, { fetch: url => fetchResource(url, options) }, doc, win);
-	if (options.includeInfobar) {
-		await win.infobar.includeScript(pageData);
-	}
-	return pageData;
-
-	async function fetchResource(resourceURL) {
-		return new Promise((resolve, reject) => {
-			const xhrRequest = new win.XMLHttpRequest();
-			xhrRequest.withCredentials = true;
-			xhrRequest.responseType = "arraybuffer";
-			xhrRequest.onerror = event => reject(new Error(event.detail));
-			xhrRequest.onreadystatechange = () => {
-				if (xhrRequest.readyState == win.XMLHttpRequest.DONE) {
-					resolve({
-						arrayBuffer: async () => new Uint8Array(xhrRequest.response).buffer,
-						headers: {
-							get: headerName => xhrRequest.getResponseHeader(headerName)
-						},
-						status: xhrRequest.status
-					});
-				}
-			};
-			xhrRequest.open("GET", resourceURL, true);
-			xhrRequest.send();
-		});
-	}
-}
-
-async function getBrowserOptions(options) {
-	class ResourceLoader extends jsdom.ResourceLoader {
-		_getRequestOptions(fetchOptions) {
-			const requestOptions = super._getRequestOptions(fetchOptions);
-			if (options.httpHeaders) {
-				requestOptions.headers = Object.assign(requestOptions.headers, options.httpHeaders);
-			}
-			return requestOptions;
-		}
-	}
-	const resourceLoader = new ResourceLoader({
-		userAgent: options.userAgent
-	});
-	const jsdomOptions = {
-		virtualConsole: new VirtualConsole(),
-		userAgent: options.userAgent,
-		pretendToBeVisual: true,
-		runScripts: "outside-only",
-		resources: resourceLoader
-	};
-	if (options.browserWidth && options.browserHeight) {
-		jsdomOptions.beforeParse = function (window) {
-			window.outerWidth = window.innerWidth = options.browserWidth;
-			window.outerHeight = window.innerHeight = options.browserHeight;
-		};
-	}
-	if (options.browserCookies && options.browserCookies.length) {
-		jsdomOptions.cookieJar = new jsdom.CookieJar();
-		await Promise.all(options.browserCookies.map(cookie => {
-			let cookieString = cookie.name + "=" + cookie.value;
-			if (cookie.path) {
-				cookieString += ";path=" + cookie.path;
-			}
-			if (cookie.domain) {
-				cookieString += ";domain=" + cookie.domain;
-			}
-			if (cookie.expires) {
-				cookieString += ";max-age=" + cookie.expires;
-			}
-			if (cookie.secure) {
-				cookieString += ";secure";
-			}
-			if (cookie.sameSite) {
-				cookieString += ";samesite=" + options.sameSite;
-			}
-			const cookieOptions = {
-				http: Boolean(cookie.httpOnly)
-			};
-			return new Promise((resolve, reject) => jsdomOptions.cookieJar.setCookie(cookieString, options.url, cookieOptions, error => error ? reject(error) : resolve()));
-		}));
-	}
-	return jsdomOptions;
-}
-
-function executeFrameScripts(doc, scripts) {
-	const frameElements = doc.querySelectorAll("iframe, frame");
-	frameElements.forEach(frameElement => {
-		try {
-			frameElement.contentWindow.Element.prototype.getBoundingClientRect = undefined;
-			frameElement.contentWindow.eval(scripts);
-			executeFrameScripts(frameElement.contentDocument, scripts);
-		} catch (error) {
-			// ignored
-		}
-	});
-}

+ 0 - 113
cli/back-ends/playwright-chromium.js

@@ -1,113 +0,0 @@
-/*
- * Copyright 2010-2020 Gildas Lormeau
- * contact : gildas.lormeau <at> gmail.com
- * 
- * This file is part of SingleFile.
- *
- *   The code in this file is free software: you can redistribute it and/or 
- *   modify it under the terms of the GNU Affero General Public License 
- *   (GNU AGPL) as published by the Free Software Foundation, either version 3
- *   of the License, or (at your option) any later version.
- * 
- *   The code in this file is distributed in the hope that it will be useful, 
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of 
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero 
- *   General Public License for more details.
- *
- *   As additional permission under GNU AGPL version 3 section 7, you may 
- *   distribute UNMODIFIED VERSIONS OF THIS file without the copy of the GNU 
- *   AGPL normally required by section 4, provided you include this license 
- *   notice and a URL through which recipients can access the Corresponding 
- *   Source.
- */
-
-/* global singlefile, infobar, require, exports */
-
-const playwright = require("playwright").chromium;
-const scripts = require("./common/scripts.js");
-
-const NETWORK_IDLE_STATE = "networkidle";
-
-let browser;
-
-exports.initialize = async options => {
-	browser = await playwright.launch(getBrowserOptions(options));
-};
-
-exports.getPageData = async options => {
-	let page, context;
-	try {
-		context = await browser.newContext({
-			bypassCSP: options.browserBypassCSP === undefined || options.browserBypassCSP
-		});
-		await setContextOptions(context, options);
-		page = await context.newPage();
-		await setPageOptions(page, options);
-		return await getPageData(page, options);
-	} finally {
-		if (page) {
-			await page.close();
-		}
-	}
-};
-
-exports.closeBrowser = () => {
-	if (browser) {
-		return browser.close();
-	}
-};
-
-function getBrowserOptions(options) {
-	const browserOptions = {};
-	if (options.browserHeadless !== undefined) {
-		browserOptions.headless = options.browserHeadless && !options.browserDebug;
-	}
-	browserOptions.args = options.browserArgs ? JSON.parse(options.browserArgs) : [];
-	if (options.browserExecutablePath) {
-		browserOptions.executablePath = options.browserExecutablePath || "chrome";
-	}
-	return browserOptions;
-}
-
-async function setContextOptions(context, options) {
-	if (options.browserCookies && options.browserCookies.length) {
-		await context.addCookies(options.browserCookies);
-	}
-}
-
-async function setPageOptions(page, options) {
-	if (options.browserWidth && options.browserHeight) {
-		await page.setViewportSize({
-			width: options.browserWidth,
-			height: options.browserHeight
-		});
-	}	
-	if (options.httpHeaders) {
-		page.setExtraHTTPHeaders(options.httpHeaders);
-	}
-	if (options.emulateMediaFeatures) {
-		await page.emulateMediaFeatures(options.emulateMediaFeatures);
-	}
-}
-
-async function getPageData(page, options) {
-	const injectedScript = await scripts.get(options);
-	await page.addInitScript(injectedScript);
-	if (options.browserDebug) {
-		await page.waitForTimeout(3000);
-	}
-	await page.goto(options.url, {
-		timeout: options.browserLoadMaxTime || 0,
-		waitUntil: options.browserWaitUntil && options.browserWaitUntil.startsWith("networkidle") ? NETWORK_IDLE_STATE : options.browserWaitUntil || NETWORK_IDLE_STATE
-	});
-	if (options.browserWaitDelay) {
-		await page.waitForTimeout(options.browserWaitDelay);
-	}
-	return await page.evaluate(async options => {
-		const pageData = await singlefile.getPageData(options);
-		if (options.includeInfobar) {
-			await infobar.includeScript(pageData);
-		}
-		return pageData;
-	}, options);
-}

+ 0 - 113
cli/back-ends/playwright-firefox.js

@@ -1,113 +0,0 @@
-/*
- * Copyright 2010-2020 Gildas Lormeau
- * contact : gildas.lormeau <at> gmail.com
- * 
- * This file is part of SingleFile.
- *
- *   The code in this file is free software: you can redistribute it and/or 
- *   modify it under the terms of the GNU Affero General Public License 
- *   (GNU AGPL) as published by the Free Software Foundation, either version 3
- *   of the License, or (at your option) any later version.
- * 
- *   The code in this file is distributed in the hope that it will be useful, 
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of 
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero 
- *   General Public License for more details.
- *
- *   As additional permission under GNU AGPL version 3 section 7, you may 
- *   distribute UNMODIFIED VERSIONS OF THIS file without the copy of the GNU 
- *   AGPL normally required by section 4, provided you include this license 
- *   notice and a URL through which recipients can access the Corresponding 
- *   Source.
- */
-
-/* global singlefile, infobar, require, exports */
-
-const playwright = require("playwright").firefox;
-const scripts = require("./common/scripts.js");
-
-const NETWORK_IDLE_STATE = "networkidle";
-
-let browser;
-
-exports.initialize = async options => {
-	browser = await playwright.launch(getBrowserOptions(options));
-};
-
-exports.getPageData = async options => {
-	let page, context;
-	try {
-		context = await browser.newContext({
-			bypassCSP: options.browserBypassCSP === undefined || options.browserBypassCSP
-		});
-		await setContextOptions(context, options);
-		page = await context.newPage();
-		await setPageOptions(page, options);
-		return await getPageData(page, options);
-	} finally {
-		if (page) {
-			await page.close();
-		}
-	}
-};
-
-exports.closeBrowser = () => {
-	if (browser) {
-		return browser.close();
-	}
-};
-
-function getBrowserOptions(options) {
-	const browserOptions = {};
-	if (options.browserHeadless !== undefined) {
-		browserOptions.headless = options.browserHeadless && !options.browserDebug;
-	}
-	browserOptions.args = options.browserArgs ? JSON.parse(options.browserArgs) : [];
-	if (options.browserExecutablePath) {
-		browserOptions.executablePath = options.browserExecutablePath || "firefox";
-	}
-	return browserOptions;
-}
-
-async function setContextOptions(context, options) {
-	if (options.browserCookies && options.browserCookies.length) {
-		await context.addCookies(options.browserCookies);
-	}
-}
-
-async function setPageOptions(page, options) {
-	if (options.browserWidth && options.browserHeight) {
-		await page.setViewportSize({
-			width: options.browserWidth,
-			height: options.browserHeight
-		});
-	}
-	if (options.httpHeaders) {
-		page.setExtraHTTPHeaders(options.httpHeaders);
-	}
-	if (options.emulateMediaFeatures) {
-		await page.emulateMediaFeatures(options.emulateMediaFeatures);
-	}
-}
-
-async function getPageData(page, options) {
-	const injectedScript = await scripts.get(options);
-	await page.addInitScript(injectedScript);
-	if (options.browserDebug) {
-		await page.waitForTimeout(3000);
-	}
-	await page.goto(options.url, {
-		timeout: options.browserLoadMaxTime || 0,
-		waitUntil: options.browserWaitUntil && options.browserWaitUntil.startsWith("networkidle") ? NETWORK_IDLE_STATE : options.browserWaitUntil || NETWORK_IDLE_STATE
-	});
-	if (options.browserWaitDelay) {
-		await page.waitForTimeout(options.browserWaitDelay);
-	}
-	return await page.evaluate(async options => {
-		const pageData = await singlefile.getPageData(options);
-		if (options.includeInfobar) {
-			await infobar.includeScript(pageData);
-		}
-		return pageData;
-	}, options);
-}

+ 0 - 170
cli/back-ends/puppeteer-firefox.js

@@ -1,170 +0,0 @@
-/*
- * Copyright 2010-2020 Gildas Lormeau
- * contact : gildas.lormeau <at> gmail.com
- * 
- * This file is part of SingleFile.
- *
- *   The code in this file is free software: you can redistribute it and/or 
- *   modify it under the terms of the GNU Affero General Public License 
- *   (GNU AGPL) as published by the Free Software Foundation, either version 3
- *   of the License, or (at your option) any later version.
- * 
- *   The code in this file is distributed in the hope that it will be useful, 
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of 
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero 
- *   General Public License for more details.
- *
- *   As additional permission under GNU AGPL version 3 section 7, you may 
- *   distribute UNMODIFIED VERSIONS OF THIS file without the copy of the GNU 
- *   AGPL normally required by section 4, provided you include this license 
- *   notice and a URL through which recipients can access the Corresponding 
- *   Source.
- */
-
-/* global singlefile, infobar, require, exports */
-
-const puppeteer = require("puppeteer-core");
-const scripts = require("./common/scripts.js");
-
-const EXECUTION_CONTEXT_DESTROYED_ERROR = "Execution context was destroyed";
-const NETWORK_IDLE_STATE = "networkidle0";
-
-let browser;
-
-exports.initialize = async options => {
-	browser = await puppeteer.launch(getBrowserOptions(options));
-};
-
-exports.getPageData = async options => {
-	let page;
-	try {
-		page = await browser.newPage();
-		await setPageOptions(page, options);
-		return await getPageData(browser, page, options);
-	} finally {
-		if (page) {
-			await page.close();
-		}
-	}
-};
-
-exports.closeBrowser = () => {
-	if (browser) {
-		return browser.close();
-	}
-};
-
-function getBrowserOptions(options) {
-	const browserOptions = {};
-	if (options.browserHeadless !== undefined) {
-		browserOptions.headless = options.browserHeadless && !options.browserDebug;
-	}
-	browserOptions.args = options.browserArgs ? JSON.parse(options.browserArgs) : [];
-	if (options.browserExecutablePath) {
-		browserOptions.executablePath = options.browserExecutablePath || "firefox";
-	}
-	browserOptions.product = "firefox";
-	return browserOptions;
-}
-
-async function setPageOptions(page, options) {
-	if (options.browserWidth && options.browserHeight) {
-		await page.setViewport({
-			width: options.browserWidth,
-			height: options.browserHeight
-		});
-	}
-	if ((options.browserBypassCSP === undefined || options.browserBypassCSP) && page.setBypassCSP) {
-		try {
-			await page.setBypassCSP(true);
-		} catch (error) {
-			// ignored
-		}
-	}
-	if (options.httpHeaders) {
-		try {
-			await page.setExtraHTTPHeaders(options.httpHeaders);
-		} catch (error) {
-			// ignored
-		}
-	}
-	if (options.browserCookies && options.browserCookies.length) {
-		await page.setCookie(...options.browserCookies);
-	}
-	if (options.emulateMediaFeatures) {
-		try {
-			await page.emulateMediaFeatures(options.emulateMediaFeatures);
-		} catch (error) {
-			// ignored
-		}
-	}
-}
-
-async function getPageData(browser, page, options) {
-	const injectedScript = await scripts.get(options);
-	await page.evaluateOnNewDocument(injectedScript);
-	if (options.browserDebug) {
-		await page.waitForTimeout(3000);
-	}
-	await pageGoto(page, options);
-	try {
-		await page.evaluate(injectedScript);
-		if (options.browserWaitDelay) {
-			await page.waitForTimeout(options.browserWaitDelay);
-		}
-		return await page.evaluate(async options => {
-			const pageData = await singlefile.getPageData(options);
-			if (options.includeInfobar) {
-				await infobar.includeScript(pageData);
-			}
-			return pageData;
-		}, options);
-	} catch (error) {
-		if (error.message && error.message.includes(EXECUTION_CONTEXT_DESTROYED_ERROR)) {
-			const pageData = await handleJSRedirect(browser, options);
-			if (pageData) {
-				return pageData;
-			} else {
-				throw error;
-			}
-		} else if (error.name != "TimeoutError") {
-			throw error;
-		}
-	}
-}
-
-async function handleJSRedirect(browser, options) {
-	const pages = await browser.pages();
-	const page = pages[1] || pages[0];
-	try {
-		await pageGoto(page, options);
-	} catch (error) {
-		if (error.name != "TimeoutError") {
-			throw error;
-		}
-	}
-	const url = page.url();
-	if (url != options.url) {
-		options.url = url;
-		await browser.close();
-		return exports.getPageData(options);
-	}
-}
-
-async function pageGoto(page, options) {
-	try {
-		await page.goto(options.url, {
-			timeout: options.browserLoadMaxTime || 0,
-			waitUntil: options.browserWaitUntil || NETWORK_IDLE_STATE
-		});
-	} catch (error) {
-		if (error.message.includes("Unknown waitUntil condition")) {
-			await page.goto(options.url, {
-				timeout: options.browserLoadMaxTime || 0,
-				waitUntil: "load"
-			});
-		} else {
-			throw error;
-		}
-	}
-}

+ 0 - 189
cli/back-ends/puppeteer.js

@@ -1,189 +0,0 @@
-/*
- * Copyright 2010-2020 Gildas Lormeau
- * contact : gildas.lormeau <at> gmail.com
- * 
- * This file is part of SingleFile.
- *
- *   The code in this file is free software: you can redistribute it and/or 
- *   modify it under the terms of the GNU Affero General Public License 
- *   (GNU AGPL) as published by the Free Software Foundation, either version 3
- *   of the License, or (at your option) any later version.
- * 
- *   The code in this file is distributed in the hope that it will be useful, 
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of 
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero 
- *   General Public License for more details.
- *
- *   As additional permission under GNU AGPL version 3 section 7, you may 
- *   distribute UNMODIFIED VERSIONS OF THIS file without the copy of the GNU 
- *   AGPL normally required by section 4, provided you include this license 
- *   notice and a URL through which recipients can access the Corresponding 
- *   Source.
- */
-
-/* global singlefile, infobar, require, exports */
-
-const puppeteer = require("puppeteer-core");
-const scripts = require("./common/scripts.js");
-
-const EXECUTION_CONTEXT_DESTROYED_ERROR = "Execution context was destroyed";
-const NETWORK_IDLE_STATE = "networkidle0";
-const NETWORK_STATES = ["networkidle0", "networkidle2", "load", "domcontentloaded"];
-
-let browser;
-
-exports.initialize = async options => {
-	if (options.browserServer) {
-		browser = await puppeteer.connect({ browserWSEndpoint: options.browserServer });
-	} else {
-		browser = await puppeteer.launch(getBrowserOptions(options));
-	}
-	return browser;
-};
-
-exports.getPageData = async (options, page) => {
-	const privatePage = !page;
-	try {
-		if (privatePage) {
-			page = await browser.newPage();
-		}
-		await setPageOptions(page, options);
-		return await getPageData(browser, page, options);
-	} finally {
-		if (privatePage) {
-			await page.close();
-		}
-	}
-};
-
-exports.closeBrowser = () => {
-	if (browser) {
-		return browser.close();
-	}
-};
-
-function getBrowserOptions(options = {}) {
-	const browserOptions = {};
-	if (options.browserHeadless !== undefined) {
-		browserOptions.headless = options.browserHeadless && !options.browserDebug;
-	}
-	browserOptions.args = options.browserArgs ? JSON.parse(options.browserArgs) : [];
-	if (options.browserDisableWebSecurity === undefined || options.browserDisableWebSecurity) {
-		browserOptions.args.push("--disable-web-security");
-	}
-	browserOptions.args.push("--no-pings");
-	if (!options.browserStartMinimized && options.browserDebug) {
-		browserOptions.args.push("--auto-open-devtools-for-tabs");
-	}
-	if (options.browserWidth && options.browserHeight) {
-		browserOptions.args.push("--window-size=" + options.browserWidth + "," + options.browserHeight);
-	}
-	browserOptions.executablePath = options.browserExecutablePath || "chrome";
-	if (options.userAgent) {
-		browserOptions.args.push("--user-agent=" + options.userAgent);
-	}
-	return browserOptions;
-}
-
-async function setPageOptions(page, options) {
-	if (options.browserWidth && options.browserHeight) {
-		await page.setViewport({
-			width: options.browserWidth,
-			height: options.browserHeight
-		});
-	}
-	if (options.browserBypassCSP === undefined || options.browserBypassCSP) {
-		await page.setBypassCSP(true);
-	}
-	if (options.httpHeaders) {
-		page.setExtraHTTPHeaders(options.httpHeaders);
-	}
-	if (options.browserStartMinimized) {
-		const session = await page.target().createCDPSession();
-		const { windowId } = await session.send("Browser.getWindowForTarget");
-		await session.send("Browser.setWindowBounds", { windowId, bounds: { windowState: "minimized" } });
-	}
-	if (options.browserCookies && options.browserCookies.length) {
-		await page.setCookie(...options.browserCookies);
-	}
-	if (options.emulateMediaFeatures) {
-		await page.emulateMediaFeatures(options.emulateMediaFeatures);
-	}
-}
-
-async function getPageData(browser, page, options) {
-	const injectedScript = await scripts.get(options);
-	await page.evaluateOnNewDocument(injectedScript);
-	if (options.browserDebug) {
-		await page.waitForTimeout(3000);
-	}
-	try {
-		await pageGoto(page, options);
-	} catch (error) {
-		if (options.browserWaitUntilFallback && error.name == "TimeoutError") {
-			const browserWaitUntil = NETWORK_STATES[(NETWORK_STATES.indexOf(options.browserWaitUntil) + 1)];
-			if (browserWaitUntil) {
-				options.browserWaitUntil = browserWaitUntil;
-				return getPageData(browser, page, options);
-			} else {
-				throw error;
-			}
-		} else if (error.name != "TimeoutError") {
-			throw error;
-		}
-	}
-	try {
-		if (options.browserWaitDelay) {
-			await page.waitForTimeout(options.browserWaitDelay);
-		}
-		return await page.evaluate(async options => {
-			const pageData = await singlefile.getPageData(options);
-			if (options.includeInfobar) {
-				await infobar.includeScript(pageData);
-			}
-			return pageData;
-		}, options);
-	} catch (error) {
-		if (error.message && error.message.includes(EXECUTION_CONTEXT_DESTROYED_ERROR)) {
-			const pageData = await handleJSRedirect(browser, options);
-			if (pageData) {
-				return pageData;
-			} else {
-				throw error;
-			}
-		} else {
-			throw error;
-		}
-	}
-}
-
-async function handleJSRedirect(browser, options) {
-	const pages = await browser.pages();
-	const page = pages[1] || pages[0];
-	try {
-		await pageGoto(page, options);
-	} catch (error) {
-		if (error.name != "TimeoutError") {
-			throw error;
-		}
-	}
-	const url = page.url();
-	if (url != options.url) {
-		options.url = url;
-		await browser.close();
-		return exports.getPageData(options);
-	}
-}
-
-async function pageGoto(page, options) {
-	const loadOptions = {
-		timeout: options.browserLoadMaxTime || 0,
-		waitUntil: options.browserWaitUntil || NETWORK_IDLE_STATE
-	};
-	if (options.content) {
-		await page.goto(options.url, { waitUntil: "domcontentloaded" });
-		await page.setContent(options.content, loadOptions);
-	} else {
-		await page.goto(options.url, loadOptions);
-	}
-}

+ 0 - 168
cli/back-ends/webdriver-chromium.js

@@ -1,168 +0,0 @@
-/*
- * Copyright 2010-2020 Gildas Lormeau
- * contact : gildas.lormeau <at> gmail.com
- * 
- * This file is part of SingleFile.
- *
- *   The code in this file is free software: you can redistribute it and/or 
- *   modify it under the terms of the GNU Affero General Public License 
- *   (GNU AGPL) as published by the Free Software Foundation, either version 3
- *   of the License, or (at your option) any later version.
- * 
- *   The code in this file is distributed in the hope that it will be useful, 
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of 
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero 
- *   General Public License for more details.
- *
- *   As additional permission under GNU AGPL version 3 section 7, you may 
- *   distribute UNMODIFIED VERSIONS OF THIS file without the copy of the GNU 
- *   AGPL normally required by section 4, provided you include this license 
- *   notice and a URL through which recipients can access the Corresponding 
- *   Source.
- */
-
-/* global require, exports, process, setTimeout, clearTimeout, Buffer */
-
-const chrome = require("selenium-webdriver/chrome");
-const { Builder } = require("selenium-webdriver");
-
-exports.initialize = async () => { };
-
-exports.getPageData = async options => {
-	let driver;
-	try {
-		const builder = new Builder();
-		builder.setChromeOptions(getBrowserOptions(options));
-		driver = builder.forBrowser("chrome").build();
-		return await getPageData(driver, options);
-	} finally {
-		if (driver) {
-			driver.quit();
-		}
-	}
-};
-
-exports.closeBrowser = () => { };
-
-function getBrowserOptions(options) {
-	const chromeOptions = new chrome.Options();
-	const optionHeadless = (options.browserHeadless === undefined || options.browserHeadless) && !options.browserDebug;
-	if (optionHeadless) {
-		chromeOptions.headless();
-	}
-	if (options.browserExecutablePath) {
-		chromeOptions.setChromeBinaryPath(options.browserExecutablePath);
-	}
-	if (options.webDriverExecutablePath) {
-		process.env["PATH"] += ";" + options.webDriverExecutablePath.replace(/chromedriver(\.exe)?$/, "");
-	}
-	if (options.browserArgs) {
-		const args = JSON.parse(options.browserArgs);
-		args.forEach(argument => chromeOptions.addArguments(argument));
-	}
-	if (options.browserDisableWebSecurity === undefined || options.browserDisableWebSecurity) {
-		chromeOptions.addArguments("--disable-web-security");
-	}
-	chromeOptions.addArguments("--no-pings");
-	if (!optionHeadless) {
-		if (options.browserDebug) {
-			chromeOptions.addArguments("--auto-open-devtools-for-tabs");
-		}
-		const extensions = [];
-		if (options.browserBypassCSP === undefined || options.browserBypassCSP) {
-			extensions.push(encode(require.resolve("./extensions/signed/bypass_csp-0.0.3-an+fx.xpi")));
-		}
-		if (options.browserWaitUntil === undefined || options.browserWaitUntil == "networkidle0" || options.browserWaitUntil == "networkidle2") {
-			extensions.push(encode(require.resolve("./extensions/signed/network_idle-0.0.2-an+fx.xpi")));
-		}
-		chromeOptions.addExtensions(extensions);
-	}
-	if (options.userAgent) {
-		chromeOptions.addArguments("--user-agent=" + JSON.stringify(options.userAgent));
-	}
-	if (options.browserMobileEmulation) {
-		chromeOptions.setMobileEmulation({
-			deviceName: options.browserMobileEmulation
-		});
-	}
-	return chromeOptions;
-}
-
-async function getPageData(driver, options) {
-	const optionHeadless = (options.browserHeadless === undefined || options.browserHeadless) && !options.browserDebug;
-	driver.manage().setTimeouts({ script: options.browserLoadMaxTime, pageLoad: options.browserLoadMaxTime, implicit: options.browserLoadMaxTime });
-	if (options.browserWidth && options.browserHeight) {
-		const window = driver.manage().window();
-		if (window.setRect) {
-			window.setRect(options.browserHeight, options.browserWidth);
-		} else if (window.setSize) {
-			window.setSize(options.browserWidth, options.browserHeight);
-		}
-	}
-	const scripts = await require("./common/scripts.js").get(options);
-	if (options.browserDebug) {
-		// await driver.sleep(3000);
-	}
-	await driver.get(options.url);
-	if (options.browserCookies) {
-		await Promise.all(options.browserCookies.map(cookie => {
-			if (cookie.expires) {
-				cookie.expiry = cookie.expires;
-				delete cookie.expires;
-			}
-			return driver.manage().addCookie(cookie);
-		}));
-		await driver.get(options.url);
-	}
-	await driver.executeScript(scripts);
-	if (options.browserWaitUntil != "domcontentloaded") {
-		let scriptPromise;
-		if (!optionHeadless && (options.browserWaitUntil === undefined || options.browserWaitUntil == "networkidle0")) {
-			scriptPromise = driver.executeAsyncScript("addEventListener(\"single-file-network-idle-0\", () => arguments[0](), true)");
-		} else if (!optionHeadless && options.browserWaitUntil == "networkidle2") {
-			scriptPromise = driver.executeAsyncScript("addEventListener(\"single-file-network-idle-2\", () => arguments[0](), true)");
-		} else if (optionHeadless || options.browserWaitUntil == "load") {
-			scriptPromise = driver.executeAsyncScript("if (document.readyState == \"loading\" || document.readyState == \"interactive\") { addEventListener(\"load\", () => arguments[0]()) } else { arguments[0](); }");
-		}
-		let cancelTimeout;
-		const timeoutPromise = new Promise(resolve => {
-			const timeoutId = setTimeout(resolve, Math.max(0, options.browserLoadMaxTime - 5000));
-			cancelTimeout = () => {
-				clearTimeout(timeoutId);
-				resolve();
-			};
-		});
-		await Promise.race([scriptPromise, timeoutPromise]);
-		cancelTimeout();
-	}
-	if (options.browserWaitDelay) {
-		await driver.sleep(options.browserWaitDelay);
-	}
-	const result = await driver.executeAsyncScript(getPageDataScript(), options);
-	if (result.error) {
-		throw result.error;
-	} else {
-		return result.pageData;
-	}
-}
-
-function encode(file) {
-	return new Buffer.from(require("fs").readFileSync(file)).toString("base64");
-}
-
-function getPageDataScript() {
-	return `
-	const [options, callback] = arguments;
-	getPageData()
-		.then(pageData => callback({ pageData }))
-		.catch(error => callback({ error: error && error.toString() }));
-
-	async function getPageData() {
-		const pageData = await singlefile.getPageData(options);
-		if (options.includeInfobar) {
-			await infobar.includeScript(pageData);
-		}
-		return pageData;
-	}
-	`;
-}

+ 0 - 181
cli/back-ends/webdriver-gecko.js

@@ -1,181 +0,0 @@
-/*
- * Copyright 2010-2020 Gildas Lormeau
- * contact : gildas.lormeau <at> gmail.com
- * 
- * This file is part of SingleFile.
- *
- *   The code in this file is free software: you can redistribute it and/or 
- *   modify it under the terms of the GNU Affero General Public License 
- *   (GNU AGPL) as published by the Free Software Foundation, either version 3
- *   of the License, or (at your option) any later version.
- * 
- *   The code in this file is distributed in the hope that it will be useful, 
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of 
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero 
- *   General Public License for more details.
- *
- *   As additional permission under GNU AGPL version 3 section 7, you may 
- *   distribute UNMODIFIED VERSIONS OF THIS file without the copy of the GNU 
- *   AGPL normally required by section 4, provided you include this license 
- *   notice and a URL through which recipients can access the Corresponding 
- *   Source.
- */
-
-/* global require, exports, process, setTimeout, clearTimeout */
-
-const firefox = require("selenium-webdriver/firefox");
-const { Builder, By, Key } = require("selenium-webdriver");
-
-exports.initialize = async () => { };
-
-exports.getPageData = async options => {
-	let driver;
-	try {
-		const builder = new Builder().withCapabilities({ "pageLoadStrategy": "none" });
-		builder.setFirefoxOptions(getBrowserOptions(options));
-		driver = builder.forBrowser("firefox").build();
-		return await getPageData(driver, options);
-	} finally {
-		if (driver) {
-			driver.quit();
-		}
-	}
-};
-
-exports.closeBrowser = () => { };
-
-function getBrowserOptions(options) {
-	const firefoxOptions = new firefox.Options().setBinary(firefox.Channel.NIGHTLY);
-	if ((options.browserHeadless === undefined || options.browserHeadless) && !options.browserDebug) {
-		process.env["MOZ_HEADLESS"] = "1";
-	}
-	if (options.browserExecutablePath) {
-		firefoxOptions.setBinary(options.browserExecutablePath);
-	}
-	if (options.webDriverExecutablePath) {
-		process.env["PATH"] += ";" + options.webDriverExecutablePath.replace(/geckodriver(\.exe)?$/, "");
-	}
-	const extensions = [];
-	if (options.browserDisableWebSecurity === undefined || options.browserDisableWebSecurity) {
-		extensions.push(require.resolve("./extensions/signed/disable_web_security-0.0.3-an+fx.xpi"));
-	}
-	if (options.browserBypassCSP === undefined || options.browserBypassCSP) {
-		extensions.push(require.resolve("./extensions/signed/bypass_csp-0.0.3-an+fx.xpi"));
-	}
-	if (options.browserWaitUntil === undefined || options.browserWaitUntil == "networkidle0" || options.browserWaitUntil == "networkidle2") {
-		extensions.push(require.resolve("./extensions/signed/network_idle-0.0.2-an+fx.xpi"));
-	}
-	if (extensions.length) {
-		firefoxOptions.addExtensions(extensions);
-	}
-	if (options.browserArgs) {
-		const args = JSON.parse(options.browserArgs);
-		args.forEach(argument => firefoxOptions.addArguments(argument));
-	}
-	if (options.userAgent) {
-		firefoxOptions.setPreference("general.useragent.override", options.userAgent);
-	}
-}
-
-async function getPageData(driver, options) {
-	driver.manage().setTimeouts({ script: options.browserLoadMaxTime || 0, pageLoad: options.browserLoadMaxTime || 0, implicit: options.browserLoadMaxTime || 0 });
-	if (options.browserWidth && options.browserHeight) {
-		const window = driver.manage().window();
-		if (window.setRect) {
-			window.setRect(options.browserHeight, options.browserWidth);
-		} else if (window.setSize) {
-			window.setSize(options.browserWidth, options.browserHeight);
-		}
-	}
-	let scripts = await require("./common/scripts.js").get(options);
-	if (options.browserDebug) {
-		await driver.findElement(By.css("html")).sendKeys(Key.SHIFT + Key.F5);
-		await driver.sleep(3000);
-	}
-	scripts = scripts.replace(/globalThis/g, "window");
-	await driver.get(options.url);
-	if (options.browserCookies) {
-		await Promise.all(options.browserCookies.map(cookie => {
-			if (cookie.expires) {
-				cookie.expiry = cookie.expires;
-			}
-			return driver.manage().addCookie(cookie);
-		}));
-		await driver.get("about:blank");
-		await driver.get(options.url);
-		while (await driver.getCurrentUrl() == "about:blank") {
-			// do nothing
-		}
-	}
-	await driver.executeScript(scripts);
-	if (options.browserWaitUntil != "domcontentloaded") {
-		let scriptPromise;
-		/*
-		if (options.browserWaitUntil == "networkidle0") {
-			scriptPromise = driver.executeAsyncScript("addEventListener(\"single-file-network-idle-0\", () => arguments[0](), true)");
-		} else if (options.browserWaitUntil == "networkidle2") {
-			scriptPromise = driver.executeAsyncScript("addEventListener(\"single-file-network-idle-2\", () => arguments[0](), true)");
-		} else if (options.browserWaitUntil === undefined || options.browserWaitUntil == "load") {
-		*/
-		scriptPromise = driver.executeAsyncScript("if (document.readyState == \"loading\" || document.readyState == \"interactive\") { addEventListener(\"load\", () => arguments[0]()) } else { arguments[0](); }");
-		/*
-		}
-		*/
-		let cancelTimeout;
-		const timeoutPromise = new Promise(resolve => {
-			const timeoutId = setTimeout(resolve, Math.max(0, options.browserLoadMaxTime - 5000));
-			cancelTimeout = () => {
-				clearTimeout(timeoutId);
-				resolve();
-			};
-		});
-		await Promise.race([scriptPromise, timeoutPromise]);
-		cancelTimeout();
-	}
-	if (!options.removeFrames) {
-		await executeScriptInFrames(driver, scripts);
-	}
-	if (options.browserWaitDelay) {
-		await driver.sleep(options.browserWaitDelay);
-	}
-	const result = await driver.executeAsyncScript(getPageDataScript(), options);
-	if (result.error) {
-		throw result.error;
-	} else {
-		return result.pageData;
-	}
-}
-
-async function executeScriptInFrames(driver, scripts) {
-	let finished = false, indexFrame = 0;
-	while (!finished) {
-		try {
-			await driver.switchTo().frame(indexFrame);
-		} catch (error) {
-			finished = true;
-		}
-		if (!finished) {
-			await driver.executeScript(scripts);
-			await executeScriptInFrames(driver, scripts);
-			indexFrame++;
-			await driver.switchTo().parentFrame();
-		}
-	}
-}
-
-function getPageDataScript() {
-	return `
-	let [options, callback] = arguments;
-	getPageData()
-		.then(pageData => callback({ pageData }))
-		.catch(error => callback({ error: error && error.toString() }));
-
-	async function getPageData() {
-		const pageData = await window.singlefile.getPageData(options);
-		if (options.includeInfobar) {
-			await infobar.includeScript(pageData);
-		}
-		return pageData;
-	}
-	`;
-}

+ 1 - 54
cli/single-file

@@ -25,57 +25,4 @@
 
 /* global require */
 
-const fileUrl = require("file-url");
-const fs = require("fs");
-const api = require("./single-file-cli-api");
-run(require("./args"))
-	.catch(error => console.error(error.message || error)); // eslint-disable-line no-console	
-
-async function run(options) {
-	let urls;
-	if (options.url && !api.VALID_URL_TEST.test(options.url)) {
-		options.url = fileUrl(options.url);
-	}
-	if (options.urlsFile) {
-		urls = fs.readFileSync(options.urlsFile).toString().split("\n");
-	} else {
-		urls = [options.url];
-	}
-	if (options.browserCookiesFile) {
-		const cookiesContent = fs.readFileSync(options.browserCookiesFile).toString();
-		try {
-			options.browserCookies = JSON.parse(cookiesContent);
-		} catch (error) {
-			options.browserCookies = parseCookies(cookiesContent);
-		}
-	}
-	options.retrieveLinks = true;
-	const singlefile = await api.initialize(options);
-	await singlefile.capture(urls);
-	await singlefile.finish();
-}
-
-function parseCookies(textValue) {
-	const httpOnlyRegExp = /^#HttpOnly_(.*)/;
-	return textValue.split(/\r\n|\n/)
-		.filter(line => line.trim() && (!/^#/.test(line) || httpOnlyRegExp.test(line)))
-		.map(line => {
-			const httpOnly = httpOnlyRegExp.test(line);
-			if (httpOnly) {
-				line = line.replace(httpOnlyRegExp, "$1");
-			}
-			const values = line.split(/\t/);
-			if (values.length == 7) {
-				return {
-					domain: values[0],
-					path: values[2],
-					secure: values[3] == "TRUE",
-					expires: (values[4] && Number(values[4])) || undefined,
-					name: values[5],
-					value: values[6],
-					httpOnly
-				};
-			}
-		})
-		.filter(cookieData => cookieData);
-}
+require("single-file-cli/single-file");

+ 2 - 300
cli/single-file-cli-api.js

@@ -21,304 +21,6 @@
  *   Source.
  */
 
-/* global require, exports, URL */
+/* global require, exports */
 
-const fs = require("fs");
-const path = require("path");
-const scripts = require("./back-ends/common/scripts.js");
-const VALID_URL_TEST = /^(https?|file):\/\//;
-
-const DEFAULT_OPTIONS = {
-	removeHiddenElements: true,
-	removeUnusedStyles: true,
-	removeUnusedFonts: true,
-	removeFrames: false,
-	removeImports: true,
-	compressHTML: true,
-	compressCSS: false,
-	loadDeferredImages: true,
-	loadDeferredImagesMaxIdleTime: 1500,
-	loadDeferredImagesBlockCookies: false,
-	loadDeferredImagesBlockStorage: false,
-	loadDeferredImagesKeepZoomLevel: false,
-	loadDeferredImagesDispatchScrollEvent: false,
-	filenameTemplate: "{page-title} ({date-locale} {time-locale}).html",
-	infobarTemplate: "",
-	includeInfobar: false,
-	filenameMaxLength: 192,
-	filenameMaxLengthUnit: "bytes",
-	filenameReplacedCharacters: ["~", "+", "\\\\", "?", "%", "*", ":", "|", "\"", "<", ">", "\x00-\x1f", "\x7F"],
-	filenameReplacementCharacter: "_",
-	maxResourceSizeEnabled: false,
-	maxResourceSize: 10,
-	backgroundSave: true,
-	removeAlternativeFonts: true,
-	removeAlternativeMedias: true,
-	removeAlternativeImages: true,
-	groupDuplicateImages: true,
-	saveRawPage: false,
-	resolveFragmentIdentifierURLs: false,
-	userScriptEnabled: false,
-	saveFavicon: true,
-	includeBOM: false,
-	insertMetaCSP: true,
-	insertMetaNoIndex: false,
-	insertSingleFileComment: true,
-	blockImages: false,
-	blockStylesheets: false,
-	blockFont: false,
-	blockScripts: true,
-	blockVideos: true,
-	blockAudios: true
-};
-const STATE_PROCESSING = "processing";
-const STATE_PROCESSED = "processed";
-
-const backEnds = {
-	jsdom: "./back-ends/jsdom.js",
-	puppeteer: "./back-ends/puppeteer.js",
-	"puppeteer-firefox": "./back-ends/puppeteer-firefox.js",
-	"webdriver-chromium": "./back-ends/webdriver-chromium.js",
-	"webdriver-gecko": "./back-ends/webdriver-gecko.js",
-	"playwright-firefox": "./back-ends/playwright-firefox.js",
-	"playwright-chromium": "./back-ends/playwright-chromium.js"
-};
-
-let backend, tasks = [], maxParallelWorkers = 8, sessionFilename;
-
-exports.getBackEnd = backEndName => require(backEnds[backEndName]);
-exports.DEFAULT_OPTIONS = DEFAULT_OPTIONS;
-exports.VALID_URL_TEST = VALID_URL_TEST;
-exports.initialize = initialize;
-
-async function initialize(options) {
-	options = Object.assign({}, DEFAULT_OPTIONS, options);
-	maxParallelWorkers = options.maxParallelWorkers;
-	backend = require(backEnds[options.backEnd]);
-	await backend.initialize(options);
-	if (options.crawlSyncSession || options.crawlLoadSession) {
-		try {
-			tasks = JSON.parse(fs.readFileSync(options.crawlSyncSession || options.crawlLoadSession).toString());
-		} catch (error) {
-			if (options.crawlLoadSession) {
-				throw error;
-			}
-		}
-	}
-	if (options.crawlSyncSession || options.crawlSaveSession) {
-		sessionFilename = options.crawlSyncSession || options.crawlSaveSession;
-	}
-	return {
-		capture: urls => capture(urls, options),
-		finish: () => finish(options),
-	};
-}
-
-async function capture(urls, options) {
-	let newTasks;
-	const taskUrls = tasks.map(task => task.url);
-	newTasks = urls.map(url => createTask(url, options));
-	newTasks = newTasks.filter(task => task && !taskUrls.includes(task.url));
-	if (newTasks.length) {
-		tasks = tasks.concat(newTasks);
-		saveTasks();
-	}
-	await runTasks();
-}
-
-async function finish(options) {
-	const promiseTasks = tasks.map(task => task.promise);
-	await Promise.all(promiseTasks);
-	if (options.crawlReplaceURLs) {
-		tasks.forEach(task => {
-			try {
-				let pageContent = fs.readFileSync(task.filename).toString();
-				tasks.forEach(otherTask => {
-					if (otherTask.filename) {
-						pageContent = pageContent.replace(new RegExp(escapeRegExp("\"" + otherTask.originalUrl + "\""), "gi"), "\"" + otherTask.filename + "\"");
-						pageContent = pageContent.replace(new RegExp(escapeRegExp("'" + otherTask.originalUrl + "'"), "gi"), "'" + otherTask.filename + "'");
-						const filename = otherTask.filename.replace(/ /g, "%20");
-						pageContent = pageContent.replace(new RegExp(escapeRegExp("=" + otherTask.originalUrl + " "), "gi"), "=" + filename + " ");
-						pageContent = pageContent.replace(new RegExp(escapeRegExp("=" + otherTask.originalUrl + ">"), "gi"), "=" + filename + ">");
-					}
-				});
-				fs.writeFileSync(task.filename, pageContent);
-			} catch (error) {
-				// ignored
-			}
-		});
-	}
-	if (!options.browserDebug) {
-		return backend.closeBrowser();
-	}
-}
-
-async function runTasks() {
-	const availableTasks = tasks.filter(task => !task.status).length;
-	const processingTasks = tasks.filter(task => task.status == STATE_PROCESSING).length;
-	const promisesTasks = [];
-	for (let workerIndex = 0; workerIndex < Math.min(availableTasks, maxParallelWorkers - processingTasks); workerIndex++) {
-		promisesTasks.push(runNextTask());
-	}
-	return Promise.all(promisesTasks);
-}
-
-async function runNextTask() {
-	const task = tasks.find(task => !task.status);
-	if (task) {
-		const options = task.options;
-		let taskOptions = JSON.parse(JSON.stringify(options));
-		taskOptions.url = task.url;
-		task.status = STATE_PROCESSING;
-		saveTasks();
-		task.promise = capturePage(taskOptions);
-		const pageData = await task.promise;
-		task.status = STATE_PROCESSED;
-		if (pageData) {
-			task.filename = pageData.filename;
-			if (options.crawlLinks && testMaxDepth(task)) {
-				let newTasks = pageData.links
-					.map(urlLink => createTask(urlLink, options, task, tasks[0]))
-					.filter(task => task &&
-						testMaxDepth(task) &&
-						!tasks.find(otherTask => otherTask.url == task.url) &&
-						(!options.crawlInnerLinksOnly || task.isInnerLink) &&
-						(!options.crawlNoParent || (task.isChild || !task.isInnerLink)));
-				tasks.splice(tasks.length, 0, ...newTasks);
-			}
-		}
-		saveTasks();
-		await runTasks();
-	}
-}
-
-function testMaxDepth(task) {
-	const options = task.options;
-	return (options.crawlMaxDepth == 0 || task.depth <= options.crawlMaxDepth) &&
-		(options.crawlExternalLinksMaxDepth == 0 || task.externalLinkDepth < options.crawlExternalLinksMaxDepth);
-}
-
-function createTask(url, options, parentTask, rootTask) {
-	url = parentTask ? rewriteURL(url, options.crawlRemoveURLFragment, options.crawlRewriteRules) : url;
-	if (VALID_URL_TEST.test(url)) {
-		const isInnerLink = rootTask && url.startsWith(getHostURL(rootTask.url));
-		const rootBaseURIMatch = rootTask && rootTask.url.match(/(.*?)[^/]*$/);
-		const isChild = isInnerLink && rootBaseURIMatch && rootBaseURIMatch[1] && url.startsWith(rootBaseURIMatch[1]);
-		return {
-			url,
-			isInnerLink,
-			isChild,
-			originalUrl: url,
-			rootBaseURI: rootBaseURIMatch && rootBaseURIMatch[1],
-			depth: parentTask ? parentTask.depth + 1 : 0,
-			externalLinkDepth: isInnerLink ? -1 : parentTask ? parentTask.externalLinkDepth + 1 : -1,
-			options
-		};
-	}
-}
-
-function saveTasks() {
-	if (sessionFilename) {
-		fs.writeFileSync(sessionFilename, JSON.stringify(
-			tasks.map(task => Object.assign({}, task, {
-				status: task.status == STATE_PROCESSING ? undefined : task.status,
-				promise: undefined,
-				options: task.status && task.status == STATE_PROCESSED ? undefined : task.options
-			}))
-		));
-	}
-}
-
-function rewriteURL(url, crawlRemoveURLFragment, crawlRewriteRules) {
-	url = url.trim();
-	if (crawlRemoveURLFragment) {
-		url = url.replace(/^(.*?)#.*$/, "$1");
-	}
-	crawlRewriteRules.forEach(rewriteRule => {
-		const parts = rewriteRule.trim().split(/ +/);
-		if (parts.length) {
-			url = url.replace(new RegExp(parts[0]), parts[1] || "").trim();
-		}
-	});
-	return url;
-}
-
-function getHostURL(url) {
-	url = new URL(url);
-	return url.protocol + "//" + (url.username ? url.username + (url.password || "") + "@" : "") + url.hostname;
-}
-
-async function capturePage(options) {
-	try {
-		let filename;
-		const pageData = await backend.getPageData(options);
-		if (options.includeInfobar) {
-			await includeInfobarScript(pageData);
-		}
-		if (options.output) {
-			filename = getFilename(options.output, options);
-		} else if (options.dumpContent) {
-			console.log(pageData.content); // eslint-disable-line no-console
-		} else {
-			filename = getFilename(pageData.filename, options);
-		}
-		if (filename) {
-			const dirname = path.dirname(filename);
-			if (dirname) {
-				fs.mkdirSync(dirname, { recursive: true });
-			}
-			fs.writeFileSync(filename, pageData.content);
-		}
-		return pageData;
-	} catch (error) {
-		const message = "URL: " + options.url + "\nStack: " + error.stack + "\n";
-		if (options.errorFile) {
-			fs.writeFileSync(options.errorFile, message, { flag: "a" });
-		} else {
-			console.error(error.message || error, message); // eslint-disable-line no-console
-		}
-	}
-}
-
-function getFilename(filename, options, index = 1) {
-	if (Array.isArray(options.outputDirectory)) {
-		const outputDirectory = options.outputDirectory.pop();
-		if (outputDirectory.startsWith("/")) {
-			options.outputDirectory = outputDirectory;
-		} else {
-			options.outputDirectory = options.outputDirectory[0] + outputDirectory;
-		}
-	}
-	let outputDirectory = options.outputDirectory || "";
-	if (outputDirectory && !outputDirectory.endsWith("/")) {
-		outputDirectory += "/";
-	}
-	let newFilename = outputDirectory + filename;
-	if (options.filenameConflictAction == "overwrite") {
-		return filename;
-	} else if (options.filenameConflictAction == "uniquify" && index > 1) {
-		const regExpMatchExtension = /(\.[^.]+)$/;
-		const matchExtension = newFilename.match(regExpMatchExtension);
-		if (matchExtension && matchExtension[1]) {
-			newFilename = newFilename.replace(regExpMatchExtension, " (" + index + ")" + matchExtension[1]);
-		} else {
-			newFilename += " (" + index + ")";
-		}
-	}
-	if (fs.existsSync(newFilename)) {
-		if (options.filenameConflictAction != "skip") {
-			return getFilename(filename, options, index + 1);
-		}
-	} else {
-		return newFilename;
-	}
-}
-
-function escapeRegExp(string) {
-	return string.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
-}
-
-async function includeInfobarScript(pageData) {
-	const infobarContent = await scripts.getInfobarScript();
-	pageData.content += "<script>document.currentScript.remove();" + infobarContent + "</script>";
-}
+Object.assign(exports, require("single-file-cli/single-file-cli-api")); // re-export the package API (initialize, getBackEnd, ...) as this module's own exports so existing callers keep working

+ 5 - 1
companion/README.MD

@@ -8,7 +8,11 @@ SingleFile Companion is a program that runs outside the browser. It can:
 
 ## Install
 
- - Install [SingleFile CLI](https://github.com/gildas-lormeau/SingleFile/tree/master/cli) with NPM
+ - Install [Node.js](https://nodejs.org)
+
+ - Download the [SingleFile project](https://github.com/gildas-lormeau/SingleFile/archive/master.zip) zip file and unzip it somewhere on your disk.
+
+ - Run `npm install` in the root folder of SingleFile.
 
  - In the `companion` folder of SingleFile, go into the subfolder corresponding to your OS and your browser. For example, if you use Chrome on Linux:
 

+ 5 - 5
companion/singlefile_companion.js

@@ -29,11 +29,11 @@ const fs = require("fs");
 const path = require("path");
 const nativeMessage = require("./lib/messaging.js");
 const backEnds = {
-	jsdom: "./../cli/back-ends/jsdom.js",
-	puppeteer: "./../cli/back-ends/puppeteer.js",
-	"puppeteer-firefox": "./../cli/back-ends/puppeteer-firefox.js",
-	"webdriver-chromium": "./../cli/back-ends/webdriver-chromium.js",
-	"webdriver-gecko": "./../cli/back-ends/webdriver-gecko.js"
+	jsdom: "single-file-cli/back-ends/jsdom.js",
+	puppeteer: "single-file-cli/back-ends/puppeteer.js",
+	"puppeteer-firefox": "single-file-cli/back-ends/puppeteer-firefox.js",
+	"webdriver-chromium": "single-file-cli/back-ends/webdriver-chromium.js",
+	"webdriver-gecko": "single-file-cli/back-ends/webdriver-gecko.js"
 };
 
 process.stdin

+ 15 - 0
package-lock.json

@@ -1097,6 +1097,21 @@
 			"resolved": "https://registry.npmjs.org/set-immediate-shim/-/set-immediate-shim-1.0.1.tgz",
 			"integrity": "sha1-SysbJ+uAip+NzEgaWOXlb1mfP2E="
 		},
+		"single-file-cli": {
+			"version": "1.0.0",
+			"resolved": "https://registry.npmjs.org/single-file-cli/-/single-file-cli-1.0.0.tgz",
+			"integrity": "sha512-IWPfdRFaiOc8CvxVs0uWtgBghS+KQAoL18J4dcznTyeLy0d1VrggAyo9fcvIoD0Hz5I0gherWUi0VEsYhntWOQ==",
+			"requires": {
+				"file-url": "3.0.0",
+				"iconv-lite": "0.6.3",
+				"jsdom": "19.0.0",
+				"puppeteer-core": "13.5.2",
+				"selenium-webdriver": "4.1.1",
+				"single-file-core": "1.0.6",
+				"strong-data-uri": "1.0.6",
+				"yargs": "17.4.0"
+			}
+		},
 		"single-file-core": {
 			"version": "1.0.6",
 			"resolved": "https://registry.npmjs.org/single-file-core/-/single-file-core-1.0.6.tgz",

+ 1 - 7
package.json

@@ -9,14 +9,8 @@
 		"single-file": "./cli/single-file"
 	},
 	"dependencies": {
-		"file-url": "3.0.0",
-		"iconv-lite": "0.6.3",
-		"jsdom": "19.0.0",
-		"puppeteer-core": "13.5.2",
-		"selenium-webdriver": "4.1.1",
 		"single-file-core": "1.0.6",
-		"strong-data-uri": "1.0.6",
-		"yargs": "17.4.0"
+		"single-file-cli": "1.0.0"
 	},
 	"devDependencies": {
 		"@rollup/plugin-node-resolve": "13.3.0",