Prechádzať zdrojové kódy

added crawl-replace-urls option

Former-commit-id: 8f801413ea74ac014fa8a1b06917bb0bd0cbeffd
Gildas pred 5 rokmi
rodič
commit
64320ce473
2 zmenené súbory, 22 pridaní a 0 odobraní
  1. 3 0
      cli/args.js
  2. 19 0
      cli/single-file

+ 3 - 0
cli/args.js

@@ -71,6 +71,7 @@ module.exports = require("yargs")
 		"crawl-links": false,
 		"crawl-inner-links-only": true,
 		"crawl-max-depth": 1,
+		"crawl-replace-urls": true,
 		"url-rewrite-rules": []
 	})
 	.options("back-end", { description: "Back-end to use" })
@@ -107,6 +108,8 @@ module.exports = require("yargs")
 	.boolean("crawl-inner-links-only")
 	.options("crawl-max-depth", { description: "Max depth when crawl pages found via inner links" })
 	.number("crawl-max-depth")
+	.options("crawl-replace-urls", { description: "Replace URLs of saved pages with relative paths of saved pages on the filesystem" })
+	.boolean("crawl-replace-urls")
 	.options("error-file")
 	.string("error-file")
 	.options("filename-template", { description: "Template used to generate the output filename (see help page of the extension for more info)" })

+ 19 - 0
cli/single-file

@@ -41,6 +41,7 @@ const backEnds = {
 options.compressCSS = options.compressCss;
 options.compressHTML = options.compressHtml;
 options.includeBOM = options.includeBom;
+options.crawlReplaceURLs = options.crawlReplaceUrls;
 if (options.url && !VALID_URL_TEST.test(options.url)) {
 	options.url = fileUrl(options.url);
 }
@@ -60,6 +61,19 @@ async function run(options) {
 		tasks = [{ url: rewriteURL(options.url, options.urlRewriteRules), depth: 0 }];
 	}
 	await runTasks(tasks, options);
+	if (options.crawlReplaceURLs) {
+		tasks.forEach(task => {
+			let pageContent = fs.readFileSync(task.filename).toString();
+			tasks.forEach(otherTask => {
+				pageContent = pageContent.replace(new RegExp(escapeRegExp("\"" + otherTask.url + "\""), "gi"), "\"" + otherTask.filename + "\"");
+				pageContent = pageContent.replace(new RegExp(escapeRegExp("'" + otherTask.url + "'"), "gi"), "'" + otherTask.filename + "'");
+				const filename = otherTask.filename.replace(/ /g, "%20");
+				pageContent = pageContent.replace(new RegExp(escapeRegExp("=" + otherTask.url + " "), "gi"), "=" + filename + " ");
+				pageContent = pageContent.replace(new RegExp(escapeRegExp("=" + otherTask.url + ">"), "gi"), "=" + filename + ">");
+			});
+			fs.writeFileSync(task.filename, pageContent);
+		});
+	}
 	if (!options.browserDebug) {
 		return backend.closeBrowser();
 	}
@@ -84,6 +98,7 @@ async function runNextTask(tasks, options) {
 		task.status = "processing";
 		const pageData = await capturePage(options);
 		task.status = "processed";
+		task.filename = pageData.filename;
 		if (pageData && options.crawlLinks && task.depth < options.crawlMaxDepth) {
 			pageData.links = pageData.links
 				.map(urlLink => rewriteURL(urlLink, options.urlRewriteRules))
@@ -153,4 +168,8 @@ function getFilename(filename, index = 1) {
 	} else {
 		return newFilename;
 	}
+}
+
+function escapeRegExp(string) {
+	return string.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
 }