Ver código fonte

added --crawl-remove-url-fragment option (CLI)

Former-commit-id: 9d7c8f853d89bdd84498a0cc613de7ec0bc03ad3
Gildas 5 anos atrás
pai
commit
da690e16f9
2 arquivos alterados com 7 adições e 0 exclusões
  1. cli/args.js (+4 -0)
  2. cli/single-file (+3 -0)

+ 4 - 0
cli/args.js

@@ -70,6 +70,7 @@ const args = require("yargs")
 		"include-BOM": false,
 		"crawl-links": false,
 		"crawl-inner-links-only": true,
+		"crawl-remove-url-fragment": true,
 		"crawl-max-depth": 1,
 		"crawl-external-links-max-depth": 1,
 		"crawl-replace-urls": false,
@@ -107,6 +108,8 @@ const args = require("yargs")
 	.boolean("crawl-links")
 	.options("crawl-inner-links-only", { description: "Crawl pages found via inner links only if they are hosted on the same domain" })
 	.boolean("crawl-inner-links-only")
+	.options("crawl-remove-url-fragment", { description: "Remove URL fragments found in links" })
+	.boolean("crawl-remove-url-fragment")
 	.options("crawl-max-depth", { description: "Max depth when crawling pages found in internal and external links (0: infinite)" })
 	.number("crawl-max-depth")
 	.options("crawl-external-links-max-depth", { description: "Max depth when crawling pages found in external links (0: infinite)" })
@@ -175,4 +178,5 @@ args.compressCSS = args.compressCss;
 args.compressHTML = args.compressHtml;
 args.includeBOM = args.includeBom;
 args.crawlReplaceURLs = args.crawlReplaceUrls;
+args.crawlRemoveURLFragment = args.crawlRemoveUrlFragment;
 module.exports = args;

+ 3 - 0
cli/single-file

@@ -134,6 +134,9 @@ function createTask(url, parentTask, rootTask) {
 
 function rewriteURL(url) {
 	url = url.trim();
+	if (options.crawlRemoveURLFragment) {
+		url = url.replace(/^(.*?)#.*$/, "$1");
+	}
 	options.crawlRewriteRules.forEach(rewriteRule => {
 		const parts = rewriteRule.trim().split(/ +/);
 		if (parts.length) {