Ver código fonte

crawl http(s) URLs only

Former-commit-id: 8e0cf7cd00fce0990e703cb42beabb888916c56f
Gildas 5 anos atrás
pai
commit
b4572cf17e
1 arquivo alterado com 1 adição e 1 exclusão
  1. 1 1
      cli/single-file

+ 1 - 1
cli/single-file

@@ -85,7 +85,7 @@ async function runNextTask(tasks, options) {
 		if (pageData && options.crawlLinks) {
 			pageData.links = pageData.links
 				.map(urlLink => rewriteURL(urlLink, options.urlRewriteRules))
-				.filter(urlLink => !tasks.find(task => task.url == urlLink));
+				.filter(urlLink => (urlLink.startsWith("http:") || urlLink.startsWith("https:")) && !tasks.find(task => task.url == urlLink));
 			if (options.crawlInnerLinksOnly) {
 				const urlHost = getHostURL(options.url);
 				pageData.links = pageData.links.filter(urlLink => urlLink.startsWith(urlHost));