Sfoglia il codice sorgente

add option "save original URLs of embedded resources"
fix #842

Gildas 4 anni fa
parent
commit
31e35bb1e9

+ 4 - 0
_locales/de/messages.json

@@ -163,6 +163,10 @@
 		"message": "Blockieren gemischter Inhalte",
 		"description": "Options page label: 'block mixed content'"
 	},
+	"optionSaveOriginalURLs": {
+		"message": "Speichern der Original-URLs von eingebetteten Ressourcen",
+		"description": "Options page label: 'save original URLs of embedded resources'"
+	},
 	"optionIncludeInfobar": {
 		"message": "Einfügen Inhalt des Infoknopfs in die gespeicherte Seite",
 		"description": "Options page label: 'include the infobar in the saved page'"

+ 4 - 0
_locales/en/messages.json

@@ -162,6 +162,10 @@
 	"optionBlockMixedContent": {
 		"message": "block mixed content",
 		"description": "Options page label: 'block mixed content'"
+	},
+	"optionSaveOriginalURLs": {
+		"message": "save original URLs of embedded resources",
+		"description": "Options page label: 'save original URLs of embedded resources'"
 	},	
 	"optionIncludeInfobar": {
 		"message": "include the infobar in the saved page",

+ 4 - 0
_locales/es/messages.json

@@ -163,6 +163,10 @@
 		"message": "bloqueo de contenido mixto",
 		"description": "Options page label: 'block mixed content'"
 	},
+	"optionSaveOriginalURLs": {
+		"message": "guardar las URL originales de los recursos incrustados",
+		"description": "Options page label: 'save original URLs of embedded resources'"
+	},
 	"optionIncludeInfobar": {
 		"message": "incluir la barra informativa en la página guardada",
 		"description": "Options page label: 'include the infobar in the saved page'"

+ 4 - 0
_locales/fr/messages.json

@@ -163,6 +163,10 @@
 		"message": "bloquer les contenus mixtes",
 		"description": "Options page label: 'block mixed content'"
 	},
+	"optionSaveOriginalURLs": {
+		"message": "sauvegarder les URLs d'origine des ressources embarquées",
+		"description": "Options page label: 'save original URLs of embedded resources'"
+	},
 	"optionIncludeInfobar": {
 		"message": "inclure la barre d'information dans la page sauvegardée",
 		"description": "Options page label: 'include the infobar in the saved page'"

+ 4 - 0
_locales/it/messages.json

@@ -163,6 +163,10 @@
 		"message": "block mixed content",
 		"description": "Options page label: 'block mixed content'"
 	},
+	"optionSaveOriginalURLs": {
+		"message": "save original URLs of embedded resources",
+		"description": "Options page label: 'save original URLs of embedded resources'"
+	},
 	"optionIncludeInfobar": {
 		"message": "includi una barra informativa nella pagina salvata",
 		"description": "Options page label: 'include the infobar in the saved page'"

+ 4 - 0
_locales/ja/messages.json

@@ -163,6 +163,10 @@
 		"message": "block mixed content",
 		"description": "Options page label: 'block mixed content'"
 	},
+	"optionSaveOriginalURLs": {
+		"message": "save original URLs of embedded resources",
+		"description": "Options page label: 'save original URLs of embedded resources'"
+	},
 	"optionIncludeInfobar": {
 		"message": "保存されたページに、infobar を含める",
 		"description": "Options page label: 'include the infobar in the saved page'"

+ 4 - 0
_locales/pl/messages.json

@@ -163,6 +163,10 @@
 		"message": "blokuj zawartość mieszaną",
 		"description": "Options page label: 'block mixed content'"
 	},
+	"optionSaveOriginalURLs": {
+		"message": "save original URLs of embedded resources",
+		"description": "Options page label: 'save original URLs of embedded resources'"
+	},
 	"optionIncludeInfobar": {
 		"message": "dołączaj pasek informacyjny do zapisanej strony",
 		"description": "Options page label: 'include the infobar in the saved page'"

+ 4 - 0
_locales/ru/messages.json

@@ -163,6 +163,10 @@
 		"message": "блокировать смешанное содержимое",
 		"description": "Options page label: 'block mixed content'"
 	},
+	"optionSaveOriginalURLs": {
+		"message": "save original URLs of embedded resources",
+		"description": "Options page label: 'save original URLs of embedded resources'"
+	},
 	"optionIncludeInfobar": {
 		"message": "добавить информационную панель в сохраняемую страницу",
 		"description": "Options page label: 'include the infobar in the saved page'"

+ 4 - 0
_locales/uk/messages.json

@@ -163,6 +163,10 @@
 		"message": "block mixed content",
 		"description": "Options page label: 'block mixed content'"
 	},
+	"optionSaveOriginalURLs": {
+		"message": "save original URLs of embedded resources",
+		"description": "Options page label: 'save original URLs of embedded resources'"
+	},
 	"optionIncludeInfobar": {
 		"message": "додавати інформ. панель в сторінку що зберігається",
 		"description": "Options page label: 'include the infobar in the saved page'"

+ 4 - 0
_locales/zh_CN/messages.json

@@ -163,6 +163,10 @@
 		"message": "拦截混合內容",
 		"description": "Options page label: 'block mixed content'"
 	},
+	"optionSaveOriginalURLs": {
+		"message": "save original URLs of embedded resources",
+		"description": "Options page label: 'save original URLs of embedded resources'"
+	},
 	"optionIncludeInfobar": {
 		"message": "在已保存页面中将信息栏包括在内",
 		"description": "Options page label: 'include the infobar in the saved page'"

+ 4 - 0
_locales/zh_TW/messages.json

@@ -163,6 +163,10 @@
 		"message": "攔截混合內容",
 		"description": "Options page label: 'block mixed content'"
 	},
+	"optionSaveOriginalURLs": {
+		"message": "save original URLs of embedded resources",
+		"description": "Options page label: 'save original URLs of embedded resources'"
+	},
 	"optionIncludeInfobar": {
 		"message": "在已保存頁面中將信息欄包括在內",
 		"description": "Options page label: 'include the infobar in the saved page'"

+ 2 - 1
extension/core/bg/config.js

@@ -113,7 +113,8 @@ const DEFAULT_CONFIG = {
 	passReferrerOnError: false,
 	insertSingleFileComment: true,
 	blockMixedContent: false,
-	woleetKey: ""
+	saveOriginalURLs: false,
+	woleetKey: "",	
 };
 
 let configStorage;

+ 5 - 0
extension/ui/bg/ui-options.js

@@ -114,6 +114,7 @@ const applySystemThemeLabel = document.getElementById("applySystemThemeLabel");
 const warnUnsavedPageLabel = document.getElementById("warnUnsavedPageLabel");
 const infobarTemplateLabel = document.getElementById("infobarTemplateLabel");
 const blockMixedContentLabel = document.getElementById("blockMixedContentLabel");
+const saveOriginalURLsLabel = document.getElementById("saveOriginalURLsLabel");
 const includeInfobarLabel = document.getElementById("includeInfobarLabel");
 const miscLabel = document.getElementById("miscLabel");
 const helpLabel = document.getElementById("helpLabel");
@@ -183,6 +184,7 @@ const ignoredBookmarkFoldersInput = document.getElementById("ignoredBookmarkFold
 const groupDuplicateImagesInput = document.getElementById("groupDuplicateImagesInput");
 const infobarTemplateInput = document.getElementById("infobarTemplateInput");
 const blockMixedContentInput = document.getElementById("blockMixedContentInput");
+const saveOriginalURLsInput = document.getElementById("saveOriginalURLsInput");
 const includeInfobarInput = document.getElementById("includeInfobarInput");
 const confirmInfobarInput = document.getElementById("confirmInfobarInput");
 const autoCloseInput = document.getElementById("autoCloseInput");
@@ -546,6 +548,7 @@ miscLabel.textContent = browser.i18n.getMessage("optionsMiscSubTitle");
 helpLabel.textContent = browser.i18n.getMessage("optionsHelpLink");
 infobarTemplateLabel.textContent = browser.i18n.getMessage("optionInfobarTemplate");
 blockMixedContentLabel.textContent = browser.i18n.getMessage("optionBlockMixedContent");
+saveOriginalURLsLabel.textContent = browser.i18n.getMessage("optionSaveOriginalURLs");
 includeInfobarLabel.textContent = browser.i18n.getMessage("optionIncludeInfobar");
 confirmInfobarLabel.textContent = browser.i18n.getMessage("optionConfirmInfobar");
 autoCloseLabel.textContent = browser.i18n.getMessage("optionAutoClose");
@@ -756,6 +759,7 @@ async function refresh(profileName) {
 	ignoredBookmarkFoldersInput.disabled = !profileOptions.saveCreatedBookmarks;
 	infobarTemplateInput.value = profileOptions.infobarTemplate;
 	blockMixedContentInput.checked = profileOptions.blockMixedContent;
+	saveOriginalURLsInput.checked = profileOptions.saveOriginalURLs;
 	includeInfobarInput.checked = profileOptions.includeInfobar;
 	confirmInfobarInput.checked = profileOptions.confirmInfobarContent;
 	autoCloseInput.checked = profileOptions.autoClose;
@@ -837,6 +841,7 @@ async function update() {
 			groupDuplicateImages: groupDuplicateImagesInput.checked,
 			infobarTemplate: infobarTemplateInput.value,
 			blockMixedContent: blockMixedContentInput.checked,
+			saveOriginalURLs: saveOriginalURLsInput.checked,
 			includeInfobar: includeInfobarInput.checked,
 			confirmInfobarContent: confirmInfobarInput.checked,
 			autoClose: autoCloseInput.checked,

+ 7 - 0
extension/ui/pages/help.html

@@ -226,6 +226,13 @@
 								href="https://developer.mozilla.org/docs/Web/Security/Mixed_content"
 								target="_blank">here</a>.</p>
 					</li>
+					<li data-options-label="saveOriginalURLsLabel"> <span class="option"></span>Option: save original
+						URLs of embedded resources</span>
+						<p>Check this option to save the URLs of the resources embedded into the saved page. URLs
+							are stored in attributes with a name prefixed with "data-sf-original-" and suffixed with the
+							original attribute name (e.g. "data-sf-original-src", "data-sf-original-href") for HTML
+							contents. For CSS contents, URLs are stored as comments beginning with "original URL:".</p>
+					</li>
 					<li data-options-label="includeInfobarLabel"> <span class="option"></span>Option: include infobar in
 						the saved page</span>
 						<p>Check this

+ 4 - 0
extension/ui/pages/options.html

@@ -106,6 +106,10 @@
 				<label for="blockMixedContentInput" id="blockMixedContentLabel"></label>
 				<input type="checkbox" id="blockMixedContentInput">
 			</div>
+			<div class="option">
+				<label for="saveOriginalURLsInput" id="saveOriginalURLsLabel"></label>
+				<input type="checkbox" id="saveOriginalURLsInput">
+			</div>
 			<div class="option">
 				<label for="includeInfobarInput" id="includeInfobarLabel"></label>
 				<input type="checkbox" id="includeInfobarInput">

+ 1 - 1
lib/single-file/modules/css-fonts-alt-minifier.js

@@ -39,7 +39,7 @@ const FontFace = globalThis.FontFace;
 const REGEXP_URL_SIMPLE_QUOTES_FN = /url\s*\(\s*'(.*?)'\s*\)/i;
 const REGEXP_URL_DOUBLE_QUOTES_FN = /url\s*\(\s*"(.*?)"\s*\)/i;
 const REGEXP_URL_NO_QUOTES_FN = /url\s*\(\s*(.*?)\s*\)/i;
-const REGEXP_URL_FUNCTION = /(url|local)\(.*?\)\s*(,|$)/g;
+const REGEXP_URL_FUNCTION = /(url|local|-sf-url-original)\(.*?\)\s*(,|$)/g;
 const REGEXP_SIMPLE_QUOTES_STRING = /^'(.*?)'$/;
 const REGEXP_DOUBLE_QUOTES_STRING = /^"(.*?)"$/;
 const REGEXP_URL_FUNCTION_WOFF = /^url\(\s*["']?data:font\/(woff2?)/;

+ 72 - 9
lib/single-file/single-file-core.js

@@ -887,6 +887,11 @@ class Processor {
 	resolveHrefs() {
 		this.doc.querySelectorAll("a[href], area[href], link[href]").forEach(element => {
 			const href = element.getAttribute("href").trim();
+			if (element.tagName == "LINK" && element.rel.includes("stylesheet")) {
+				if (this.options.saveOriginalURLs && !isDataURL(href)) {
+					element.setAttribute("data-sf-original-href", href);
+				}
+			}
 			if (!testIgnoredPath(href)) {
 				let resolvedURL;
 				try {
@@ -915,7 +920,7 @@ class Processor {
 			if (this.options.compressCSS) {
 				styleContent = util.compressCSS(styleContent);
 			}
-			styleContent = ProcessorHelper.resolveStylesheetURLs(styleContent, this.baseURI, this.workStyleElement);
+			styleContent = ProcessorHelper.resolveStylesheetURLs(styleContent, this.baseURI, this.workStyleElement, this.options.saveOriginalURLs);
 			const declarationList = cssTree.parse(styleContent, { context: "declarationList" });
 			this.styles.set(element, declarationList);
 		});
@@ -933,7 +938,8 @@ class Processor {
 				rootDocument: this.options.rootDocument,
 				frameId: this.options.windowId,
 				resourceReferrer: this.options.resourceReferrer,
-				blockMixedContent: this.options.blockMixedContent
+				blockMixedContent: this.options.blockMixedContent,
+				saveOriginalURLs: this.options.saveOriginalURLs
 			};
 			let mediaText;
 			if (element.media) {
@@ -1001,6 +1007,10 @@ class Processor {
 				if (frameElement.tagName == "OBJECT") {
 					frameElement.setAttribute("data", "data:text/html,");
 				} else {
+					const src = frameElement.getAttribute("src");
+					if (this.options.saveOriginalURLs && !isDataURL(src)) {
+						frameElement.setAttribute("data-sf-original-src", src);
+					}
 					frameElement.removeAttribute("src");
 					frameElement.removeAttribute("srcdoc");
 				}
@@ -1101,6 +1111,9 @@ class Processor {
 		const linkElements = Array.from(this.doc.querySelectorAll("link[rel=import][href]"));
 		await Promise.all(linkElements.map(async linkElement => {
 			const resourceURL = linkElement.href;
+			if (this.options.saveOriginalURLs && !isDataURL(resourceURL)) {
+				linkElement.setAttribute("data-sf-original-href", resourceURL);
+			}
 			linkElement.removeAttribute("href");
 			const options = Object.create(this.options);
 			options.insertSingleFileComment = false;
@@ -1205,8 +1218,14 @@ class Processor {
 			let scriptSrc;
 			if (element.tagName == "SCRIPT") {
 				scriptSrc = element.getAttribute("src");
+				if (this.options.saveOriginalURLs && !isDataURL(scriptSrc)) {
+					element.setAttribute("data-sf-original-src", scriptSrc);
+				}
 			} else {
 				scriptSrc = element.getAttribute("href");
+				if (this.options.saveOriginalURLs && !isDataURL(scriptSrc)) {
+					element.setAttribute("data-sf-original-href", scriptSrc);
+				}
 			}
 			element.removeAttribute("integrity");
 			element.textContent = "";
@@ -1322,6 +1341,9 @@ class Processor {
 			if (stylesheetInfo) {
 				this.stylesheets.delete(styleElement);
 				let stylesheetContent = cssTree.generate(stylesheetInfo.stylesheet);
+				if (this.options.saveOriginalURLs) {
+					stylesheetContent = replaceOriginalURLs(stylesheetContent);
+				}
 				styleElement.textContent = stylesheetContent;
 				if (stylesheetInfo.mediaText) {
 					styleElement.media = stylesheetInfo.mediaText;
@@ -1339,6 +1361,10 @@ class Processor {
 					styleElement.media = stylesheetInfo.mediaText;
 				}
 				let stylesheetContent = cssTree.generate(stylesheetInfo.stylesheet);
+				if (this.options.saveOriginalURLs) {
+					stylesheetContent = replaceOriginalURLs(stylesheetContent);
+					styleElement.setAttribute("data-sf-original-href", linkElement.getAttribute("data-sf-original-href"));
+				}
 				styleElement.textContent = stylesheetContent;
 				linkElement.parentElement.replaceChild(styleElement, linkElement);
 			} else {
@@ -1353,6 +1379,9 @@ class Processor {
 			if (declarations) {
 				this.styles.delete(element);
 				let styleContent = cssTree.generate(declarations);
+				if (this.options.saveOriginalURLs) {
+					styleContent = replaceOriginalURLs(styleContent);
+				}
 				element.setAttribute("style", styleContent);
 			} else {
 				element.setAttribute("style", "");
@@ -1370,12 +1399,16 @@ class Processor {
 				this.doc.head.appendChild(styleElement);
 			}
 			let stylesheetContent = "";
-			this.cssVariables.forEach((content, indexResource) => {
+			this.cssVariables.forEach(({ content, url }, indexResource) => {
 				this.cssVariables.delete(indexResource);
 				if (stylesheetContent) {
 					stylesheetContent += ";";
 				}
-				stylesheetContent += `${SINGLE_FILE_VARIABLE_NAME_PREFIX + indexResource}:url("${content}")`;
+				stylesheetContent += `${SINGLE_FILE_VARIABLE_NAME_PREFIX + indexResource}: `;
+				if (this.options.saveOriginalURLs) {
+					stylesheetContent += `/* original URL: ${url} */ `;
+				}
+				stylesheetContent += `url("${content}")`;
 			});
 			styleElement.textContent = ":root{" + stylesheetContent + "}";
 		}
@@ -1591,7 +1624,7 @@ class ProcessorHelper {
 	}
 
 	static async resolveImportURLs(stylesheetContent, baseURI, options, workStylesheet, importedStyleSheets = new Set()) {
-		stylesheetContent = ProcessorHelper.resolveStylesheetURLs(stylesheetContent, baseURI, workStylesheet);
+		stylesheetContent = ProcessorHelper.resolveStylesheetURLs(stylesheetContent, baseURI, workStylesheet, options.saveOriginalURLs);
 		const imports = getImportFunctions(stylesheetContent);
 		await Promise.all(imports.map(async cssImport => {
 			const match = matchImport(cssImport);
@@ -1666,8 +1699,11 @@ class ProcessorHelper {
 		}
 	}
 
-	static resolveStylesheetURLs(stylesheetContent, baseURI, workStylesheet) {
+	static resolveStylesheetURLs(stylesheetContent, baseURI, workStylesheet, saveOriginalURLs) {
 		const urlFunctions = getUrlFunctions(stylesheetContent, true);
+		if (saveOriginalURLs) {
+			stylesheetContent = addOriginalURLs(stylesheetContent);
+		}
 		urlFunctions.map(urlFunction => {
 			const originalResourceURL = matchURL(urlFunction);
 			let resourceURL = normalizeURL(originalResourceURL);
@@ -1823,7 +1859,7 @@ class ProcessorHelper {
 								}
 							});
 							if (variableDefined) {
-								cssVariables.set(indexResource, content);
+								cssVariables.set(indexResource, { content, url: originalResourceURL });
 								tokens.forEach(({ parent, token, value }) => parent.replace(token, value));
 							}
 						}
@@ -1850,6 +1886,9 @@ class ProcessorHelper {
 			if (resourceURL != null) {
 				resourceURL = normalizeURL(resourceURL);
 				let originURL = resourceElement.dataset.singleFileOriginURL;
+				if (options.saveOriginalURLs && !isDataURL(resourceURL)) {
+					resourceElement.setAttribute("data-sf-original-" + attributeName, resourceURL);
+				}
 				delete resourceElement.dataset.singleFileOriginURL;
 				if (!testIgnoredPath(resourceURL)) {
 					resourceElement.setAttribute(attributeName, EMPTY_IMAGE);
@@ -1892,7 +1931,7 @@ class ProcessorHelper {
 									const isSVG = content.startsWith(PREFIX_DATA_URI_IMAGE_SVG);
 									if (processDuplicates && duplicate && options.groupDuplicateImages && !isSVG && util.getContentSize(content) < SINGLE_FILE_VARIABLE_MAX_SIZE) {
 										if (ProcessorHelper.replaceImageSource(resourceElement, SINGLE_FILE_VARIABLE_NAME_PREFIX + indexResource, options)) {
-											cssVariables.set(indexResource, content);
+											cssVariables.set(indexResource, { content, url: originURL });
 											const declarationList = cssTree.parse(resourceElement.getAttribute("style"), { context: "declarationList" });
 											styles.set(resourceElement, declarationList);
 										} else {
@@ -1918,6 +1957,9 @@ class ProcessorHelper {
 				attributeName = "href";
 				originalResourceURL = resourceElement.getAttribute(attributeName);
 			}
+			if (options.saveOriginalURLs && !isDataURL(originalResourceURL)) {
+				resourceElement.setAttribute("data-sf-original-href", originalResourceURL);
+			}
 			let resourceURL = normalizeURL(originalResourceURL);
 			if (testValidPath(resourceURL) && !testIgnoredPath(resourceURL)) {
 				resourceElement.setAttribute(attributeName, EMPTY_IMAGE);
@@ -1958,7 +2000,9 @@ class ProcessorHelper {
 
 	static async processSrcset(resourceElements, attributeName, baseURI, batchRequest) {
 		await Promise.all(Array.from(resourceElements).map(async resourceElement => {
-			const srcset = util.parseSrcset(resourceElement.getAttribute(attributeName));
+			const originSrcset = resourceElement.getAttribute(attributeName);
+			const srcset = util.parseSrcset(originSrcset);
+			resourceElement.setAttribute("data-sf-original-srcset", originSrcset);
 			const srcsetValues = await Promise.all(srcset.map(async srcsetValue => {
 				let resourceURL = normalizeURL(srcsetValue.url);
 				if (!testIgnoredPath(resourceURL)) {
@@ -2169,6 +2213,25 @@ function matchURL(stylesheetContent) {
 	return match && match[1];
 }
 
+function addOriginalURLs(stylesheetContent) { // Returns the stylesheet text with a `-sf-url-original("...")` marker inserted before each REGEXP_URL_FN match.
+	return stylesheetContent.replace(REGEXP_URL_FN, function (match, _0, url, _1, url2, _2, url3) { // REGEXP_URL_FN is defined elsewhere; presumably it matches CSS url(...) functions — confirm.
+		url = url || url2 || url3; // The URL is whichever of the three capture groups is non-empty (assumed to be one group per quoting style — verify against REGEXP_URL_FN).
+		if (isDataURL(url)) { // URLs recognised by isDataURL (data:/blob: prefixes) get no marker.
+			return match;
+		} else {
+			return "-sf-url-original(" + JSON.stringify(url) + ") " + match; // JSON.stringify yields a double-quoted, escaped string; the original match is kept right after the marker.
+		}
+	});
+}
+
+function isDataURL(url) { // Tells whether `url` starts with DATA_URI_PREFIX or BLOB_URI_PREFIX (both constants are defined elsewhere in the file).
+	return url && (url.startsWith(DATA_URI_PREFIX) || url.startsWith(BLOB_URI_PREFIX)); // A falsy `url` (null, undefined, "") is returned as-is, so `!isDataURL(null)` is true: callers must check for a missing URL themselves.
+}
+
+function replaceOriginalURLs(stylesheetContent) { // Returns the stylesheet text with every `-sf-url-original("...")` marker rewritten as a `/* original URL: ... */` CSS comment.
+	return stylesheetContent.replace(/-sf-url-original\("(.*?)"\)/g, "/* original URL: $1 */"); // NOTE(review): the lazy capture ends at the first `")` and is inserted verbatim — JSON escapes are not decoded, and a URL containing "*/" would close the comment early; confirm this is acceptable.
+}
+
 function testIgnoredPath(resourceURL) {
 	return resourceURL && (resourceURL.startsWith(DATA_URI_PREFIX) || resourceURL == ABOUT_BLANK_URI);
 }