Procházet zdrojové kódy

improved fonts removal

Gildas před 7 roky
rodič
revize
706c58582f

+ 1 - 0
extension/core/bg/processor.js

@@ -40,6 +40,7 @@ singlefile.processor = (() => {
 		options.responsiveImageData = message.responsiveImageData;
 		options.imageData = message.imageData;
 		options.postersData = message.postersData;
+		options.usedFonts = message.usedFonts;
 		options.insertSingleFileComment = true;
 		options.insertFaviconLink = true;
 		options.backgroundTab = true;

+ 2 - 0
extension/core/content/content-autosave.js

@@ -57,6 +57,7 @@ this.singlefile.autosave = this.singlefile.autosave || (async () => {
 					responsiveImageData: docData.responsiveImageData,
 					imageData: docData.imageData,
 					postersData: docData.postersData,
+					usedFonts: docData.usedFonts,
 					framesData,
 					url: location.href
 				});
@@ -97,6 +98,7 @@ this.singlefile.autosave = this.singlefile.autosave || (async () => {
 				responsiveImageData: docData.responsiveImageData,
 				imageData: docData.imageData,
 				postersData: docData.postersData,
+				usedFonts: docData.usedFonts,
 				framesData,
 				url: location.href
 			});

+ 38 - 20
lib/single-file/css-fonts-minifier.js

@@ -40,8 +40,8 @@ this.fontsMinifier = this.fontsMinifier || (() => {
 	const EMPTY_URL_SOURCE = "url(\"data:base64,\")";
 	const PSEUDO_ELEMENTS = ["::after", "::before", "::first-line", "::first-letter", ":before", ":after", ":first-line", ":first-letter", "::placeholder", "::selection", "::marker", "::cue", "::slotted", "::spelling-error", "::grammar-error"];
 	const FONT_WEIGHTS = {
-		normal: 400,
-		bold: 700
+		normal: "400", // keyword weights are stored as strings, like the string fallback used when reading @font-face rules
+		bold: "700" // numeric equivalent of the bold keyword, also kept as a string
 	};
 	const FONT_STRETCHES = {
 		"ultra-condensed": "50%",
@@ -56,7 +56,7 @@ this.fontsMinifier = this.fontsMinifier || (() => {
 	};
 
 	return {
-		removeUnusedFonts: doc => {
+		removeUnusedFonts: (doc, options) => {
 			const stats = {
 				rules: {
 					processed: 0,
@@ -67,7 +67,7 @@ this.fontsMinifier = this.fontsMinifier || (() => {
 					discarded: 0
 				}
 			};
-			const fontsInfo = { declared: new Set(), used: [] };
+			const fontsInfo = { declared: [], used: [] };
 			let pseudoElementsContent = "";
 			doc.querySelectorAll("style").forEach(style => {
 				if (style.sheet) {
@@ -84,22 +84,23 @@ this.fontsMinifier = this.fontsMinifier || (() => {
 				}
 			});
 			const variableFound = fontsInfo.used.find(fontNames => fontNames.find(fontName => fontName.startsWith("var(--")));
-			let unusedFonts;
+			let unusedFonts, filteredUsedFonts;
 			if (variableFound) {
 				unusedFonts = [];
 			} else {
-				const filteredUsedFonts = new Set();
-				fontsInfo.used.forEach(fontNames => fontNames.forEach(fontName => {
-					if (fontsInfo.declared.has(fontName)) {
-						filteredUsedFonts.add(fontName);
+				filteredUsedFonts = new Map();
+				fontsInfo.used.forEach(fontNames => fontNames.forEach(familyName => {
+					if (fontsInfo.declared.find(fontInfo => fontInfo.familyName == familyName)) {
+						const optionalData = options.usedFonts && options.usedFonts.filter(fontInfo => fontInfo.fontFamily == familyName);
+						filteredUsedFonts.set(familyName, optionalData);
 					}
 				}));
-				unusedFonts = Array.from(fontsInfo.declared).filter(familyName => !filteredUsedFonts.has(familyName));
+				unusedFonts = fontsInfo.declared.filter(fontInfo => !filteredUsedFonts.has(fontInfo.familyName));
 			}
 			const docContent = doc.body.innerText + pseudoElementsContent;
 			doc.querySelectorAll("style").forEach(style => {
 				if (style.sheet) {
-					style.textContent = filterUnusedFonts(doc, style.sheet.cssRules, unusedFonts, docContent);
+					style.textContent = filterUnusedFonts(doc, style.sheet.cssRules, unusedFonts, filteredUsedFonts, docContent);
 					stats.rules.discarded -= style.sheet.cssRules.length;
 				}
 			});
@@ -146,9 +147,12 @@ this.fontsMinifier = this.fontsMinifier || (() => {
 					}
 				} else {
 					if (rule.type == CSSRule.FONT_FACE_RULE && rule.style) {
-						const fontFamilyName = removeQuotes(rule.style.getPropertyValue("font-family"));
-						if (fontFamilyName) {
-							fontsInfo.declared.add(fontFamilyName);
+						const familyName = removeQuotes(rule.style.getPropertyValue("font-family"));
+						const fontWeight = getFontWeight(rule.style.getPropertyValue("font-weight")) || "400";
+						const fontStyle = rule.style.getPropertyValue("font-style") || "normal";
+						const fontVariant = rule.style.getPropertyValue("font-variant") || "normal";
+						if (familyName) {
+							fontsInfo.declared.push({ familyName, fontWeight, fontStyle, fontVariant });
 						}
 					}
 				}
@@ -277,19 +281,33 @@ this.fontsMinifier = this.fontsMinifier || (() => {
 		return cssText;
 	}
 
-	function filterUnusedFonts(doc, rules, unusedFonts, docContent) {
+	function filterUnusedFonts(doc, rules, unusedFonts, filteredUsedFonts, docContent) {
 		let stylesheetContent = "";
 		if (rules) {
 			Array.from(rules).forEach(rule => {
-				const fontFamilyName = rule.style && rule.style.getPropertyValue("font-family");
 				if (rule.media) {
 					stylesheetContent += "@media " + Array.prototype.join.call(rule.media, ",") + "{";
-					stylesheetContent += filterUnusedFonts(doc, rule.cssRules, unusedFonts, docContent);
+					stylesheetContent += filterUnusedFonts(doc, rule.cssRules, unusedFonts, filteredUsedFonts, docContent);
 					stylesheetContent += "}";
 				} else if (rule.type == CSSRule.FONT_FACE_RULE) {
-					if (rule.style && fontFamilyName && !unusedFonts.includes(removeQuotes(fontFamilyName))) {
-						if (testUnicodeRange(docContent, rule.style.getPropertyValue("unicode-range"))) {
-							stylesheetContent += rule.cssText;
+					if (rule.style) {
+						const fontFamily = removeQuotes(rule.style.getPropertyValue("font-family"));
+						if (fontFamily && !unusedFonts.includes(fontFamily)) {
+							let optionalTest;
+							const optionalUsedFonts = filteredUsedFonts && filteredUsedFonts.get(fontFamily);
+							if (optionalUsedFonts && optionalUsedFonts.length) {
+								const fontStyle = rule.style.getPropertyValue("font-style") || "normal";
+								optionalTest = optionalUsedFonts.find(fontInfo => fontInfo.fontStyle == fontStyle);
+								if (optionalTest) {
+									const fontVariant = rule.style.getPropertyValue("font-variant") || "normal";
+									optionalTest = optionalUsedFonts.find(fontInfo => fontInfo.fontVariant == fontVariant);
+								}
+							} else {
+								optionalTest = true;
+							}
+							if (testUnicodeRange(docContent, rule.style.getPropertyValue("unicode-range")) && optionalTest) {
+								stylesheetContent += rule.cssText;
+							}
 						}
 					}
 				} else {

+ 105 - 35
lib/single-file/doc-helper.js

@@ -33,6 +33,12 @@ this.docHelper = this.docHelper || (() => {
 	const MASK_TAGNAME = "singlefile-mask";
 	const BACKDROP_THRESHOLD_SIZE = .95;
 	const BACKDROP_THRESHOLD_ZINDEX = 999;
+	const REGEXP_SIMPLE_QUOTES_STRING = /^'(.*?)'$/; // whole string wrapped in single quotes; group 1 is the inner text
+	const REGEXP_DOUBLE_QUOTES_STRING = /^"(.*?)"$/; // whole string wrapped in double quotes; group 1 is the inner text
+	const FONT_WEIGHTS = { // keyword to numeric weight lookup read by getFontWeight
+		normal: "400",
+		bold: "700"
+	};
 
 	return {
 		preProcessDoc,
@@ -56,37 +62,61 @@ this.docHelper = this.docHelper || (() => {
 			element.parentElement.replaceChild(disabledNoscriptElement, element);
 		});
 		doc.head.querySelectorAll("*:not(base):not(link):not(meta):not(noscript):not(script):not(style):not(template):not(title)").forEach(element => element.hidden = true);
-		if (options.removeHiddenElements) {
-			const markerRemovedContent = removedContentAttributeName(options.sessionId);
-			let ignoredTags = JSON.parse(JSON.stringify(IGNORED_REMOVED_TAG_NAMES));
-			if (!options.removeScripts) {
-				ignoredTags = ignoredTags.concat("SCRIPT");
-			}
-			if (win) {
-				markHiddenCandidates(win, doc.body, false, markerRemovedContent, new Set(), ignoredTags);
-				markHiddenElements(win, doc.body, markerRemovedContent);
-				markBackdropBackground(doc, win, markerRemovedContent);
-			}
-		}
-		if (win && options.compressHTML) {
-			doc.querySelectorAll("*").forEach(element => {
-				const style = win.getComputedStyle(element);
-				if (style && style.whiteSpace.startsWith("pre")) {
-					element.setAttribute(preservedSpaceAttributeName(options.sessionId), "");
+		let canvasData, imageData, usedFonts;
+		if (win) {
+			canvasData = getCanvasData(doc, win);
+			imageData = getImageData(doc, win, options);
+			if (options.removeHiddenElements || options.removeAlternativeFonts || options.compressHTML) {
+				const styles = getStyles(win, doc.body);
+				if (options.removeHiddenElements) {
+					const markerRemovedContent = removedContentAttributeName(options.sessionId);
+					let ignoredTags = JSON.parse(JSON.stringify(IGNORED_REMOVED_TAG_NAMES));
+					if (!options.removeScripts) {
+						ignoredTags = ignoredTags.concat("SCRIPT");
+					}
+					markHiddenCandidates(win, doc.body, styles, false, markerRemovedContent, new Set(), ignoredTags);
+					markHiddenElements(win, doc.body, styles, markerRemovedContent);
+					markBackdropBackground(doc, win, markerRemovedContent);
 				}
-			});
+				if (options.removeAlternativeFonts) {
+					usedFonts = getUsedFonts(styles);
+				}
+				if (options.compressHTML) {
+					styles.forEach((style, element) => {
+						if (style.whiteSpace.startsWith("pre")) {
+							element.setAttribute(preservedSpaceAttributeName(options.sessionId), "");
+						}
+					});
+				}
+			}
 		}
 		retrieveInputValues(doc, options);
 		return {
-			canvasData: win && getCanvasData(doc, win),
+			canvasData,
 			fontsData: getFontsData(doc),
 			stylesheetContents: getStylesheetContents(doc),
 			responsiveImageData: getResponsiveImageData(doc, options),
-			imageData: win && getImageData(doc, win, options),
-			postersData: getPostersData(doc)
+			imageData,
+			postersData: getPostersData(doc),
+			usedFonts
 		};
 	}
 
+	function getUsedFonts(styles) {
+		const usedFonts = new Set();
+		styles.forEach(style => {
+			const fontFamilyNames = style.fontFamily.split(",");
+			fontFamilyNames.forEach(fontFamilyName => {
+				style.fontFamily = removeQuotes(fontFamilyName);
+				usedFonts.add(getFontKey(style));
+			});
+		});
+		return Array.from(usedFonts).map(key => {
+			const [fontFamily, fontWeight, fontStyle, fontVariant] = JSON.parse(key);
+			return { fontFamily, fontWeight, fontStyle, fontVariant };
+		});
+	}
+
 	function markBackdropBackground(doc, win, markerRemovedContent) {
 		const threshold = BACKDROP_THRESHOLD_SIZE;
 		let elements = getCandidateElements();
@@ -106,9 +136,28 @@ this.docHelper = this.docHelper || (() => {
 		}
 	}
 
-	function markHiddenCandidates(win, element, elementHidden, markerRemovedContent, removedCandidates, ignoredTags) {
+	function getStyles(win, element, styles = new Map()) { // Recursively snapshots selected computed-style properties of every HTMLElement below element; element itself is not recorded.
 		const elements = Array.from(element.childNodes).filter(node => node instanceof win.HTMLElement);
-		elements.forEach(element => markHiddenCandidates(win, element, elementHidden || testHiddenElement(win, element), markerRemovedContent, removedCandidates, ignoredTags));
+		elements.forEach(element => {
+			getStyles(win, element, styles); // descendants are stored before the element itself
+			const computedStyle = win.getComputedStyle(element);
+			styles.set(element, { // plain-object copy of the properties, keyed by the element
+				display: computedStyle.display,
+				opacity: computedStyle.opacity,
+				visibility: computedStyle.visibility,
+				fontFamily: computedStyle.fontFamily,
+				fontWeight: getFontWeight(computedStyle.fontWeight), // keyword weights go through the FONT_WEIGHTS lookup
+				fontStyle: computedStyle.fontStyle || "normal", // empty values fall back to normal
+				fontVariant: computedStyle.fontVariant || "normal", // empty values fall back to normal
+				whiteSpace: computedStyle.whiteSpace
+			});
+		});
+		return styles; // the Map that was passed in (or the default one), now populated
+	}
+
+	function markHiddenCandidates(win, element, styles, elementHidden, markerRemovedContent, removedCandidates, ignoredTags) {
+		const elements = Array.from(element.childNodes).filter(node => node instanceof win.HTMLElement);
+		elements.forEach(element => markHiddenCandidates(win, element, styles, elementHidden || testHiddenElement(element, styles.get(element)), markerRemovedContent, removedCandidates, ignoredTags));
 		if (elementHidden && !ignoredTags.includes(element.tagName)) {
 			if (elements.length) {
 				if (!elements.find(element => !removedCandidates.has(element))) {
@@ -121,28 +170,26 @@ this.docHelper = this.docHelper || (() => {
 		}
 	}
 
-	function markHiddenElements(win, element, markerRemovedContent) {
+	function markHiddenElements(win, element, styles, markerRemovedContent) {
 		const elements = Array.from(element.childNodes).filter(node => node.nodeType == win.Node.ELEMENT_NODE);
 		elements.forEach(element => markHiddenElements(win, element, markerRemovedContent));
 		if (element.parentElement.getAttribute(markerRemovedContent) != "") {
 			element.removeAttribute(markerRemovedContent);
+		} else {
+			styles.delete(element);
 		}
 	}
 
-	function testHiddenElement(win, element) {
+	function testHiddenElement(element, style) { // Reports whether element is hidden, from its hidden flag and the supplied style record.
 		let hidden = element.hidden;
-		if (!hidden) {
-			const style = win.getComputedStyle(element);
-			if (style) {
-				hidden = style.display == "none";
-				if (!hidden && (style.opacity == "0" || style.visibility == "hidden")) {
-					const boundingRect = element.getBoundingClientRect();
-					hidden = !boundingRect.width && !boundingRect.height;
-				}
+		if (!hidden && style) { // the style checks are skipped when no style record was supplied
+			hidden = style.display == "none";
+			if (!hidden && (style.opacity == "0" || style.visibility == "hidden")) { // transparent or invisible elements count as hidden only when they occupy no space
+				const boundingRect = element.getBoundingClientRect();
+				hidden = !boundingRect.width && !boundingRect.height;
 			}
 		}
-		hidden = Boolean(hidden);
-		return hidden;
+		return Boolean(hidden); // always a boolean, whatever element.hidden held
 	}
 
 	function postProcessDoc(doc, options) {
@@ -381,4 +428,27 @@ this.docHelper = this.docHelper || (() => {
 		return docTypeString + doc.documentElement.outerHTML;
 	}
 
+	function getFontKey(style) {
+		return JSON.stringify([
+			style.fontFamily,
+			style.fontWeight,
+			style.fontStyle,
+			style.fontVariant
+		]);
+	}
+
+	function removeQuotes(string) {
+		string = string.toLowerCase().trim();
+		if (string.match(REGEXP_SIMPLE_QUOTES_STRING)) {
+			string = string.replace(REGEXP_SIMPLE_QUOTES_STRING, "$1");
+		} else {
+			string = string.replace(REGEXP_DOUBLE_QUOTES_STRING, "$1");
+		}
+		return string.trim();
+	}
+
+	function getFontWeight(weight) {
+		return FONT_WEIGHTS[weight] || weight;
+	}
+
 })();

+ 2 - 0
lib/single-file/frame-tree.js

@@ -101,6 +101,7 @@ this.frameTree = this.frameTree || (() => {
 					frameData.postersData = messageFrameData.postersData;
 					frameData.canvasData = messageFrameData.canvasData;
 					frameData.fontsData = messageFrameData.fontsData;
+					frameData.usedFonts = messageFrameData.usedFonts;
 					frameData.processed = messageFrameData.processed;
 					frameData.timeout = messageFrameData.timeout;
 				}
@@ -266,6 +267,7 @@ this.frameTree = this.frameTree || (() => {
 			postersData: docData.postersData,
 			canvasData: docData.canvasData,
 			fontsData: docData.fontsData,
+			usedFonts: docData.usedFonts,
 			processed: true
 		};
 	}

+ 4 - 4
lib/single-file/single-file-browser.js

@@ -210,12 +210,12 @@ this.SingleFile = this.SingleFile || (() => {
 			return cssMinifier.process(doc, mediaAllInfo);
 		}
 
-		static removeUnusedFonts(doc) {
-			return fontsMinifier.removeUnusedFonts(doc);
+		static removeUnusedFonts(doc, options) { // Delegates to fontsMinifier.removeUnusedFonts and returns its result.
+			return fontsMinifier.removeUnusedFonts(doc, options); // options is forwarded unchanged
 		}
 
-		static removeAlternativeFonts(doc, secondPass) {
-			return fontsMinifier.removeAlternativeFonts(doc, secondPass);
+		static removeAlternativeFonts(doc) { // Delegates to fontsMinifier.removeAlternativeFonts and returns its result.
+			return fontsMinifier.removeAlternativeFonts(doc); // the former secondPass argument is no longer forwarded
 		}
 
 		static getMediaAllInfo(doc) {

+ 3 - 1
lib/single-file/single-file-core.js

@@ -146,6 +146,7 @@ this.SingleFileCore = this.SingleFileCore || (() => {
 				this.options.responsiveImageData = docData.responsiveImageData;
 				this.options.imageData = docData.imageData;
 				this.options.postersData = docData.postersData;
+				this.options.usedFonts = docData.usedFonts;
 			}
 			this.options.content = this.options.content || (this.options.doc ? DOM.serialize(this.options.doc, false) : null);
 			this.onprogress = options.onprogress || (() => { });
@@ -539,7 +540,7 @@ this.SingleFileCore = this.SingleFileCore || (() => {
 		}
 
 		removeUnusedFonts() {
-			DOM.removeUnusedFonts(this.doc);
+			DOM.removeUnusedFonts(this.doc, this.options); // this.options is passed along so the callee can read options.usedFonts
 		}
 
 		removeAlternativeFonts() {
@@ -734,6 +735,7 @@ this.SingleFileCore = this.SingleFileCore || (() => {
 								options.fontsData = frameData.fontsData;
 								options.imageData = frameData.imageData;
 								options.responsiveImageData = frameData.responsiveImageData;
+								options.usedFonts = frameData.usedFonts;
 								frameData.processor = new PageProcessor(options);
 								frameData.frameElement = frameElement;
 								await frameData.processor.loadPage();