Ver Fonte

improved performance of doc pre-processing

Gildas há 6 anos atrás
pai
commit
59ca7117d0

+ 5 - 3
lib/single-file/single-file-core.js

@@ -554,13 +554,14 @@ this.SingleFileCore = this.SingleFileCore || (() => {
 
 		insertVideoPosters() {
 			if (this.options.postersData) {
-				this.doc.querySelectorAll("video[src], video > source[src]").forEach((element, videoIndex) => {
+				this.doc.querySelectorAll("video[src], video > source[src]").forEach(element => {
 					let videoElement;
 					if (element.tagName == "VIDEO") {
 						videoElement = element;
 					} else {
 						videoElement = element.parentElement;
 					}
+					const videoIndex = Number(element.getAttribute(docUtil.POSTER_ATTRIBUTE_NAME));
 					if (!videoElement.poster && this.options.postersData[videoIndex]) {
 						videoElement.setAttribute("poster", this.options.postersData[videoIndex]);
 					}
@@ -711,8 +712,9 @@ this.SingleFileCore = this.SingleFileCore || (() => {
 
 		replaceCanvasElements() {
 			if (this.options.canvasData) {
-				this.doc.querySelectorAll("canvas").forEach((canvasElement, indexCanvasElement) => {
-					const canvasData = this.options.canvasData[indexCanvasElement];
+				this.doc.querySelectorAll("canvas").forEach(canvasElement => {
+					const canvasIndex = Number(canvasElement.getAttribute(docUtil.CANVAS_ATTRIBUTE_NAME));
+					const canvasData = this.options.canvasData[canvasIndex];
 					if (canvasData) {
 						ProcessorHelper.setBackgroundImage(canvasElement, "url(" + canvasData.dataURI + ")");
 						this.stats.add("processed", "canvas", 1);

+ 125 - 184
lib/single-file/util/doc-helper.js

@@ -30,6 +30,8 @@ this.docHelper = this.docHelper || (() => {
 	const SHADOW_ROOT_ATTRIBUTE_NAME = "data-single-file-shadow-root-element";
 	const WIN_ID_ATTRIBUTE_NAME = "data-frame-tree-win-id";
 	const IMAGE_ATTRIBUTE_NAME = "data-single-file-image";
+	const POSTER_ATTRIBUTE_NAME = "data-single-file-poster";
+	const CANVAS_ATTRIBUTE_NAME = "data-single-file-canvas";
 	const INPUT_VALUE_ATTRIBUTE_NAME = "data-single-file-value";
 	const LAZY_SRC_ATTRIBUTE_NAME = "data-lazy-loaded-src";
 	const IGNORED_REMOVED_TAG_NAMES = ["NOSCRIPT", "DISABLED-NOSCRIPT", "META", "LINK", "STYLE", "TITLE", "TEMPLATE", "SOURCE", "OBJECT"];
@@ -50,6 +52,8 @@ this.docHelper = this.docHelper || (() => {
 		PRESERVED_SPACE_ELEMENT_ATTRIBUTE_NAME,
 		REMOVED_CONTENT_ATTRIBUTE_NAME,
 		IMAGE_ATTRIBUTE_NAME,
+		POSTER_ATTRIBUTE_NAME,
+		CANVAS_ATTRIBUTE_NAME,
 		INPUT_VALUE_ATTRIBUTE_NAME,
 		SHADOW_ROOT_ATTRIBUTE_NAME
 	};
@@ -73,147 +77,148 @@ this.docHelper = this.docHelper || (() => {
 		if (doc.head) {
 			doc.head.querySelectorAll("*:not(base):not(link):not(meta):not(noscript):not(script):not(style):not(template):not(title)").forEach(element => element.hidden = true);
 		}
-		let canvasData, imageData, usedFonts;
-		if (win) {
-			canvasData = getCanvasData(doc, win);
-			imageData = getImageData(doc, win, options);
-		}
-		if (win) {
-			if (doc.body && (options.removeHiddenElements || options.removeUnusedFonts || options.compressHTML)) {
-				let elementsInfo = getElementsInfo(win, doc.body);
-				if (options.removeHiddenElements) {
-					let ignoredTags = JSON.parse(JSON.stringify(IGNORED_REMOVED_TAG_NAMES));
-					if (!options.removeScripts) {
-						ignoredTags = ignoredTags.concat("SCRIPT");
-					}
-					markHiddenCandidates(win, doc.body, elementsInfo, false, new Set(), ignoredTags);
-					markHiddenElements(doc.body, imageData);
-					doc.querySelectorAll("iframe").forEach(element => {
-						if (element.getBoundingClientRect) {
-							const boundingRect = element.getBoundingClientRect();
-							if (element.hidden || element.style.display == "none" || boundingRect.width <= 1 && boundingRect.height <= 1) {
-								element.setAttribute(REMOVED_CONTENT_ATTRIBUTE_NAME, "");
-							}
-						}
-					});
-					elementsInfo = new Map(Array.from(elementsInfo).filter(([element]) => !element.attributes || element.getAttribute(REMOVED_CONTENT_ATTRIBUTE_NAME) != ""));
-				}
-				if (options.removeUnusedFonts) {
-					let loadedFonts;
-					if (doc.fonts) {
-						loadedFonts = Array.from(doc.fonts).filter(font => font.status == "loaded" || font.status == "loading");
-					}
-					usedFonts = getUsedFonts(elementsInfo, loadedFonts);
-				}
-				if (options.compressHTML) {
-					elementsInfo.forEach((elementInfo, element) => {
-						if (element.attributes && elementInfo.whiteSpace.startsWith("pre")) {
-							element.setAttribute(PRESERVED_SPACE_ELEMENT_ATTRIBUTE_NAME, "");
-						}
-					});
+		let elementsInfo;
+		if (win && doc.body) {
+			if (options.removeHiddenElements) {
+				options.ignoredTags = Array.from(IGNORED_REMOVED_TAG_NAMES);
+				if (!options.removeScripts) {
+					options.ignoredTags = options.ignoredTags.concat("SCRIPT");
 				}
 			}
+			elementsInfo = getElementsInfo(win, doc, doc.body, options);
 		}
-		retrieveInputValues(doc);
+		saveInputValues(doc);
 		return {
-			canvasData,
+			canvasData: elementsInfo.canvasData,
 			fontsData: getFontsData(doc),
 			stylesheetsData: getStylesheetsData(doc),
-			imageData,
-			postersData: getPostersData(doc),
-			usedFonts,
-			shadowRootsData: getShadowRootsData(doc.body),
+			imageData: elementsInfo.imagesData,
+			postersData: elementsInfo.postersData,
+			usedFonts: elementsInfo.usedFonts,
+			shadowRootsData: elementsInfo.shadowRootsData,
 			referrer: doc.referrer
 		};
 	}
 
-	function getShadowRootsData(element, data = { indexElement: 0, shadowRootsData: [] }) {
-		element.querySelectorAll("*").forEach(element => {
+	function getElementsInfo(win, doc, element, options, data = { usedFonts: new Set(), canvasData: [], imagesData: [], postersData: [], shadowRootsData: [] }, ascendantHidden) {
+		const elements = Array.from(element.childNodes).filter(node => node instanceof win.HTMLElement);
+		elements.forEach(element => {
+			let elementHidden;
+			if (options.removeHiddenElements || options.removeUnusedFonts || options.compressHTML) {
+				const computedStyle = win.getComputedStyle(element);
+				if (options.removeHiddenElements) {
+					const display = computedStyle.getPropertyValue("display");
+					const opacity = computedStyle.getPropertyValue("opacity");
+					const visibility = computedStyle.getPropertyValue("visibility");
+					if (ascendantHidden && !options.ignoredTags.includes(element.tagName)) {
+						if (elements.length) {
+							const elements = Array.from(element.childNodes).filter(node => node instanceof win.HTMLElement);
+							elements.forEach(element => element.setAttribute(REMOVED_CONTENT_ATTRIBUTE_NAME, ""));
+						}
+					}
+					elementHidden = ascendantHidden || testHiddenElement(element, { display, opacity, visibility });
+				}
+				if (options.compressHTML) {
+					const whiteSpace = computedStyle.getPropertyValue("white-space");
+					if (whiteSpace.startsWith("pre")) {
+						element.setAttribute(PRESERVED_SPACE_ELEMENT_ATTRIBUTE_NAME, "");
+					}
+				}
+				if (options.removeUnusedFonts) {
+					getUsedFont(computedStyle, options, data.usedFonts);
+					getUsedFont(win.getComputedStyle(element, ":first-letter"), options, data.usedFonts);
+					getUsedFont(win.getComputedStyle(element, ":before"), options, data.usedFonts);
+					getUsedFont(win.getComputedStyle(element, ":after"), options, data.usedFonts);
+				}
+			}
+			getResourcesInfo(win, doc, element, options, data, elementHidden);
 			if (element.shadowRoot) {
-				const indexEntry = data.indexElement;
-				element.setAttribute(SHADOW_ROOT_ATTRIBUTE_NAME, indexEntry);
-				data.indexElement++;
-				getShadowRootsData(element.shadowRoot, data);
-				data.shadowRootsData[indexEntry] = { content: element.shadowRoot.innerHTML };
+				const shadowRootInfo = {};
+				element.setAttribute(SHADOW_ROOT_ATTRIBUTE_NAME, data.shadowRootsData.length);
+				data.shadowRootsData.push(shadowRootInfo);
+				getElementsInfo(win, doc, element.shadowRoot, options, data, elementHidden);
+				shadowRootInfo.content = element.shadowRoot.innerHTML;
 			}
+			getElementsInfo(win, doc, element, options, data, elementHidden);
 		});
-		return data.shadowRootsData;
+		data.usedFonts = Array.from(data.usedFonts);
+		return data;
 	}
 
-	function getUsedFonts(styles, loadedFonts) {
-		const usedFonts = new Set();
-		styles.forEach(style => {
-			const fontFamilyNames = style.fontFamily.split(",");
-			fontFamilyNames.forEach(fontFamilyName => {
-				fontFamilyName = normalizeFontFamily(fontFamilyName);
-				if (!loadedFonts || loadedFonts.find(font => normalizeFontFamily(font.family) == fontFamilyName && font.style == style.fontStyle)) {
-					const { fontWeight, fontStyle, fontVariant } = style;
-					usedFonts.add([fontFamilyName, fontWeight, fontStyle, fontVariant]);
+	function getResourcesInfo(win, doc, element, options, data, elementHidden) {
+		if (element.tagName == "CANVAS") {
+			try {
+				const size = getSize(win, element);
+				data.canvasData.push({ dataURI: element.toDataURL("image/png", ""), width: size.width, height: size.height });
+				element.setAttribute(CANVAS_ATTRIBUTE_NAME, data.canvasData.length - 1);
+			} catch (error) {
+				// ignored
+			}
+		}
+		if (element.tagName == "IMG") {
+			element.setAttribute(IMAGE_ATTRIBUTE_NAME, data.imagesData.length);
+			const imageData = {
+				currentSrc: elementHidden ?
+					"" :
+					(options.loadDeferredImages && element.getAttribute(LAZY_SRC_ATTRIBUTE_NAME)) || element.currentSrc
+			};
+			element.removeAttribute(LAZY_SRC_ATTRIBUTE_NAME);
+			const computedStyle = win.getComputedStyle(element);
+			if (computedStyle) {
+				imageData.size = getSize(win, element);
+				if ((!computedStyle.getPropertyValue("box-shadow") || computedStyle.getPropertyValue("box-shadow") == "none") &&
+					(!computedStyle.getPropertyValue("background-image") || computedStyle.getPropertyValue("background-image") == "none") &&
+					(imageData.size.pxWidth > 1 || imageData.size.pxHeight > 1)) {
+					imageData.replaceable = true;
+					imageData.backgroundColor = computedStyle.getPropertyValue("background-color");
+					imageData.objectFit = computedStyle.getPropertyValue("object-fit");
+					imageData.boxSizing = computedStyle.getPropertyValue("box-sizing");
+					imageData.objectPosition = computedStyle.getPropertyValue("object-position");
 				}
-			});
-		});
-		return Array.from(usedFonts);
-	}
-
-	function normalizeFontFamily(fontFamilyName) {
-		return removeQuotes(fontFamilyName.trim()).toLowerCase();
-	}
-
-	function getElementsInfo(win, element) {
-		const elementsInfo = new Map();
-		element.querySelectorAll("*").forEach(element => {
-			setInfo(win, element, elementsInfo);
-			setInfo(win, element, elementsInfo, ":first-letter");
-			setInfo(win, element, elementsInfo, ":before");
-			setInfo(win, element, elementsInfo, ":after");
-		});
-		return elementsInfo;
-	}
-
-	function setInfo(win, element, elementsInfo, pseudoElement) {
-		const computedStyle = win.getComputedStyle(element, pseudoElement);
-		const key = pseudoElement ? { element, pseudoElement } : element;
-		elementsInfo.set(key, {
-			display: computedStyle.getPropertyValue("display"),
-			opacity: computedStyle.getPropertyValue("opacity"),
-			visibility: computedStyle.getPropertyValue("visibility"),
-			fontFamily: computedStyle.getPropertyValue("font-family"),
-			fontWeight: getFontWeight(computedStyle.getPropertyValue("font-weight")),
-			fontStyle: computedStyle.getPropertyValue("font-style") || "normal",
-			fontVariant: computedStyle.getPropertyValue("font-variant") || "normal",
-			whiteSpace: computedStyle.getPropertyValue("white-space")
-		});
-	}
-
-	function markHiddenCandidates(win, element, elementsInfo, elementHidden, removedCandidates, ignoredTags) {
-		const elements = Array.from(element.childNodes).filter(node => node instanceof win.HTMLElement);
-		elements.forEach(element => markHiddenCandidates(win, element, elementsInfo, elementHidden || testHiddenElement(element, elementsInfo.get(element)), removedCandidates, ignoredTags));
-		if (elementHidden && !ignoredTags.includes(element.tagName)) {
-			if (elements.length) {
-				if (!elements.find(element => !removedCandidates.has(element))) {
-					removedCandidates.add(element);
-					elements.forEach(element => element.setAttribute(REMOVED_CONTENT_ATTRIBUTE_NAME, ""));
+			}
+			data.imagesData.push(imageData);
+		}
+		if (element.tagName == "VIDEO") {
+			if (!element.poster) {
+				const canvasElement = doc.createElement("canvas");
+				const context = canvasElement.getContext("2d");
+				canvasElement.width = element.clientWidth;
+				canvasElement.height = element.clientHeight;
+				try {
+					context.drawImage(element, 0, 0, canvasElement.width, canvasElement.height);
+					data.postersData.push(canvasElement.toDataURL("image/png", ""));
+					element.setAttribute(POSTER_ATTRIBUTE_NAME, data.postersData.length - 1);
+				} catch (error) {
+					// ignored
 				}
-			} else {
-				removedCandidates.add(element);
 			}
 		}
-	}
-
-	function markHiddenElements(element, imageData) {
-		if (element.getAttribute(REMOVED_CONTENT_ATTRIBUTE_NAME) == "") {
-			element.removeAttribute(REMOVED_CONTENT_ATTRIBUTE_NAME);
-			if (element.tagName == "IMG") {
-				const imgData = imageData[Number(element.getAttribute(IMAGE_ATTRIBUTE_NAME))];
-				if (imgData) {
-					imgData.currentSrc = "";
+		if (element.tagName == "IFRAME") {
+			if (element.getBoundingClientRect) {
+				const boundingRect = element.getBoundingClientRect();
+				if (element.hidden || element.style.display == "none" || boundingRect.width <= 1 && boundingRect.height <= 1) {
+					element.setAttribute(REMOVED_CONTENT_ATTRIBUTE_NAME, "");
 				}
 			}
-		} else {
-			Array.from(element.childNodes).filter(node => node.nodeType == 1).forEach(element => markHiddenElements(element, imageData));
 		}
 	}
 
+	function getUsedFont(computedStyle, options, usedFonts) {
+		const fontStyle = computedStyle.getPropertyValue("font-style") || "normal";
+		computedStyle.getPropertyValue("font-family").split(",").forEach(fontFamilyName => {
+			fontFamilyName = normalizeFontFamily(fontFamilyName);
+			if (!options.loadedFonts || options.loadedFonts.find(font => normalizeFontFamily(font.family) == fontFamilyName && font.style == fontStyle)) {
+				const fontWeight = getFontWeight(computedStyle.getPropertyValue("font-weight"));
+				const fontVariant = computedStyle.getPropertyValue("font-variant") || "normal";
+				usedFonts.add([fontFamilyName, fontWeight, fontStyle, fontVariant]);
+			}
+		});
+	}
+
+	function normalizeFontFamily(fontFamilyName) {
+		return removeQuotes(fontFamilyName.trim()).toLowerCase();
+	}
+
 	function testHiddenElement(element, style) {
 		let hidden = false;
 		if (style) {
@@ -246,25 +251,12 @@ this.docHelper = this.docHelper || (() => {
 			doc.querySelectorAll("[" + PRESERVED_SPACE_ELEMENT_ATTRIBUTE_NAME + "]").forEach(element => element.removeAttribute(PRESERVED_SPACE_ELEMENT_ATTRIBUTE_NAME));
 		}
 		doc.querySelectorAll("[" + IMAGE_ATTRIBUTE_NAME + "]").forEach(element => element.removeAttribute(IMAGE_ATTRIBUTE_NAME));
+		doc.querySelectorAll("[" + POSTER_ATTRIBUTE_NAME + "]").forEach(element => element.removeAttribute(POSTER_ATTRIBUTE_NAME));
+		doc.querySelectorAll("[" + CANVAS_ATTRIBUTE_NAME + "]").forEach(element => element.removeAttribute(CANVAS_ATTRIBUTE_NAME));
 		doc.querySelectorAll("[" + INPUT_VALUE_ATTRIBUTE_NAME + "]").forEach(element => element.removeAttribute(INPUT_VALUE_ATTRIBUTE_NAME));
 		doc.querySelectorAll("[" + SHADOW_ROOT_ATTRIBUTE_NAME + "]").forEach(element => element.removeAttribute(SHADOW_ROOT_ATTRIBUTE_NAME));
 	}
 
-	function getCanvasData(doc, win) {
-		if (doc) {
-			const canvasData = [];
-			doc.querySelectorAll("canvas").forEach(canvasElement => {
-				try {
-					const size = getSize(win, canvasElement);
-					canvasData.push({ dataURI: canvasElement.toDataURL("image/png", ""), width: size.width, height: size.height });
-				} catch (error) {
-					canvasData.push(null);
-				}
-			});
-			return canvasData;
-		}
-	}
-
 	function getStylesheetsData(doc) {
 		if (doc) {
 			const contents = [];
@@ -287,34 +279,6 @@ this.docHelper = this.docHelper || (() => {
 		}
 	}
 
-	function getImageData(doc, win, options) {
-		if (doc) {
-			const data = [];
-			doc.querySelectorAll("img").forEach((imageElement, imageElementIndex) => {
-				imageElement.setAttribute(IMAGE_ATTRIBUTE_NAME, imageElementIndex);
-				const imageData = {
-					currentSrc: (options.loadDeferredImages && imageElement.getAttribute(LAZY_SRC_ATTRIBUTE_NAME)) || imageElement.currentSrc
-				};
-				imageElement.removeAttribute(LAZY_SRC_ATTRIBUTE_NAME);
-				const computedStyle = win.getComputedStyle(imageElement);
-				if (computedStyle) {
-					imageData.size = getSize(win, imageElement);
-					if ((!computedStyle.getPropertyValue("box-shadow") || computedStyle.getPropertyValue("box-shadow") == "none") &&
-						(!computedStyle.getPropertyValue("background-image") || computedStyle.getPropertyValue("background-image") == "none") &&
-						(imageData.size.pxWidth > 1 || imageData.size.pxHeight > 1)) {
-						imageData.replaceable = true;
-						imageData.backgroundColor = computedStyle.getPropertyValue("background-color");
-						imageData.objectFit = computedStyle.getPropertyValue("object-fit");
-						imageData.boxSizing = computedStyle.getPropertyValue("box-sizing");
-						imageData.objectPosition = computedStyle.getPropertyValue("object-position");
-					}
-				}
-				data.push(imageData);
-			});
-			return data;
-		}
-	}
-
 	function getSize(win, imageElement) {
 		let pxWidth = imageElement.naturalWidth;
 		let pxHeight = imageElement.naturalHeight;
@@ -358,36 +322,13 @@ this.docHelper = this.docHelper || (() => {
 		}
 	}
 
-	function getPostersData(doc) {
-		if (doc) {
-			const postersData = [];
-			doc.querySelectorAll("video").forEach(videoElement => {
-				if (videoElement.poster) {
-					postersData.push(null);
-				} else {
-					const canvasElement = doc.createElement("canvas");
-					const context = canvasElement.getContext("2d");
-					canvasElement.width = videoElement.clientWidth;
-					canvasElement.height = videoElement.clientHeight;
-					try {
-						context.drawImage(videoElement, 0, 0, canvasElement.width, canvasElement.height);
-						postersData.push(canvasElement.toDataURL("image/png", ""));
-					} catch (error) {
-						postersData.push(null);
-					}
-				}
-			});
-			return postersData;
-		}
-	}
-
 	function getFontsData() {
 		if (typeof hooksFrame != "undefined") {
 			return hooksFrame.getFontsData();
 		}
 	}
 
-	function retrieveInputValues(doc) {
+	function saveInputValues(doc) {
 		doc.querySelectorAll("input").forEach(input => input.setAttribute(INPUT_VALUE_ATTRIBUTE_NAME, input.value));
 		doc.querySelectorAll("input[type=radio], input[type=checkbox]").forEach(input => input.setAttribute(INPUT_VALUE_ATTRIBUTE_NAME, input.checked));
 		doc.querySelectorAll("textarea").forEach(textarea => textarea.setAttribute(INPUT_VALUE_ATTRIBUTE_NAME, textarea.value));

+ 2 - 0
lib/single-file/util/doc-util.js

@@ -152,6 +152,8 @@ this.docUtil = this.docUtil || (() => {
 				WIN_ID_ATTRIBUTE_NAME: modules.docHelper.WIN_ID_ATTRIBUTE_NAME,
 				REMOVED_CONTENT_ATTRIBUTE_NAME: modules.docHelper.REMOVED_CONTENT_ATTRIBUTE_NAME,
 				IMAGE_ATTRIBUTE_NAME: modules.docHelper.IMAGE_ATTRIBUTE_NAME,
+				POSTER_ATTRIBUTE_NAME: modules.docHelper.POSTER_ATTRIBUTE_NAME,
+				CANVAS_ATTRIBUTE_NAME: modules.docHelper.CANVAS_ATTRIBUTE_NAME,
 				INPUT_VALUE_ATTRIBUTE_NAME: modules.docHelper.INPUT_VALUE_ATTRIBUTE_NAME,
 				SHADOW_ROOT_ATTRIBUTE_NAME: modules.docHelper.SHADOW_ROOT_ATTRIBUTE_NAME,
 				PRESERVED_SPACE_ELEMENT_ATTRIBUTE_NAME: modules.docHelper.PRESERVED_SPACE_ELEMENT_ATTRIBUTE_NAME