Bladeren bron

further improved "remove hidden elements"

Gildas 7 jaar geleden
bovenliggende
commit
b042439b36
1 gewijzigd bestand met 29 toevoegingen en 11 verwijderingen
  1. 29 11
      lib/single-file/doc-helper.js

+ 29 - 11
lib/single-file/doc-helper.js

@@ -23,13 +23,14 @@
 this.docHelper = this.docHelper || (() => {
 
 	const REMOVED_CONTENT_ATTRIBUTE_NAME = "data-single-file-removed-content";
+	const REMOVED_CANDIDATE_ATTRIBUTE_NAME = "data-single-file-removed-candidate";
 	const PRESERVED_SPACE_ELEMENT_ATTRIBUTE_NAME = "data-single-file-preserved-space-element";
 	const WIN_ID_ATTRIBUTE_NAME = "data-frame-tree-win-id";
 	const RESPONSIVE_IMAGE_ATTRIBUTE_NAME = "data-single-file-responsive-image";
 	const IMAGE_ATTRIBUTE_NAME = "data-single-file-image";
 	const INPUT_VALUE_ATTRIBUTE_NAME = "data-single-file-value";
 	const SHEET_ATTRIBUTE_NAME = "data-single-file-sheet";
-	const IGNORED_TAG_NAMES = ["BR", "SCRIPT", "DISABLED-NOSCRIPT", "META", "LINK", "STYLE", "TITLE"];
+	const IGNORED_TAG_NAMES = ["SCRIPT", "DISABLED-NOSCRIPT", "META", "LINK", "STYLE", "TITLE", "NOSCRIPT", "TEMPLATE", "IFRAME", "FRAME", "OBJECT", "SOURCE"];
 
 	return {
 		preProcessDoc,
@@ -54,7 +55,10 @@ this.docHelper = this.docHelper || (() => {
 		});
 		doc.head.querySelectorAll("*:not(base):not(link):not(meta):not(noscript):not(script):not(style):not(template):not(title)").forEach(element => element.hidden = true);
 		if (options.removeHiddenElements) {
-			markHiddenElements(win, doc.body.childNodes, options);
+			const markerRemovedContent = removedContentAttributeName(options.sessionId);
+			const markerRemovedCandidate = removedCandidateAttributeName(options.sessionId);
+			markHiddenElements(win, doc.body, markerRemovedContent, markerRemovedCandidate);
+			doc.querySelectorAll(("[" + markerRemovedCandidate + "]")).forEach(element => element.removeAttribute(markerRemovedCandidate));
 		}
 		if (options.compressHTML) {
 			doc.querySelectorAll("*").forEach(element => {
@@ -75,16 +79,26 @@ this.docHelper = this.docHelper || (() => {
 		};
 	}
 
-	function markHiddenElements(win, nodes, options) {
-		const marker = removedContentAttributeName(options.sessionId);
-		const elements = Array.from(nodes).filter(node => node.nodeType == Node.ELEMENT_NODE);
-		elements.forEach(element => {
-			markHiddenElements(win, element.childNodes, options);
-			const boundingRect = element.getBoundingClientRect();
-			if (!IGNORED_TAG_NAMES.includes(element.nodeName) && !elements.find(element => !(element instanceof win.HTMLElement) || element.getAttribute(marker) === undefined) && !boundingRect.width && !boundingRect.height) {
-				element.setAttribute(marker, "");
+	function markHiddenElements(win, element, markerRemovedContent, markerRemovedCandidate) {
+		const elements = Array.from(element.childNodes).filter(node => node.nodeType == Node.ELEMENT_NODE);
+		elements.forEach(element => markHiddenElements(win, element, markerRemovedContent, markerRemovedCandidate));
+		if (elements.length) {
+			let hiddenElement = !IGNORED_TAG_NAMES.includes(element.nodeName);
+			if (hiddenElement) {
+				hiddenElement = !elements.find(element => (!(element instanceof win.HTMLElement) || element.getAttribute(markerRemovedCandidate) !== ""));
+				hiddenElement = hiddenElement && element.hidden || (element.style && (element.style.display == "none" || element.style.opacity == "0" || element.style.visibility == "hidden"));
 			}
-		});
+			if (!hiddenElement) {
+				const boundingRect = element.getBoundingClientRect();
+				hiddenElement = !boundingRect.width && !boundingRect.height;
+			}
+			if (hiddenElement) {
+				element.setAttribute(markerRemovedCandidate, "");
+				elements.forEach(element => element.setAttribute(markerRemovedContent, ""));
+			}
+		} else {
+			element.setAttribute(markerRemovedCandidate, "");
+		}
 	}
 
 	function postProcessDoc(doc, options) {
@@ -121,6 +135,10 @@ this.docHelper = this.docHelper || (() => {
 		return REMOVED_CONTENT_ATTRIBUTE_NAME + (sessionId || "");
 	}
 
+	function removedCandidateAttributeName(sessionId) {
+		return REMOVED_CANDIDATE_ATTRIBUTE_NAME + (sessionId || "");
+	}
+
 	function windowIdAttributeName(sessionId) {
 		return WIN_ID_ATTRIBUTE_NAME + (sessionId || "");
 	}