|
|
@@ -36,10 +36,7 @@ this.SingleFileCore = this.SingleFileCore || (() => {
|
|
|
constructor(options) {
|
|
|
this.options = options;
|
|
|
this.SELECTED_CONTENT_ATTRIBUTE_NAME = SELECTED_CONTENT_ATTRIBUTE_NAME;
|
|
|
- this.REMOVED_CONTENT_ATTRIBUTE_NAME = REMOVED_CONTENT_ATTRIBUTE_NAME;
|
|
|
this.SELECTED_CONTENT_ROOT_ATTRIBUTE_NAME = SELECTED_CONTENT_ROOT_ATTRIBUTE_NAME;
|
|
|
- this.PRESERVED_SPACE_ELEMENT_ATTRIBUTE_NAME = PRESERVED_SPACE_ELEMENT_ATTRIBUTE_NAME;
|
|
|
- this.WIN_ID_ATTRIBUTE_NAME = WIN_ID_ATTRIBUTE_NAME;
|
|
|
}
|
|
|
async initialize() {
|
|
|
this.processor = new PageProcessor(this.options);
|
|
|
@@ -77,7 +74,20 @@ this.SingleFileCore = this.SingleFileCore || (() => {
|
|
|
class PageProcessor {
|
|
|
constructor(options) {
|
|
|
this.options = options;
|
|
|
+ this.options.content = this.options.content || (this.options.doc ? DOMProcessor.serialize(this.options.doc, false) : null);
|
|
|
+ this.options.url = this.options.url || this.options.doc.location.href;
|
|
|
this.processor = new DOMProcessor(options);
|
|
|
+ if (this.options.doc) {
|
|
|
+ this.processor.fixInlineScripts();
|
|
|
+ this.processor.disableNoscriptElements();
|
|
|
+ this.processor.hideNonMetadataContents();
|
|
|
+ if (this.options.removeHiddenElements) {
|
|
|
+ this.processor.markRemovedElements();
|
|
|
+ }
|
|
|
+ if (this.options.compressHTML) {
|
|
|
+ this.processor.markPreservedElements();
|
|
|
+ }
|
|
|
+ }
|
|
|
this.onprogress = options.onprogress || (() => { });
|
|
|
}
|
|
|
|
|
|
@@ -90,7 +100,7 @@ this.SingleFileCore = this.SingleFileCore || (() => {
|
|
|
async initialize() {
|
|
|
this.onprogress(new ProgressEvent(RESOURCES_INITIALIZING, { pageURL: this.options.url }));
|
|
|
this.processor.removeInfoToolbar();
|
|
|
- this.processor.enableDisabledNoscriptTags();
|
|
|
+ this.processor.enableDisabledNoscriptTags(this.processor.doc.head.querySelectorAll("disabled-noscript"));
|
|
|
this.processor.replaceEmptyStyles();
|
|
|
if (!this.options.jsEnabled || (this.options.saveRawPage && this.options.removeScripts)) {
|
|
|
this.processor.insertNoscriptContents();
|
|
|
@@ -135,6 +145,21 @@ this.SingleFileCore = this.SingleFileCore || (() => {
|
|
|
if (!this.options.removeScripts) {
|
|
|
this.pendingPromises.push(this.processor.scripts());
|
|
|
}
|
|
|
+ if (this.options.doc) {
|
|
|
+ this.processor.enableDisabledNoscriptTags(this.options.doc.querySelectorAll("disabled-noscript"));
|
|
|
+ this.processor.displayHiddenNonMetadataContents();
|
|
|
+ if (this.options.removeHiddenElements) {
|
|
|
+ this.processor.unmarkRemovedElements();
|
|
|
+ }
|
|
|
+ if (this.options.compressHTML) {
|
|
|
+ this.processor.unmarkPreservedElements();
|
|
|
+ }
|
|
|
+ if (!this.options.removeFrames) {
|
|
|
+ this.processor.removeWindowIdFrames();
|
|
|
+ }
|
|
|
+ this.options.doc = null;
|
|
|
+ this.options.win = null;
|
|
|
+ }
|
|
|
this.onprogress(new ProgressEvent(RESOURCES_INITIALIZED, { pageURL: this.options.url, index: 0, max: batchRequest.getMaxResources() }));
|
|
|
}
|
|
|
|
|
|
@@ -256,6 +281,10 @@ this.SingleFileCore = this.SingleFileCore || (() => {
|
|
|
this.baseURI = options.url;
|
|
|
}
|
|
|
|
|
|
+ static serialize(doc, compressHTML) {
|
|
|
+ return DOM.serialize(doc, compressHTML);
|
|
|
+ }
|
|
|
+
|
|
|
async loadPage(pageContent) {
|
|
|
if (!pageContent || this.options.saveRawPage) {
|
|
|
pageContent = await Download.getContent(this.baseURI, { asDataURI: false, maxResourceSize: this.options.maxResourceSize, maxResourceSizeEnabled: this.options.maxResourceSizeEnabled });
|
|
|
@@ -304,7 +333,7 @@ this.SingleFileCore = this.SingleFileCore || (() => {
|
|
|
if (this.options.displayStats) {
|
|
|
size = new Blob([this.doc.documentElement.outerHTML]).size;
|
|
|
}
|
|
|
- const content = this.dom.serialize(this.options);
|
|
|
+ const content = DOMProcessor.serialize(this.doc, this.options.compressHTML);
|
|
|
if (this.options.displayStats) {
|
|
|
this.stats.processed.htmlBytes = new Blob([content]).size;
|
|
|
this.stats.discarded.htmlBytes += size - this.stats.processed.htmlBytes;
|
|
|
@@ -316,6 +345,65 @@ this.SingleFileCore = this.SingleFileCore || (() => {
|
|
|
};
|
|
|
}
|
|
|
|
|
|
+ fixInlineScripts() {
|
|
|
+ this.options.doc.querySelectorAll("script").forEach(element => element.textContent = element.textContent.replace(/<\/script>/gi, "<\\/script>"));
|
|
|
+ }
|
|
|
+
|
|
|
+ disableNoscriptElements() {
|
|
|
+ this.options.doc.head.querySelectorAll("noscript").forEach(element => {
|
|
|
+ const disabledNoscriptElement = this.options.doc.createElement("disabled-noscript");
|
|
|
+ Array.from(element.childNodes).forEach(node => disabledNoscriptElement.appendChild(node));
|
|
|
+ disabledNoscriptElement.hidden = true;
|
|
|
+ element.parentElement.replaceChild(disabledNoscriptElement, element);
|
|
|
+ });
|
|
|
+ }
|
|
|
+
|
|
|
+ hideNonMetadataContents() {
|
|
|
+ this.options.doc.head.querySelectorAll("*:not(base):not(link):not(meta):not(noscript):not(script):not(style):not(template):not(title)").forEach(element => element.hidden = true);
|
|
|
+ }
|
|
|
+
|
|
|
+ markRemovedElements() {
|
|
|
+ this.options.doc.querySelectorAll("html > body *:not(style):not(script):not(link):not(frame):not(iframe):not(object)").forEach(element => {
|
|
|
+ const style = this.options.win.getComputedStyle(element);
|
|
|
+ if (element instanceof this.options.win.HTMLElement && (element.hidden || style.display == "none" || ((style.opacity === 0 || style.visibility == "hidden") && !element.clientWidth && !element.clientHeight)) && !element.querySelector("iframe, frame, object[type=\"text/html\"][data]")) {
|
|
|
+ element.setAttribute(REMOVED_CONTENT_ATTRIBUTE_NAME, "");
|
|
|
+ }
|
|
|
+ });
|
|
|
+ }
|
|
|
+
|
|
|
+ markPreservedElements() {
|
|
|
+ this.options.doc.querySelectorAll("*").forEach(element => {
|
|
|
+ const style = this.options.win.getComputedStyle(element);
|
|
|
+ if (style.whiteSpace.startsWith("pre")) {
|
|
|
+ element.setAttribute(PRESERVED_SPACE_ELEMENT_ATTRIBUTE_NAME, "");
|
|
|
+ }
|
|
|
+ });
|
|
|
+ }
|
|
|
+
|
|
|
+ displayHiddenNonMetadataContents() {
|
|
|
+ this.options.doc.head.querySelectorAll("*:not(base):not(link):not(meta):not(noscript):not(script):not(style):not(template):not(title)").forEach(element => element.removeAttribute("hidden"));
|
|
|
+ }
|
|
|
+
|
|
|
+ unmarkPreservedElements() {
|
|
|
+ this.options.doc.querySelectorAll("[" + PRESERVED_SPACE_ELEMENT_ATTRIBUTE_NAME + "]").forEach(element => element.removeAttribute(PRESERVED_SPACE_ELEMENT_ATTRIBUTE_NAME));
|
|
|
+ }
|
|
|
+
|
|
|
+ unmarkRemovedElements() {
|
|
|
+ this.options.doc.querySelectorAll("[" + REMOVED_CONTENT_ATTRIBUTE_NAME + "]").forEach(element => element.removeAttribute(REMOVED_CONTENT_ATTRIBUTE_NAME));
|
|
|
+ }
|
|
|
+
|
|
|
+ removeWindowIdFrames() {
|
|
|
+ this.options.doc.querySelectorAll("[" + WIN_ID_ATTRIBUTE_NAME + "]").forEach(element => element.removeAttribute(WIN_ID_ATTRIBUTE_NAME));
|
|
|
+ }
|
|
|
+
|
|
|
+ enableDisabledNoscriptTags(noscriptTags) {
|
|
|
+ noscriptTags.forEach(element => {
|
|
|
+ const noscriptElement = this.options.doc.createElement("noscript");
|
|
|
+ Array.from(element.childNodes).forEach(node => noscriptElement.appendChild(node));
|
|
|
+ element.parentElement.replaceChild(noscriptElement, element);
|
|
|
+ });
|
|
|
+ }
|
|
|
+
|
|
|
insertNoscriptContents() {
|
|
|
if (this.DOMParser) {
|
|
|
this.doc.querySelectorAll("noscript").forEach(element => {
|
|
|
@@ -463,14 +551,6 @@ this.SingleFileCore = this.SingleFileCore || (() => {
|
|
|
this.doc.documentElement.insertBefore(commentNode, this.doc.documentElement.firstChild);
|
|
|
}
|
|
|
|
|
|
- enableDisabledNoscriptTags() {
|
|
|
- this.doc.querySelectorAll("disabled-noscript").forEach(element => {
|
|
|
- const noscriptElement = this.doc.createElement("noscript");
|
|
|
- Array.from(element.childNodes).forEach(node => noscriptElement.appendChild(node));
|
|
|
- element.parentElement.replaceChild(noscriptElement, element);
|
|
|
- });
|
|
|
- }
|
|
|
-
|
|
|
replaceCanvasElements() {
|
|
|
if (this.options.canvasData) {
|
|
|
this.doc.querySelectorAll("canvas").forEach((canvasElement, indexCanvasElement) => {
|