|
@@ -27,12 +27,14 @@ const SingleFileCore = (() => {
|
|
|
/**
 * Factory for the public entry-point class of this module.
 *
 * The first three values of `args` are stored, in order, into the
 * enclosing-scope bindings `Download`, `DOM` and `URL`.
 *
 * @param {...*} args - Values assigned positionally to `Download`, `DOM`, `URL`.
 * @returns {Function} An anonymous class exposing a static `initialize(options)`.
 */
function SingleFileCore(...args) {
	[Download, DOM, URL] = args;
	return class {
		/**
		 * Creates a PageProcessor for `options`, loads `options.content` into it
		 * and hands back a function that performs the rest of the work.
		 *
		 * Only the page loading happens here; the processor's `initialize()` and
		 * `getPageData()` steps are deferred until the returned function is called.
		 *
		 * @param {Object} options - Processing options; `options.content` is passed
		 *   to `loadPage` and `options.onprogress` is installed on the processor.
		 * @returns {Promise<Function>} Async function resolving to the value of
		 *   the processor's `getPageData()`.
		 */
		static async initialize(options) {
			const processor = new PageProcessor(options);
			processor.onprogress = options.onprogress;
			await processor.loadPage(options.content);
			return async () => {
				await processor.initialize();
				return await processor.getPageData();
			};
		}
	};
}
|
|
@@ -80,11 +82,16 @@ const SingleFileCore = (() => {
|
|
|
if (!this.options.jsEnabled) {
|
|
if (!this.options.jsEnabled) {
|
|
|
this.processor.insertNoscriptContents();
|
|
this.processor.insertNoscriptContents();
|
|
|
}
|
|
}
|
|
|
|
|
+ if (this.options.removeFrames) {
|
|
|
|
|
+ this.processor.removeFrames();
|
|
|
|
|
+ }
|
|
|
this.processor.removeDiscardedResources();
|
|
this.processor.removeDiscardedResources();
|
|
|
this.processor.resetCharsetMeta();
|
|
this.processor.resetCharsetMeta();
|
|
|
this.processor.insertFaviconLink();
|
|
this.processor.insertFaviconLink();
|
|
|
this.processor.resolveHrefs();
|
|
this.processor.resolveHrefs();
|
|
|
- this.processor.insertSingleFileCommentNode();
|
|
|
|
|
|
|
+ if (this.options.insertSingleFileComment) {
|
|
|
|
|
+ this.processor.insertSingleFileCommentNode();
|
|
|
|
|
+ }
|
|
|
this.processor.replaceCanvasElements();
|
|
this.processor.replaceCanvasElements();
|
|
|
if (this.options.removeHiddenElements) {
|
|
if (this.options.removeHiddenElements) {
|
|
|
this.processor.removeHiddenElements();
|
|
this.processor.removeHiddenElements();
|
|
@@ -92,14 +99,18 @@ const SingleFileCore = (() => {
|
|
|
if (this.options.removeUnusedCSSRules) {
|
|
if (this.options.removeUnusedCSSRules) {
|
|
|
this.processor.removeUnusedCSSRules();
|
|
this.processor.removeUnusedCSSRules();
|
|
|
}
|
|
}
|
|
|
- await Promise.all([this.processor.inlineStylesheets(true), this.processor.linkStylesheets()], this.processor.attributeStyles(true));
|
|
|
|
|
- this.pendingPromises = Promise.all([this.processor.inlineStylesheets(), this.processor.attributeStyles(), this.processor.pageResources()]);
|
|
|
|
|
|
|
+ const initializationPromises = [this.processor.inlineStylesheets(true), this.processor.linkStylesheets(), this.processor.attributeStyles(true)];
|
|
|
|
|
+ if (!this.options.removeFrames) {
|
|
|
|
|
+ initializationPromises.push(this.processor.frames(true));
|
|
|
|
|
+ }
|
|
|
|
|
+ await Promise.all(initializationPromises);
|
|
|
|
|
+ this.pendingPromises = [this.processor.inlineStylesheets(), this.processor.attributeStyles(), this.processor.pageResources()];
|
|
|
if (this.onprogress) {
|
|
if (this.onprogress) {
|
|
|
this.onprogress(new ProgressEvent(RESOURCES_INITIALIZED, { pageURL: this.options.url, index: 0, max: batchRequest.getMaxResources() }));
|
|
this.onprogress(new ProgressEvent(RESOURCES_INITIALIZED, { pageURL: this.options.url, index: 0, max: batchRequest.getMaxResources() }));
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- async getContent() {
|
|
|
|
|
|
|
+ async getPageData() {
|
|
|
await this.processor.retrieveResources(
|
|
await this.processor.retrieveResources(
|
|
|
details => {
|
|
details => {
|
|
|
if (this.onprogress) {
|
|
if (this.onprogress) {
|
|
@@ -117,10 +128,13 @@ const SingleFileCore = (() => {
|
|
|
if (this.options.removeUnusedCSSRules) {
|
|
if (this.options.removeUnusedCSSRules) {
|
|
|
this.processor.removeUnusedCSSRules();
|
|
this.processor.removeUnusedCSSRules();
|
|
|
}
|
|
}
|
|
|
|
|
+ if (!this.options.removeFrames) {
|
|
|
|
|
+ await this.processor.frames();
|
|
|
|
|
+ }
|
|
|
if (this.onprogress) {
|
|
if (this.onprogress) {
|
|
|
this.onprogress(new ProgressEvent(PAGE_ENDED, { pageURL: this.options.url }));
|
|
this.onprogress(new ProgressEvent(PAGE_ENDED, { pageURL: this.options.url }));
|
|
|
}
|
|
}
|
|
|
- return this.processor.getContent();
|
|
|
|
|
|
|
+ return this.processor.getPageData();
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
@@ -209,7 +223,7 @@ const SingleFileCore = (() => {
|
|
|
await batchRequest.run(beforeListener, afterListener);
|
|
await batchRequest.run(beforeListener, afterListener);
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- getContent() {
|
|
|
|
|
|
|
+ getPageData() {
|
|
|
if (this.options.selected) {
|
|
if (this.options.selected) {
|
|
|
const selectedElement = this.doc.querySelector("[" + SELECTED_CONTENT_ATTRIBUTE_NAME + "]");
|
|
const selectedElement = this.doc.querySelector("[" + SELECTED_CONTENT_ATTRIBUTE_NAME + "]");
|
|
|
DomProcessorHelper.isolateElement(selectedElement.parentElement, selectedElement);
|
|
DomProcessorHelper.isolateElement(selectedElement.parentElement, selectedElement);
|
|
@@ -221,7 +235,7 @@ const SingleFileCore = (() => {
|
|
|
title = titleElement.textContent.trim();
|
|
title = titleElement.textContent.trim();
|
|
|
}
|
|
}
|
|
|
return {
|
|
return {
|
|
|
- title: title || this.baseURI.match(/([^/]*)\/?$/),
|
|
|
|
|
|
|
+ title: title || (this.baseURI ? this.baseURI.match(/([^/]*)\/?$/) : ""),
|
|
|
content: this.dom.serialize()
|
|
content: this.dom.serialize()
|
|
|
};
|
|
};
|
|
|
}
|
|
}
|
|
@@ -243,11 +257,15 @@ const SingleFileCore = (() => {
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
removeDiscardedResources() {
|
|
removeDiscardedResources() {
|
|
|
- this.doc.querySelectorAll("script, iframe, frame, applet, meta[http-equiv=refresh], object:not([type=\"image/svg+xml\"]):not([type=\"image/svg-xml\"]), embed:not([src*=\".svg\"]), link[rel*=preload], link[rel*=prefetch]").forEach(element => element.remove());
|
|
|
|
|
|
|
+ this.doc.querySelectorAll("script, applet, meta[http-equiv=refresh], object:not([type=\"image/svg+xml\"]):not([type=\"image/svg-xml\"]), embed:not([src*=\".svg\"]), link[rel*=preload], link[rel*=prefetch]").forEach(element => element.remove());
|
|
|
this.doc.querySelectorAll("[onload]").forEach(element => element.removeAttribute("onload"));
|
|
this.doc.querySelectorAll("[onload]").forEach(element => element.removeAttribute("onload"));
|
|
|
this.doc.querySelectorAll("audio[src], video[src]").forEach(element => element.removeAttribute("src"));
|
|
this.doc.querySelectorAll("audio[src], video[src]").forEach(element => element.removeAttribute("src"));
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+ removeFrames() {
|
|
|
|
|
+ this.doc.querySelectorAll("iframe, frame").forEach(element => element.remove());
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
resetCharsetMeta() {
|
|
resetCharsetMeta() {
|
|
|
this.doc.querySelectorAll("meta[charset]").forEach(element => element.remove());
|
|
this.doc.querySelectorAll("meta[charset]").forEach(element => element.remove());
|
|
|
const metaElement = this.doc.createElement("meta");
|
|
const metaElement = this.doc.createElement("meta");
|
|
@@ -364,6 +382,37 @@ const SingleFileCore = (() => {
|
|
|
}));
|
|
}));
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+ async frames(initialization) {
|
|
|
|
|
+ let frameElements = this.doc.querySelectorAll("iframe, frame");
|
|
|
|
|
+ frameElements = DomUtil.removeNoScriptFrames(frameElements);
|
|
|
|
|
+ await Promise.all(frameElements.map(async (frameElement, frameIndex) => {
|
|
|
|
|
+ const frameWindowId = (this.options.windowId || "0") + "." + frameIndex;
|
|
|
|
|
+ const frameData = this.options.framesData.find(frame => frame.windowId == frameWindowId);
|
|
|
|
|
+ if (frameData) {
|
|
|
|
|
+ if (initialization) {
|
|
|
|
|
+ const options = {
|
|
|
|
|
+ removeHiddenElements: this.options.removeHiddenElements,
|
|
|
|
|
+ removeUnusedCSSRules: this.options.removeUnusedCSSRules,
|
|
|
|
|
+ url: frameData.baseURI,
|
|
|
|
|
+ windowId: frameWindowId,
|
|
|
|
|
+ jsEnabled: this.options.jsEnabled,
|
|
|
|
|
+ insertSingleFileComment: false,
|
|
|
|
|
+ framesData: this.options.framesData
|
|
|
|
|
+ };
|
|
|
|
|
+ frameData.processor = new PageProcessor(options);
|
|
|
|
|
+ frameData.frameElement = frameElement;
|
|
|
|
|
+ await frameData.processor.loadPage(frameData.content);
|
|
|
|
|
+ return frameData.processor.initialize();
|
|
|
|
|
+ } else {
|
|
|
|
|
+ const pageData = await frameData.processor.getPageData();
|
|
|
|
|
+ frameElement.setAttribute("src", "data:text/html," + pageData.content);
|
|
|
|
|
+ }
|
|
|
|
|
+ } else {
|
|
|
|
|
+ frameElement.setAttribute("src", "about:blank");
|
|
|
|
|
+ }
|
|
|
|
|
+ }));
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
async attributeStyles(initialization) {
|
|
async attributeStyles(initialization) {
|
|
|
await Promise.all(Array.from(this.doc.querySelectorAll("[style]")).map(async element => {
|
|
await Promise.all(Array.from(this.doc.querySelectorAll("[style]")).map(async element => {
|
|
|
const stylesheetContent = initialization ? await DomProcessorHelper.resolveImportURLs(element.getAttribute("style"), this.baseURI) : await DomProcessorHelper.processStylesheet(element.getAttribute("style"), this.baseURI);
|
|
const stylesheetContent = initialization ? await DomProcessorHelper.resolveImportURLs(element.getAttribute("style"), this.baseURI) : await DomProcessorHelper.processStylesheet(element.getAttribute("style"), this.baseURI);
|
|
@@ -571,6 +620,16 @@ const SingleFileCore = (() => {
|
|
|
return stylesheetContent;
|
|
return stylesheetContent;
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
+
|
|
|
|
|
+ static removeNoScriptFrames(frameElements) {
|
|
|
|
|
+ return Array.from(frameElements).filter(element => {
|
|
|
|
|
+ element = element.parentElement;
|
|
|
|
|
+ while (element && element.tagName != "NOSCRIPT") {
|
|
|
|
|
+ element = element.parentElement;
|
|
|
|
|
+ }
|
|
|
|
|
+ return !element;
|
|
|
|
|
+ });
|
|
|
|
|
+ }
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
return SingleFileCore;
|
|
return SingleFileCore;
|