|
|
@@ -338,12 +338,25 @@ this.SingleFileCore = this.SingleFileCore || (() => {
|
|
|
this.stats.set("processed", "resources", this.maxResources);
|
|
|
}
|
|
|
|
|
|
- async loadPage(pageContent) {
|
|
|
+ async loadPage(pageContent, charset) {
|
|
|
+ let content;
|
|
|
if (!pageContent || this.options.saveRawPage) {
|
|
|
- const content = await DocUtil.getContent(this.baseURI, { asDataURI: false, maxResourceSize: this.options.maxResourceSize, maxResourceSizeEnabled: this.options.maxResourceSizeEnabled });
|
|
|
+ content = await DocUtil.getContent(this.baseURI, { asDataURI: false, maxResourceSize: this.options.maxResourceSize, maxResourceSizeEnabled: this.options.maxResourceSizeEnabled, charset });
|
|
|
pageContent = content.data;
|
|
|
}
|
|
|
this.doc = DocUtil.createDoc(pageContent, this.baseURI);
|
|
|
+ if (this.options.saveRawPage) {
|
|
|
+ let charset;
|
|
|
+ this.doc.querySelectorAll("meta[charset], meta[http-equiv=\"content-type\"]").forEach(element => {
|
|
|
+ const charsetDeclaration = element.content.split(";")[1];
|
|
|
+ if (charsetDeclaration && !charset) {
|
|
|
+ charset = charsetDeclaration.split("=")[1].trim().toLowerCase();
|
|
|
+ }
|
|
|
+ });
|
|
|
+ if (charset && charset != content.charset) {
|
|
|
+ return this.loadPage(pageContent, charset);
|
|
|
+ }
|
|
|
+ }
|
|
|
this.onEventAttributeNames = Util.getOnEventAttributeNames(this.doc);
|
|
|
}
|
|
|
|