Gildas 7 yıl önce
ebeveyn
işleme
70ad806a17

+ 5 - 5
lib/single-file/single-file-browser.js

@@ -149,21 +149,21 @@ this.SingleFileBrowser = this.SingleFileBrowser || (() => {
 			try {
 				buffer = await resourceContent.arrayBuffer();
 			} catch (error) {
-				return { data: "", resourceURL };
+				return { data: "", resourceURL, charset };
 			}
 			if (DEBUG) {
 				log("  // ENDED   download url =", resourceURL, "delay =", Date.now() - startTime);
 			}
 			if (options.maxResourceSizeEnabled && buffer.byteLength > options.maxResourceSize * ONE_MB) {
-				return { data: "", resourceURL };
+				return { data: "", resourceURL, charset };
 			} else {
 				try {
-					return { data: new TextDecoder(charset).decode(buffer), resourceURL };
+					return { data: new TextDecoder(charset).decode(buffer), resourceURL, charset };
 				} catch (error) {
 					try {
-						return { data: new TextDecoder("utf-8").decode(buffer), resourceURL };
+						return { data: new TextDecoder("utf-8").decode(buffer), resourceURL, charset };
 					} catch (error) {
-						return { data: "", resourceURL };
+						return { data: "", resourceURL, charset };
 					}
 				}
 			}

+ 15 - 2
lib/single-file/single-file-core.js

@@ -338,12 +338,25 @@ this.SingleFileCore = this.SingleFileCore || (() => {
 			this.stats.set("processed", "resources", this.maxResources);
 		}
 
-		async loadPage(pageContent) {
+		async loadPage(pageContent, charset) { // Builds this.doc from pageContent, or from content fetched at this.baseURI; charset is forwarded to DocUtil.getContent.
+			let content; // Only assigned when the page is fetched below.
 			if (!pageContent || this.options.saveRawPage) {
-				const content = await DocUtil.getContent(this.baseURI, { asDataURI: false, maxResourceSize: this.options.maxResourceSize, maxResourceSizeEnabled: this.options.maxResourceSizeEnabled });
+				content = await DocUtil.getContent(this.baseURI, { asDataURI: false, maxResourceSize: this.options.maxResourceSize, maxResourceSizeEnabled: this.options.maxResourceSizeEnabled, charset }); // Fetch the content at this.baseURI, forwarding the size limits and the requested charset.
 				pageContent = content.data;
 			}
 			this.doc = DocUtil.createDoc(pageContent, this.baseURI);
+			if (this.options.saveRawPage) { // content is always set here: the fetch branch above runs whenever saveRawPage is truthy.
+				let charset; // NOTE(review): shadows the charset parameter; holds the first charset declared by the document's meta tags.
+				this.doc.querySelectorAll("meta[charset], meta[http-equiv=\"content-type\"]").forEach(element => {
+					const charsetDeclaration = element.content.split(";")[1]; // Second ";"-separated segment of the content attribute, e.g. " charset=utf-8". NOTE(review): meta[charset] elements are selected, but only the content attribute is read - presumably a bare <meta charset="..."> yields nothing here; confirm.
+					if (charsetDeclaration && !charset) { // Keep only the first declaration found.
+						charset = charsetDeclaration.split("=")[1].trim().toLowerCase(); // NOTE(review): throws a TypeError when the segment contains no "=".
+					}
+				});
+				if (charset && charset != content.charset) { // Declared charset differs from the one reported for the fetched content: load again with the declared charset.
+					return this.loadPage(pageContent, charset); // saveRawPage is still set, so the recursive call fetches the page again. NOTE(review): termination assumes getContent reports back the charset it was given - confirm.
+				}
+			}
 			this.onEventAttributeNames = Util.getOnEventAttributeNames(this.doc);
 		}