Parcourir la source

add `Import MHTML file` feature in the annotation editor

Gildas il y a 2 mois
Parent
commit
f9997aabe8

+ 4 - 0
_locales/az/messages.json

@@ -1007,6 +1007,10 @@
 		"message": "Səhifəni çap edin",
 		"description": "Title of the button 'Print the page' in the editor"
 	},
+	"editorImportMht": {
+		"message": "MHTML faylını daxil et",
+		"description": "Title of the button 'Import MHTML file' in the editor"
+	},
 	"pendingsTitle": {
 		"message": "Gözləyənlər",
 		"description": "Title of the pending save page 'Pending saves' in the editor"

+ 4 - 0
_locales/de/messages.json

@@ -1007,6 +1007,10 @@
 		"message": "Drucken der Webseite",
 		"description": "Title of the button 'Print the page' in the editor"
 	},
+	"editorImportMht": {
+		"message": "MHTML-Datei importieren",
+		"description": "Title of the button 'Import MHTML file' in the editor"
+	},
 	"pendingsTitle": {
 		"message": "Ausstehende Speicherungen",
 		"description": "Title of the pending save page 'Pending saves' in the editor"

+ 4 - 0
_locales/en/messages.json

@@ -1007,6 +1007,10 @@
 		"message": "Print the page",
 		"description": "Title of the button 'Print the page' in the editor"
 	},
+	"editorImportMht": {
+		"message": "Import MHTML file",
+		"description": "Title of the button 'Import MHTML file' in the editor"
+	},
 	"pendingsTitle": {
 		"message": "Pending saves",
 		"description": "Title of the pending save page 'Pending saves' in the editor"

+ 4 - 0
_locales/es/messages.json

@@ -1007,6 +1007,10 @@
 		"message": "Imprimir la página",
 		"description": "Title of the button 'Print the page' in the editor"
 	},
+	"editorImportMht": {
+		"message": "Importar archivo MHTML",
+		"description": "Title of the button 'Import MHTML file' in the editor"
+	},
 	"pendingsTitle": {
 		"message": "Guardados pendientes",
 		"description": "Title of the pending save page 'Pending saves' in the editor"

+ 4 - 0
_locales/fr/messages.json

@@ -1007,6 +1007,10 @@
 		"message": "Imprimer la page",
 		"description": "Title of the button 'Print the page' in the editor"
 	},
+	"editorImportMht": {
+		"message": "Importer un fichier MHTML",
+		"description": "Title of the button 'Import MHTML file' in the editor"
+	},
 	"pendingsTitle": {
 		"message": "Sauvegardes en cours",
 		"description": "Title of the pending save page 'Pending saves' in the editor"

+ 4 - 0
_locales/it/messages.json

@@ -1007,6 +1007,10 @@
 		"message": "Stampa la pagina",
 		"description": "Title of the button 'Print the page' in the editor"
 	},
+	"editorImportMht": {
+		"message": "Importa file MHTML",
+		"description": "Title of the button 'Import MHTML file' in the editor"
+	},
 	"pendingsTitle": {
 		"message": "Salvataggi in attesa",
 		"description": "Title of the pending save page 'Pending saves' in the editor"

+ 4 - 0
_locales/ja/messages.json

@@ -1007,6 +1007,10 @@
 		"message": "ページを印刷する",
 		"description": "Title of the button 'Print the page' in the editor"
 	},
+	"editorImportMht": {
+		"message": "MHTMLファイルをインポート",
+		"description": "Title of the button 'Import MHTML file' in the editor"
+	},
 	"pendingsTitle": {
 		"message": "保留中の保存",
 		"description": "Title of the pending save page 'Pending saves' in the editor"

+ 4 - 0
_locales/nl_NL/messages.json

@@ -1007,6 +1007,10 @@
 		"message": "Pagina afdrukken",
 		"description": "Title of the button 'Print the page' in the editor"
 	},
+	"editorImportMht": {
+		"message": "Importeer MHTML-bestand",
+		"description": "Title of the button 'Import MHTML file' in the editor"
+	},
 	"pendingsTitle": {
 		"message": "Opslaan in afwachting",
 		"description": "Title of the pending save page 'Pending saves' in the editor"

+ 4 - 0
_locales/pl/messages.json

@@ -1007,6 +1007,10 @@
 		"message": "Wydrukuj stronę",
 		"description": "Title of the button 'Print the page' in the editor"
 	},
+	"editorImportMht": {
+		"message": "Importuj plik MHTML",
+		"description": "Title of the button 'Import MHTML file' in the editor"
+	},
 	"pendingsTitle": {
 		"message": "Oczekujące zapisy",
 		"description": "Title of the pending save page 'Pending saves' in the editor"

+ 4 - 0
_locales/pt_PT/messages.json

@@ -1007,6 +1007,10 @@
 		"message": "Imprimir a página",
 		"description": "Title of the button 'Print the page' in the editor"
 	},
+	"editorImportMht": {
+		"message": "Importar ficheiro MHTML",
+		"description": "Title of the button 'Import MHTML file' in the editor"
+	},
 	"pendingsTitle": {
 		"message": "Transferência pendente",
 		"description": "Title of the pending save page 'Pending saves' in the editor"

+ 4 - 0
_locales/pt_br/messages.json

@@ -1007,6 +1007,10 @@
 		"message": "Imprimir a página",
 		"description": "Title of the button 'Print the page' in the editor"
 	},
+	"editorImportMht": {
+		"message": "Importar arquivo MHTML",
+		"description": "Title of the button 'Import MHTML file' in the editor"
+	},
 	"pendingsTitle": {
 		"message": "Conteúdo para ser salvo",
 		"description": "Title of the pending save page 'Pending saves' in the editor"

+ 5 - 1
_locales/ru/messages.json

@@ -1007,6 +1007,10 @@
 		"message": "Печать страницы",
 		"description": "Title of the button 'Print the page' in the editor"
 	},
+	"editorImportMht": {
+		"message": "Импортировать файл MHTML",
+		"description": "Title of the button 'Import MHTML file' in the editor"
+	},
 	"pendingsTitle": {
 		"message": "Отложенное сохранение",
 		"description": "Title of the pending save page 'Pending saves' in the editor"
@@ -1123,4 +1127,4 @@
 		"message": "Неправильный JSON",
 		"description": "Options editor error message: 'Invalid JSON'"
 	}
-}
+}

+ 4 - 0
_locales/sv/messages.json

@@ -1007,6 +1007,10 @@
 		"message": "Skriv ut sidan",
 		"description": "Title of the button 'Print the page' in the editor"
 	},
+	"editorImportMht": {
+		"message": "Importera MHTML-fil",
+		"description": "Title of the button 'Import MHTML file' in the editor"
+	},
 	"pendingsTitle": {
 		"message": "Väntande sparingar",
 		"description": "Title of the pending save page 'Pending saves' in the editor"

+ 4 - 0
_locales/tr/messages.json

@@ -1007,6 +1007,10 @@
 		"message": "Sayfayı yazdır",
 		"description": "Title of the button 'Print the page' in the editor"
 	},
+	"editorImportMht": {
+		"message": "MHTML dosyasını içe aktar",
+		"description": "Title of the button 'Import MHTML file' in the editor"
+	},
 	"pendingsTitle": {
 		"message": "Bekleyen kaydetmeler",
 		"description": "Title of the pending save page 'Pending saves' in the editor"

+ 4 - 0
_locales/uk/messages.json

@@ -1007,6 +1007,10 @@
 		"message": "Роздрукувати сторінку",
 		"description": "Title of the button 'Print the page' in the editor"
 	},
+	"editorImportMht": {
+		"message": "Імпортувати файл MHTML",
+		"description": "Title of the button 'Import MHTML file' in the editor"
+	},
 	"pendingsTitle": {
 		"message": "Очікують на збереження",
 		"description": "Title of the pending save page 'Pending saves' in the editor"

+ 4 - 0
_locales/zh_CN/messages.json

@@ -1007,6 +1007,10 @@
 		"message": "打印页面",
 		"description": "Title of the button 'Print the page' in the editor"
 	},
+	"editorImportMht": {
+		"message": "导入 MHTML 文件",
+		"description": "Title of the button 'Import MHTML file' in the editor"
+	},
 	"pendingsTitle": {
 		"message": "挂起保存项",
 		"description": "Title of the pending save page 'Pending saves' in the editor"

+ 4 - 0
_locales/zh_TW/messages.json

@@ -1007,6 +1007,10 @@
 		"message": "打印頁面",
 		"description": "Title of the button 'Print the page' in the editor"
 	},
+	"editorImportMht": {
+		"message": "導入 MHTML 文件",
+		"description": "Title of the button 'Import MHTML file' in the editor"
+	},
 	"pendingsTitle": {
 		"message": "掛起保存項",
 		"description": "Title of the pending save page 'Pending saves' in the editor"

+ 651 - 0
src/lib/mhtml-to-html/convert.js

@@ -0,0 +1,651 @@
+/* global URL */
+
+import {
+    decodeMimeHeader,
+    parseDOM,
+    decodeBase64,
+    decodeBinary,
+    getCharset,
+    getResourceURI,
+    resolvePath,
+    isStylesheet,
+    EVENT_HANDLER_ATTRIBUTES
+} from "./util.js";
+import * as cssTree from "./vendor/csstree.esm.js";
+import * as srcsetParser from "./srcset-parser.js";
+
+const BASE64_ENCODING = "base64"; // transferEncoding value: the index page is base64-decoded in convert(), fetched resources go through decodeBinary()
+const HREF_ATTRIBUTE = "href";
+const SRC_ATTRIBUTE = "src";
+const TITLE_ATTRIBUTE = "title";
+const SRCSET_ATTRIBUTE = "srcset";
+const SRCDOC_ATTRIBUTE = "srcdoc";
+const CONTENT_ATTRIBUTE = "content";
+const STYLE_ATTRIBUTE = "style";
+const MEDIA_ATTRIBUTE = "media";
+const BACKGROUND_ATTRIBUTE = "background";
+const REL_ATTRIBUTE = "rel";
+const DATA_ATTRIBUTE = "data";
+const TYPE_ATTRIBUTE = "type";
+const PING_ATTRIBUTE = "ping";
+const HTTP_EQUIV_ATTRIBUTE = "http-equiv";
+const INTEGRITY_ATTRIBUTE = "integrity";
+const CHARSET_ATTRIBUTE = "charset";
+const SHADOWMODE_ATTRIBUTE = "shadowmode"; // convert() renames this <template> attribute to SHADOWROOTMODE_ATTRIBUTE
+const SHADOWROOTMODE_ATTRIBUTE = "shadowrootmode";
+const SIZES_ATTRIBUTE = "sizes";
+const STYLESHEET_CONTENT_TYPE = "text/css";
+const CID_PROTOCOL = "cid:"; // frame sources with this prefix are looked up in `frames` under the key "<...>"
+const DATA_PROTOCOL = "data:";
+const HTTP_PROTOCOL = "http:";
+const HTTPS_PROTOCOL = "https:";
+const URN_PROTOCOL = "urn:";
+const AT_RULE = "Atrule";
+const IMPORT_RULE = "import";
+const URL_FUNCTION = "Url";
+const STYLESHEET_CONTEXT = "stylesheet"; // `context` option passed to replaceStylesheetUrls() for whole stylesheets
+const DECLARATION_LIST_CONTEXT = "declarationList"; // `context` option passed to replaceStylesheetUrls() for inline style attributes
+const BASE_TAG = "BASE";
+const LINK_TAG = "LINK";
+const STYLE_TAG = "STYLE";
+const IMG_TAG = "IMG";
+const AUDIO_TAG = "AUDIO";
+const VIDEO_TAG = "VIDEO";
+const SOURCE_TAG = "SOURCE";
+const SCRIPT_TAG = "SCRIPT";
+const BODY_TAG = "BODY";
+const TABLE_TAG = "TABLE";
+const TD_TAG = "TD";
+const TH_TAG = "TH";
+const INPUT_TAG = "INPUT";
+const IFRAME_TAG = "IFRAME";
+const FRAME_TAG = "FRAME";
+const EMBED_TAG = "EMBED";
+const OBJECT_TAG = "OBJECT";
+const A_TAG = "A";
+const AREA_TAG = "AREA";
+const META_TAG = "META";
+const TEMPLATE_TAG = "TEMPLATE";
+const HEAD_TAG = "HEAD";
+const TITLE_TAG = "TITLE";
+const ORIGINAL_URL_FUNCTION_NAME = "--mhtml-to-html-url";
+const ORIGINAL_URL_ATTRIBUTE_PREFIX = "data-original-"; // prefix of the attribute that keeps the original href on an inlined <style> element
+const CONTENT_TYPE_HEADER = "Content-Type";
+const REL_ATTRIBUTE_STYLESHEET = "stylesheet";
+const REL_ATTRIBUTE_ICON = "icon";
+const REL_ATTRIBUTE_CANONICAL = "canonical";
+const REL_ATTRIBUTE_ALTERNATE = "alternate";
+const HTTP_EQUIV_ATTRIBUTE_REFRESH = "refresh"; // <meta http-equiv> values whose elements convert() queues for removal (this one and the CSP one below)
+const HTTP_EQUIV_ATTRIBUTE_CSP = "content-security-policy";
+const TYPE_ATTRIBUTE_IMAGE = "image";
+const REL_REMOVED_VALUES_REGEXP = /(preconnect|prerender|dns-prefetch|preload|prefetch|manifest|modulepreload)/gi; // <link rel> tokens stripped by convert(); a link left with an empty rel is queued for removal
+const URN_ERROR_REGEXP = /^urn:[^:]+:(.+)$/; // group 1 (everything after the second ":") is used by fetchAndConvert() as the URL to fetch
+const APPLICATION_OCTET_STREAM_CONTENT_TYPE = "application/octet-stream"; // fallback when a fetched response has no Content-Type header
+const JSON_LD_CONTENT_TYPE = "application/ld+json"; // <script> elements of this type are kept even when scripts are disabled
+const TEXT_HTML_CONTENT_TYPE = "text/html";
+const UTF8_CHARSET = "utf-8";
+const INDEX_PAGE_NOT_FOUND_ERROR = "Index page not found"; // thrown by convert() when resources[index] is missing
+const CID_REGEXP = /^<.+>$/; // matches a value wrapped in angle brackets; convert() tests `index` against it
+const CONTENT_TYPE_SEPARATOR = ";";
+const DATA_URI_PAYLOAD_SEPARATOR = ",";
+const EMPTY_STRING = "";
+const ORIGINAL_URL_FUNCTION_REGEXP = /url\(--mhtml-to-html-url\\\(\\"(.*?)\\"\\\)\\ /g; // NOTE(review): presumably paired with ORIGINAL_URL_COMMENT as a replace() pattern; usage is not visible in this chunk, confirm
+const ORIGINAL_URL_COMMENT = "/* original URL: $1 */ url(";
+const DEFAULT_CSP = "default-src 'none'; connect-src 'self' data:; font-src 'self' data:; img-src 'self' data:; style-src 'self' 'unsafe-inline' data:; frame-src 'self' data:; media-src 'self' data:; object-src 'self' data:; "; // NOTE(review): CSP fragments (this and the two below); where they are emitted is not visible in this chunk
+const JS_ENABLED_CSP = "script-src 'self' 'unsafe-inline' data:;";
+const JS_DISABLED_CSP = "script-src 'none';";
+const CSS_FUNCTION_PARENTHESIS_START = "(";
+const CSS_FUNCTION_PARENTHESIS_END = ") ";
+const SUBJECT_HEADER = "Subject";
+const DATE_HEADER = "Date";
+const FROM_HEADER = "From";
+const JSON_LD_PAGE_INFO = { // schema.org "WebPage" JSON-LD skeleton; NOTE(review): presumably completed before being emitted, usage not visible here
+    "@context": "https://schema.org",
+    "@type": "WebPage",
+    "additionalProperty": {
+        "@type": "PropertyValue",
+        "name": "savedBy"
+    }
+};
+
+export default fetchAndConvert;
+
+async function fetchAndConvert(mhtml, config, failedResources = []) {
+    if (config.fetchMissingResources) {
+        let { fetch } = config;
+        let missingResources = [];
+        if (!fetch) {
+            fetch = globalThis.fetch;
+        }
+        missingResources = convert(mhtml, config);
+        missingResources = missingResources.filter(resource => !failedResources.includes(resource.id));
+        if (missingResources.length) {
+            await Promise.all(missingResources.map(async resource => {
+                const { id, transferEncoding } = resource;
+                let url = id;
+                const urnErrorMatch = url.match(URN_ERROR_REGEXP);
+                if (urnErrorMatch) {
+                    url = urnErrorMatch[1];
+                }
+                try {
+                    const response = await fetch(url);
+                    if (response.ok) {
+                        resource.contentType = response.headers.get(CONTENT_TYPE_HEADER) || APPLICATION_OCTET_STREAM_CONTENT_TYPE;
+                        if (transferEncoding === BASE64_ENCODING) {
+                            const bytes = await response.bytes();
+                            resource.data = decodeBinary(bytes);
+                        } else {
+                            resource.data = await response.text();
+                        }
+                        mhtml.resources[id] = resource;
+                    } else if (!failedResources.includes(id)) {
+                        failedResources.push(id);
+                    }
+                    // eslint-disable-next-line no-unused-vars
+                } catch (_) {
+                    if (!failedResources.includes(id)) {
+                        failedResources.push(id);
+                    }
+                }
+            }));
+            return fetchAndConvert(mhtml, config, failedResources);
+        } else {
+            return convert(mhtml, { ...config, fetchMissingResources: false });
+        }
+    } else {
+        return convert(mhtml, config);
+    }
+}
+
+function convert({ headers, frames, resources, unfoundResources = new Set(), index, id }, { DOMParser, enableScripts, fetchMissingResources } = { DOMParser: globalThis.DOMParser }) {
+    let resource = resources[index];
+    if (!resource) {
+        throw new Error(INDEX_PAGE_NOT_FOUND_ERROR);
+    }
+    let base = resource.id;
+    if (resource.transferEncoding === BASE64_ENCODING) {
+        resource.transferEncoding = undefined;
+        resource.data = decodeBase64(resource.data, getCharset(resource.contentType));
+    }
+    const contentType = resource.contentType.split(CONTENT_TYPE_SEPARATOR)[0];
+    const dom = parseDOM(resource.data, contentType, DOMParser);
+    const document = dom.document;
+    let nodes = [document];
+    const baseElement = document.getElementsByTagName(BASE_TAG)[0];
+    if (baseElement) {
+        const href = baseElement.getAttribute(HREF_ATTRIBUTE);
+        if (href) {
+            base = resolvePath(href, base);
+        }
+        baseElement.remove();
+    }
+    if (!fetchMissingResources) {
+        resource.used = true;
+    }
+    nodes = [document];
+    let canonicalLinkElement;
+    const stylesheets = {};
+    const missingResources = [];
+    const removedNodes = [];
+    const favicons = [];
+    let title;
+    while (nodes.length) {
+        const childNode = nodes.shift();
+        if (childNode.childNodes) {
+            for (const child of childNode.childNodes) {
+                let href, src;
+                if (child.getAttribute) {
+                    href = child.getAttribute(HREF_ATTRIBUTE);
+                    if (href) {
+                        href = resolvePath(href, base);
+                    }
+                    src = child.getAttribute(SRC_ATTRIBUTE);
+                    if (src) {
+                        src = resolvePath(src, base);
+                    }
+                    const style = child.getAttribute(STYLE_ATTRIBUTE);
+                    if (style) {
+                        const declarations = replaceStylesheetUrls(resources, base, { data: style }, { context: DECLARATION_LIST_CONTEXT }, stylesheets, fetchMissingResources && missingResources, unfoundResources);
+                        if (!fetchMissingResources) {
+                            child.setAttribute(STYLE_ATTRIBUTE, declarations);
+                        }
+                    }
+                    const integrity = child.getAttribute(INTEGRITY_ATTRIBUTE);
+                    if (integrity) {
+                        child.removeAttribute(INTEGRITY_ATTRIBUTE);
+                    }
+                }
+                if (!enableScripts && child.removeAttribute) {
+                    EVENT_HANDLER_ATTRIBUTES.forEach(attribute => child.removeAttribute(attribute));
+                }
+                if (child.tagName && child.tagName.toUpperCase() === LINK_TAG && href) {
+                    let rel = child.getAttribute(REL_ATTRIBUTE);
+                    if (rel) {
+                        rel = rel.toLowerCase();
+                        if (rel === REL_ATTRIBUTE_STYLESHEET) {
+                            resource = getResource(resources, href, child.getAttribute(HREF_ATTRIBUTE));
+                            if (resource) {
+                                let base = resource.id;
+                                if (base.startsWith(CID_PROTOCOL)) {
+                                    if (index.match(CID_REGEXP)) {
+                                        base = id;
+                                    } else {
+                                        base = index;
+                                    }
+                                }
+                                const stylesheet = replaceStylesheetUrls(resources, base, resource, { context: STYLESHEET_CONTEXT }, stylesheets, fetchMissingResources && missingResources, unfoundResources);
+                                if (!fetchMissingResources) {
+                                    const styleElement = document.createElement(STYLE_TAG);
+                                    styleElement.type = STYLESHEET_CONTENT_TYPE;
+                                    const media = child.getAttribute(MEDIA_ATTRIBUTE);
+                                    if (media) {
+                                        styleElement.setAttribute(MEDIA_ATTRIBUTE, media);
+                                    }
+                                    resource.used = true;
+                                    resource.data = stylesheet;
+                                    if (!href.startsWith(DATA_PROTOCOL)) {
+                                        styleElement.setAttribute(ORIGINAL_URL_ATTRIBUTE_PREFIX + HREF_ATTRIBUTE, href);
+                                    }
+                                    styleElement.appendChild(document.createTextNode(resource.data));
+                                    child.replaceWith(styleElement);
+                                }
+                            } else if (fetchMissingResources) {
+                                addMissingResource(missingResources, href);
+                            } else {
+                                unfoundResources.add(href);
+                                setAttribute(child, HREF_ATTRIBUTE, href);
+                            }
+                            if (!fetchMissingResources) {
+                                const title = child.getAttribute(TITLE_ATTRIBUTE);
+                                if (title && rel.includes(REL_ATTRIBUTE_ALTERNATE)) {
+                                    removedNodes.push(child);
+                                }
+                            }
+                        } else if (rel.includes(REL_ATTRIBUTE_ICON)) {
+                            resource = getResource(resources, href, child.getAttribute(HREF_ATTRIBUTE));
+                            const media = child.getAttribute(MEDIA_ATTRIBUTE);
+                            const type = child.getAttribute(TYPE_ATTRIBUTE);
+                            const sizes = child.getAttribute(SIZES_ATTRIBUTE);
+                            if (resource) {
+                                if (!fetchMissingResources) {
+                                    resource.used = true;
+                                    const resourceURI = getResourceURI(resource);
+                                    setAttribute(child, HREF_ATTRIBUTE, resourceURI);
+                                    favicons.push({ href: resourceURI, media, type, sizes, originalHref: href });
+                                }
+                            } else if (fetchMissingResources) {
+                                addMissingResource(missingResources, href, BASE64_ENCODING);
+                            } else {
+                                unfoundResources.add(href);
+                                setAttribute(child, HREF_ATTRIBUTE, href);
+                                favicons.push({ href, media, type, sizes });
+                            }
+                        } else if (rel == REL_ATTRIBUTE_CANONICAL && !fetchMissingResources) {
+                            canonicalLinkElement = child;
+                        }
+                        if (!fetchMissingResources) {
+                            const relValue = rel
+                                .replace(REL_REMOVED_VALUES_REGEXP, EMPTY_STRING)
+                                .trim();
+                            if (relValue.length) {
+                                child.setAttribute(REL_ATTRIBUTE, relValue);
+                            } else {
+                                removedNodes.push(child);
+                            }
+                        }
+                    }
+                } else if (child.tagName && child.tagName.toUpperCase() === STYLE_TAG) {
+                    const style = replaceStylesheetUrls(resources, base, { data: child.textContent }, { context: STYLESHEET_CONTEXT }, stylesheets, fetchMissingResources && missingResources, unfoundResources);
+                    if (!fetchMissingResources) {
+                        const styleElement = document.createElement(STYLE_TAG);
+                        styleElement.type = STYLESHEET_CONTENT_TYPE;
+                        const media = child.getAttribute(MEDIA_ATTRIBUTE);
+                        if (media) {
+                            styleElement.setAttribute(MEDIA_ATTRIBUTE, media);
+                        }
+                        styleElement.appendChild(document.createTextNode(style));
+                        child.replaceWith(styleElement);
+                    }
+                } else if (child.tagName && child.tagName.toUpperCase() === IMG_TAG || child.tagName && child.tagName.toUpperCase() === AUDIO_TAG || child.tagName && child.tagName.toUpperCase() === VIDEO_TAG || child.tagName && child.tagName.toUpperCase() === SOURCE_TAG || child.tagName && child.tagName.toUpperCase() === SCRIPT_TAG) {
+                    if (src) {
+                        resource = getResource(resources, src, child.getAttribute(SRC_ATTRIBUTE));
+                        if (resource) {
+                            if (!fetchMissingResources) {
+                                resource.used = true;
+                                setAttribute(child, SRC_ATTRIBUTE, getResourceURI(resource));
+                            }
+                        } else if (fetchMissingResources) {
+                            addMissingResource(missingResources, src, BASE64_ENCODING);
+                        } else {
+                            unfoundResources.add(src);
+                            setAttribute(child, SRC_ATTRIBUTE, src);
+                        }
+                    }
+                    if (child.tagName && child.tagName.toUpperCase() === IMG_TAG || child.tagName && child.tagName.toUpperCase() === SOURCE_TAG) {
+                        const srcset = child.getAttribute(SRCSET_ATTRIBUTE);
+                        if (srcset) {
+                            const srcsetData = srcsetParser.parse(srcset).map(data => {
+                                const src = resolvePath(data.url, base);
+                                const resource = getResource(resources, src, data.url);
+                                if (resource) {
+                                    if (!fetchMissingResources) {
+                                        resource.used = true;
+                                        data.url = getResourceURI(resource);
+                                    }
+                                } else if (fetchMissingResources) {
+                                    addMissingResource(missingResources, src, BASE64_ENCODING);
+                                } else {
+                                    unfoundResources.add(src);
+                                    data.url = src;
+                                }
+                                return data;
+                            });
+                            if (!fetchMissingResources) {
+                                setAttribute(child, SRCSET_ATTRIBUTE, srcsetParser.serialize(srcsetData));
+                            }
+                        }
+                    } else if (child.tagName && child.tagName.toUpperCase() === SCRIPT_TAG && !fetchMissingResources) {
+                        let type = child.getAttribute(TYPE_ATTRIBUTE);
+                        if (type) {
+                            type = type.toLowerCase();
+                        }
+                        if (!enableScripts && (!type || type !== JSON_LD_CONTENT_TYPE)) {
+                            removedNodes.push(child);
+                        }
+                    }
+                } else if (child.tagName && child.tagName.toUpperCase() === BODY_TAG || child.tagName && child.tagName.toUpperCase() === TABLE_TAG || child.tagName && child.tagName.toUpperCase() === TD_TAG || child.tagName && child.tagName.toUpperCase() === TH_TAG) {
+                    let background = child.getAttribute(BACKGROUND_ATTRIBUTE);
+                    if (background && !background.startsWith(DATA_PROTOCOL)) {
+                        background = resolvePath(background, base);
+                        resource = getResource(resources, background, child.getAttribute(BACKGROUND_ATTRIBUTE));
+                        if (resource) {
+                            if (!fetchMissingResources) {
+                                resource.used = true;
+                                setAttribute(child, BACKGROUND_ATTRIBUTE, getResourceURI(resource));
+                            }
+                        } else if (fetchMissingResources) {
+                            addMissingResource(missingResources, background, BASE64_ENCODING);
+                        } else {
+                            unfoundResources.add(background);
+                            setAttribute(child, BACKGROUND_ATTRIBUTE, background);
+                        }
+                    }
+                } else if (child.tagName && child.tagName.toUpperCase() === INPUT_TAG) {
+                    const type = child.getAttribute(TYPE_ATTRIBUTE);
+                    if (type && type.toLowerCase() === TYPE_ATTRIBUTE_IMAGE && src) {
+                        resource = getResource(resources, src, child.getAttribute(SRC_ATTRIBUTE));
+                        if (resource) {
+                            if (!fetchMissingResources) {
+                                resource.used = true;
+                                setAttribute(child, SRC_ATTRIBUTE, getResourceURI(resource));
+                            }
+                        } else if (fetchMissingResources) {
+                            addMissingResource(missingResources, src, BASE64_ENCODING);
+                        } else {
+                            unfoundResources.add(src);
+                            setAttribute(child, SRC_ATTRIBUTE, src);
+                        }
+                    }
+                } else if (child.tagName && child.tagName.toUpperCase() === IFRAME_TAG || child.tagName && child.tagName.toUpperCase() === FRAME_TAG || child.tagName && child.tagName.toUpperCase() === EMBED_TAG || child.tagName && child.tagName.toUpperCase() === OBJECT_TAG) {
+                    let id, attribute;
+                    if (child.tagName && child.tagName.toUpperCase() === OBJECT_TAG) {
+                        attribute = DATA_ATTRIBUTE;
+                        src = child.getAttribute(DATA_ATTRIBUTE);
+                        if (src) {
+                            src = resolvePath(src, base);
+                        }
+                    } else {
+                        attribute = SRC_ATTRIBUTE;
+                    }
+                    if (src) {
+                        if (src.startsWith(CID_PROTOCOL)) {
+                            id = `<${src.split(CID_PROTOCOL)[1]}>`;
+                            resource = frames[id];
+                        } else {
+                            id = src;
+                            resource = getResource(resources, src, child.getAttribute(attribute));
+                        }
+                        if (resource) {
+                            if (child.tagName && child.tagName.toUpperCase() === EMBED_TAG || child.tagName && child.tagName.toUpperCase() === OBJECT_TAG) {
+                                if (!fetchMissingResources) {
+                                    resource.used = true;
+                                    setAttribute(child, attribute, getResourceURI(resource));
+                                }
+                            } else {
+                                const result = convert({
+                                    resources: Object.assign({}, resources, { [id]: resource }),
+                                    unfoundResources,
+                                    frames: frames,
+                                    index: id,
+                                    id: resource.id
+                                }, { DOMParser, enableScripts, fetchMissingResources });
+                                if (fetchMissingResources) {
+                                    for (const missingResource of result) {
+                                        if (!missingResources.find(resource => resource.id === missingResource.id)) {
+                                            missingResources.push(missingResource);
+                                        }
+                                    }
+                                } else {
+                                    resource.used = true;
+                                    if (child.tagName && child.tagName.toUpperCase() === IFRAME_TAG) {
+                                        setAttribute(child, SRC_ATTRIBUTE);
+                                        child.removeAttribute(SRC_ATTRIBUTE);
+                                        child.setAttribute(SRCDOC_ATTRIBUTE, result.data);
+                                    } else {
+                                        setAttribute(child, attribute, DATA_PROTOCOL + TEXT_HTML_CONTENT_TYPE + DATA_URI_PAYLOAD_SEPARATOR + encodeURIComponent(result.data));
+                                    }
+                                }
+                            }
+                        } else if (fetchMissingResources) {
+                            addMissingResource(missingResources, src);
+                        } else {
+                            unfoundResources.add(src);
+                            setAttribute(child, attribute, src);
+                        }
+                    }
+                } else if ((child.tagName && child.tagName.toUpperCase() === A_TAG || child.tagName && child.tagName.toUpperCase() === AREA_TAG) && !fetchMissingResources) {
+                    if (href) {
+                        try {
+                            const url = new URL(child.getAttribute(HREF_ATTRIBUTE), base);
+                            const hash = url.hash;
+                            url.hash = EMPTY_STRING;
+                            if (url == base && hash) {
+                                child.setAttribute(HREF_ATTRIBUTE, hash);
+                            } else {
+                                child.setAttribute(HREF_ATTRIBUTE, href);
+                            }
+                            // eslint-disable-next-line no-unused-vars
+                        } catch (_) {
+                            child.setAttribute(HREF_ATTRIBUTE, href);
+                        }
+                    }
+                    child.removeAttribute(PING_ATTRIBUTE);
+                } else if (child.tagName && child.tagName.toUpperCase() === META_TAG && !fetchMissingResources) {
+                    let httpEquiv = child.getAttribute(HTTP_EQUIV_ATTRIBUTE);
+                    if (httpEquiv) {
+                        httpEquiv = httpEquiv.toLowerCase();
+                        if (httpEquiv === HTTP_EQUIV_ATTRIBUTE_REFRESH || httpEquiv === HTTP_EQUIV_ATTRIBUTE_CSP) {
+                            removedNodes.push(child);
+                        }
+                    }
+                } else if (child.tagName && child.tagName.toUpperCase() === TEMPLATE_TAG && !fetchMissingResources) {
+                    const shadowModeAttribute = child.getAttribute(SHADOWMODE_ATTRIBUTE);
+                    if (shadowModeAttribute) {
+                        child.removeAttribute(SHADOWMODE_ATTRIBUTE);
+                        child.setAttribute(SHADOWROOTMODE_ATTRIBUTE, shadowModeAttribute);
+                    }
+                    if (child.content) {
+                        child.content.childNodes.forEach(node => nodes.push(node));
+                    }
+                } else if (child.tagName && child.tagName.toUpperCase() === TITLE_TAG && !fetchMissingResources && childNode.tagName && childNode.tagName.toUpperCase() === HEAD_TAG && title === undefined && child.textContent) {
+                    title = child.textContent;
+                }
+                nodes.push(child);
+            }
+        }
+    }
+    if (fetchMissingResources) {
+        return missingResources;
+    } else {
+        removedNodes.forEach(node => node.remove());
+        if (!canonicalLinkElement) {
+            const linkElement = document.createElement(LINK_TAG);
+            linkElement.setAttribute(REL_ATTRIBUTE, REL_ATTRIBUTE_CANONICAL);
+            linkElement.setAttribute(HREF_ATTRIBUTE, index);
+            document.head.appendChild(linkElement);
+        }
+        let metaElement = document.createElement(META_TAG);
+        metaElement.setAttribute(HTTP_EQUIV_ATTRIBUTE, HTTP_EQUIV_ATTRIBUTE_CSP);
+        let csp = DEFAULT_CSP;
+        if (enableScripts) {
+            csp += JS_ENABLED_CSP;
+        } else {
+            csp += JS_DISABLED_CSP;
+        }
+        metaElement.setAttribute(CONTENT_ATTRIBUTE, csp);
+        if (document.head.firstChild) {
+            document.head.prepend(metaElement);
+        } else {
+            document.head.appendChild(metaElement);
+        }
+        metaElement.setAttribute(CONTENT_ATTRIBUTE, csp);
+        metaElement = document.createElement(META_TAG);
+        metaElement.setAttribute(CHARSET_ATTRIBUTE, UTF8_CHARSET);
+        document.head.prepend(metaElement);
+        if (headers) {
+            const pageInfoElement = document.createElement(SCRIPT_TAG);
+            pageInfoElement.setAttribute(TYPE_ATTRIBUTE, JSON_LD_CONTENT_TYPE);
+            pageInfoElement.appendChild(document.createTextNode(JSON.stringify(getPageInfo(headers, index), null, 2)));
+            if (document.head.firstChild) {
+                document.head.firstChild.after(pageInfoElement);
+            } else {
+                document.head.appendChild(pageInfoElement);
+            }
+        }
+        if (unfoundResources.size) {
+            unfoundResources.forEach(id => {
+                if (!id.startsWith(DATA_PROTOCOL)) {
+                    resources[id] = { id, notFound: true, used: true };
+                }
+            });
+        }
+        return {
+            title,
+            favicons,
+            data: dom.serialize()
+        };
+    }
+}
+
+function setAttribute(element, attribute, newValue) {
+    const value = element.getAttribute(attribute);
+    if (value && !value.startsWith(DATA_PROTOCOL) && value !== newValue) {
+        element.setAttribute(ORIGINAL_URL_ATTRIBUTE_PREFIX + attribute, value);
+    }
+    if (newValue !== undefined) {
+        element.setAttribute(attribute, newValue);
+    }
+}
+
+function replaceStylesheetUrls(resources, base, resource, options = {}, stylesheets, missingResources, unfoundResources) {
+    let ast;
+    if (resource.id !== undefined) {
+        if (stylesheets[resource.id]) {
+            return stylesheets[resource.id].data;
+        } else {
+            stylesheets[resource.id] = {};
+        }
+    }
+    try {
+        ast = cssTree.parse(resource.data, options);
+        // eslint-disable-next-line no-unused-vars
+    } catch (_) {
+        // ignored
+    }
+    if (ast) {
+        cssTree.walk(ast, node => {
+            if (node.type === URL_FUNCTION) {
+                const path = node.value;
+                if (!path.startsWith(DATA_PROTOCOL) && !path.startsWith(ORIGINAL_URL_FUNCTION_NAME)) {
+                    const id = resolvePath(path, base);
+                    const resource = getResource(resources, id, path);
+                    if (resource) {
+                        if (!missingResources) {
+                            resource.used = true;
+                            if (isStylesheet(resource.contentType)) {
+                                resource.data = replaceStylesheetUrls(resources, resource.id, resource, { context: STYLESHEET_CONTEXT }, stylesheets, missingResources, unfoundResources);
+                            }
+                            node.value = getOriginalUrlFunction(id, getResourceURI(resource));
+                        }
+                    } else if (missingResources) {
+                        addMissingResource(missingResources, id, BASE64_ENCODING);
+                    } else {
+                        unfoundResources.add(id);
+                        node.value = getOriginalUrlFunction(id);
+                    }
+                }
+            } else if (node.type === AT_RULE && node.name.toLowerCase() === IMPORT_RULE) {
+                const path = node.prelude.children.first.value;
+                if (!path.startsWith(DATA_PROTOCOL) && !path.startsWith(ORIGINAL_URL_FUNCTION_NAME)) {
+                    const id = resolvePath(path, base);
+                    const resource = getResource(resources, id, path);
+                    if (resource) {
+                        resource.data = replaceStylesheetUrls(resources, resource.id, resource, { context: STYLESHEET_CONTEXT }, stylesheets, missingResources, unfoundResources);
+                        if (!missingResources) {
+                            resource.used = true;
+                            node.prelude.children.first.value = getOriginalUrlFunction(id, getResourceURI(resource));
+                        }
+                    } else if (missingResources) {
+                        addMissingResource(missingResources, id);
+                    } else {
+                        unfoundResources.add(id);
+                        node.prelude.children.first.value = getOriginalUrlFunction(id);
+                    }
+                }
+            }
+        });
+        try {
+            const result = cssTree.generate(ast);
+            if (resource.id !== undefined) {
+                stylesheets[resource.id].data = result;
+            }
+            return result.replace(ORIGINAL_URL_FUNCTION_REGEXP, ORIGINAL_URL_COMMENT);
+            // eslint-disable-next-line no-unused-vars
+        } catch (_) {
+            return resource.data;
+        }
+    } else {
+        return resource.data;
+    }
+}
+
+function getResource(resources, id, rawId) {
+    let resource = resources[id];
+    if (!resource) {
+        resource = resources[rawId];
+    }
+    return resource;
+}
+
+function addMissingResource(missingResources, id, transferEncoding) {
+    if ((id.startsWith(HTTP_PROTOCOL) || id.startsWith(HTTPS_PROTOCOL) || id.startsWith(URN_PROTOCOL)) && !missingResources.find(resource => resource.id === id)) {
+        missingResources.push({ id, transferEncoding });
+    }
+}
+
+function getOriginalUrlFunction(id, resourceURI = id) {
+    return ORIGINAL_URL_FUNCTION_NAME + CSS_FUNCTION_PARENTHESIS_START + JSON.stringify(id) + CSS_FUNCTION_PARENTHESIS_END + resourceURI;
+}
+
+function getPageInfo(headers, index) {
+    return {
+        ...JSON_LD_PAGE_INFO,
+        url: index,
+        name: decodeMimeHeader(headers[SUBJECT_HEADER]),
+        dateCreated: headers[DATE_HEADER],
+        additionalProperty: {
+            ...JSON_LD_PAGE_INFO.additionalProperty,
+            value: decodeMimeHeader(headers[FROM_HEADER])
+        }
+    };
+}

+ 50 - 0
src/lib/mhtml-to-html/mod.js

@@ -0,0 +1,50 @@
+/// <reference types="./mod.d.ts" />
+
+// derived from https://github.com/msindwan/mhtml2html
+
+/**
+ * The MIT License(MIT)
+ *
+ * Copyright(c) 2016 Mayank Sindwani (https://github.com/msindwan/mhtml2html)
+ * Copyright(c) 2025 Gildas Lormeau
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files(the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions :
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+import modConvert from "./convert.js";
+import modParse from "./parse.js";
+
+export { convert, parse };
+
+function convert(mhtml, config = {}) {
+    if (config.DOMParser === undefined && globalThis.DOMParser) {
+        config.DOMParser = globalThis.DOMParser;
+    }
+    if ((typeof mhtml === "string") || mhtml instanceof Uint8Array) {
+        mhtml = parse(mhtml, config);
+    }
+    return modConvert(mhtml, config);
+}
+
+function parse(data, config = {}) {
+    if (config.DOMParser === undefined && globalThis.DOMParser) {
+        config.DOMParser = globalThis.DOMParser;
+    }
+    return modParse(data, config);
+}

+ 305 - 0
src/lib/mhtml-to-html/parse.js

@@ -0,0 +1,305 @@
+import {
+    decodeQuotedPrintable,
+    decodeBinary,
+    parseDOM,
+    decodeString,
+    encodeString,
+    getCharset,
+    replaceCharset,
+    isDocument,
+    isStylesheet,
+    isText,
+    isMultipartAlternative,
+    getBoundary,
+    indexOf,
+    startsWithBoundary,
+    isLineFeed,
+    endsWithCRLF,
+    endsWithLF
+} from "./util.js";
+import * as cssTree from "./vendor/csstree.esm.js";
+
+// States of the state machine driven by the main loop of `parse()`.
+const MHTML_HEADERS = 0;
+// NOTE(review): "MTHML" looks like a misspelling of "MHTML"; the name is kept because it is
+// referenced under this spelling by `parse()`.
+const MTHML_CONTENT = 1;
+const MHTML_DATA = 2;
+const MHTML_END = 3;
+// Value compared with `typeof mhtml`.
+const STRING_TYPE = "string";
+// Separator between the name and the value of a header line.
+const HEADER_SEPARATOR = ":";
+// Content transfer encodings (compared with lower-cased header values).
+const QUOTED_PRINTABLE_ENCODING = "quoted-printable";
+const BINARY_ENCODING = "binary";
+// Header names (header keys are lower-cased by `splitHeaders()`).
+const CONTENT_TYPE_HEADER = "content-type";
+const CONTENT_TRANSFER_ENCODING_HEADER = "content-transfer-encoding";
+const CONTENT_ID_HEADER = "content-id";
+const CONTENT_LOCATION_HEADER = "content-location";
+const BASE64_ENCODING = "base64";
+// Charset that decoded resources are declared with.
+const UTF8_CHARSET = "utf-8";
+// Tag and attribute names used to find charset declarations in documents.
+const META_TAG = "META";
+const CONTENT_ATTRIBUTE = "content";
+const CHARSET_ATTRIBUTE = "charset";
+const HTTP_EQUIV_ATTRIBUTE = "http-equiv";
+// CSS AST node type and at-rule name used to find `@charset` rules.
+const AT_RULE = "Atrule";
+const CHARSET_IDENTIFIER = "charset";
+// Prefix of the ids generated for resources without location or content id.
+const RANDOM_ID_PREFIX = "_";
+
+export default parse;
+
+function parse(mhtml, { DOMParser } = { DOMParser: globalThis.DOMParser }, context = { resources: {}, frames: {} }) {
+    // deno-lint-ignore valid-typeof
+    if (typeof mhtml === STRING_TYPE) {
+        mhtml = encodeString(mhtml);
+    }
+    const headers = {};
+    const { resources, frames } = context;
+    let resource, transferEncoding, boundary, headerKey;
+    let content = {};
+    let state = MHTML_HEADERS;
+    let indexMhtml = 0;
+    let indexStartEmbeddedMhtml;
+    while (state !== MHTML_END && indexMhtml < mhtml.length - 1) {
+        let next;
+        if (state === MHTML_HEADERS) {
+            next = getLine();
+            if (!isLineFeed(next)) {
+                splitHeaders(next, headers);
+            } else {
+                if (headers[CONTENT_TYPE_HEADER]) {
+                    boundary = getBoundary(headers[CONTENT_TYPE_HEADER]);
+                }
+                if (boundary) {
+                    while (indexOf(next, boundary) === -1 && indexMhtml < mhtml.length - 1) {
+                        next = getLine();
+                    }
+                } else {
+                    const previousIndex = indexMhtml;
+                    next = getLine(transferEncoding);
+                    if (!boundary && startsWithBoundary(next)) {
+                        boundary = decodeString(next);
+                    } else {
+                        indexMhtml = previousIndex;
+                    }
+                }
+                content = {};
+                state = MTHML_CONTENT;
+            }
+        } else if (state === MTHML_CONTENT) {
+            if (boundary) {
+                if (indexStartEmbeddedMhtml === undefined) {
+                    indexStartEmbeddedMhtml = indexMhtml;
+                }
+                next = getLine();
+                if (!isLineFeed(next)) {
+                    splitHeaders(next, content);
+                } else {
+                    initResource(content);
+                    if (!resource.contentType || !isMultipartAlternative(resource.contentType)) {
+                        indexStartEmbeddedMhtml = undefined;
+                    }
+                    state = MHTML_DATA;
+                }
+            } else {
+                initResource(headers);
+                state = MHTML_DATA;
+            }
+        } else if (state === MHTML_DATA) {
+            const indexEndData = parseResourceData();
+            if (indexStartEmbeddedMhtml !== undefined && indexEndData !== undefined) {
+                resource.used = true;
+                context.index = convertEmbeddedMhtml(indexEndData);
+            } else {
+                processResource();
+            }
+            state = (indexMhtml >= mhtml.length - 1 ? MHTML_END : MTHML_CONTENT);
+        }
+    }
+    return { headers, frames, resources, index: context.index };
+
+    function getLine(transferEncoding) {
+        const indexStart = indexMhtml;
+        while (!isLineFeed([mhtml[indexMhtml]]) && indexMhtml++ < mhtml.length - 1);
+        indexMhtml++;
+        const line = mhtml.slice(indexStart, indexMhtml);
+        return transferEncoding === QUOTED_PRINTABLE_ENCODING ? decodeQuotedPrintable(line) : line;
+    }
+
+    function splitHeaders(line, obj) {
+        const lineString = decodeString(line);
+        const indexColumn = lineString.indexOf(HEADER_SEPARATOR);
+        if (indexColumn > -1) {
+            headerKey = lineString.substring(0, indexColumn).trim().toLowerCase();
+            obj[headerKey] = lineString.substring(indexColumn + 1, lineString.length).trim();
+        } else {
+            obj[headerKey] += lineString.trim();
+        }
+    }
+
+    function initResource(resourceData) {
+        transferEncoding = resourceData[CONTENT_TRANSFER_ENCODING_HEADER];
+        const contentType = resourceData[CONTENT_TYPE_HEADER];
+        const contentId = resourceData[CONTENT_ID_HEADER];
+        let id = resourceData[CONTENT_LOCATION_HEADER];
+        if (transferEncoding) {
+            transferEncoding = transferEncoding.toLowerCase();
+        }
+        resource = {
+            transferEncoding,
+            contentType,
+            data: [],
+            id
+        };
+        if (id === undefined) {
+            if (contentId !== undefined) {
+                id = contentId;
+            } else {
+                do {
+                    id = RANDOM_ID_PREFIX + Math.random().toString(36).substring(2);
+                } while (resources[id]);
+            }
+            resource.id = id;
+        }
+        if (context.index === undefined && isDocument(contentType)) {
+            context.index = id;
+        }
+        if (contentId !== undefined) {
+            frames[contentId] = resource;
+        }
+        if (!resources[id]) {
+            resources[id] = resource;
+        }
+        content = {};
+    }
+
+    function parseResourceData() {
+        let next = getLine(transferEncoding);
+        let indexEndData, boundaryFound;
+        while (!boundaryFound && indexMhtml < mhtml.length - 1) {
+            indexEndData = indexMhtml;
+            const indexBoundary = indexOf(next, boundary);
+            if (indexBoundary !== -1) {
+                indexEndData = indexEndData - next.length + indexBoundary - 2;
+                if (indexBoundary > 2) {
+                    next = next.slice(0, indexBoundary - 2);
+                } else {
+                    next = [];
+                }
+                boundaryFound = true;
+            }
+            if (resource.transferEncoding === QUOTED_PRINTABLE_ENCODING) {
+                if (resource.data.length > 2 && resource.data[resource.data.length - 3] === 0x3D && endsWithCRLF(next)) {
+                    resource.data.splice(resource.data.length - 3, 3);
+                } else if (resource.data.length > 1 && resource.data[resource.data.length - 2] === 0x3D && endsWithLF(next)) {
+                    resource.data.splice(resource.data.length - 2, 2);
+                }
+            } else if (resource.transferEncoding === BASE64_ENCODING) {
+                if (endsWithCRLF(next)) {
+                    next = next.slice(0, next.length - 2);
+                } else if (endsWithLF(next)) {
+                    next = next.slice(0, next.length - 1);
+                }
+            }
+            resource.data.splice(resource.data.length, 0, ...next);
+            if (!boundaryFound) {
+                next = getLine(transferEncoding);
+            }
+        }
+        if (!boundaryFound && boundary) {
+            indexEndData = indexMhtml;
+        }
+        return indexEndData;
+    }
+
+    function convertEmbeddedMhtml(indexEnd) {
+        const context = { resources, frames };
+        if (endsWithCRLF(mhtml)) {
+            indexEnd -= 2;
+        } else if (endsWithLF(mhtml)) {
+            indexEnd--;
+        }
+        parse(mhtml.slice(indexStartEmbeddedMhtml, indexEnd), { DOMParser }, context);
+        return context.index;
+    }
+
+    function processResource() {
+        resource.data = resource.rawData = new Uint8Array(resource.data);
+        const charset = resource.contentType ? getCharset(resource.contentType) : undefined;
+        if (resource.transferEncoding === BINARY_ENCODING && (!resource.contentType || !isText(resource.contentType))) {
+            resource.transferEncoding = BASE64_ENCODING;
+            resource.data = decodeBinary(resource.data);
+        } else {
+            resource.data = decodeString(resource.data, charset);
+        }
+        if (resource.contentType) {
+            resource.contentType = replaceCharset(resource.contentType, UTF8_CHARSET);
+            if (isStylesheet(resource.contentType)) {
+                processStylesheetCharset(charset);
+            } else if (isDocument(resource.contentType)) {
+                processDocumentCharset(charset);
+            }
+        }
+        delete resource.rawData;
+    }
+
+    function processStylesheetCharset(charset) {
+        try {
+            let ast = cssTree.parse(resource.data);
+            if (ast.children.first && ast.children.first.type === AT_RULE && ast.children.first.name.toLowerCase() === CHARSET_IDENTIFIER) {
+                const charsetNode = ast.children.first;
+                const cssCharset = charsetNode.prelude.children.first.value.toLowerCase();
+                if (cssCharset !== UTF8_CHARSET && cssCharset !== charset) {
+                    resource.data = decodeString(resource.rawData, cssCharset);
+                    ast = cssTree.parse(resource.data);
+                }
+                ast.children.remove(ast.children.head);
+                resource.data = cssTree.generate(ast);
+            }
+            // eslint-disable-next-line no-unused-vars
+        } catch (_) {
+            // ignored
+        }
+    }
+
+    function processDocumentCharset(charset) {
+        const contentType = resource.contentType.split(";")[0];
+        let dom = parseDOM(resource.data, contentType, DOMParser);
+        let charserMetaElement = getMetaCharsetElement(dom.document.documentElement);
+        if (charserMetaElement) {
+            let htmlCharset = charserMetaElement.getAttribute(CHARSET_ATTRIBUTE);
+            if (htmlCharset) {
+                htmlCharset = htmlCharset.toLowerCase();
+                if (htmlCharset !== UTF8_CHARSET && htmlCharset !== charset) {
+                    resource.data = decodeString(resource.rawData, charset);
+                    dom = parseDOM(resource.data, contentType, DOMParser);
+                    charserMetaElement = getMetaCharsetElement(dom.document.documentElement);
+                }
+            }
+            if (charserMetaElement) {
+                charserMetaElement.remove();
+            }
+            resource.data = dom.serialize();
+        }
+        let metaElement = getMetaContentTypeElement(dom.document);
+        if (metaElement) {
+            const contentType = metaElement.getAttribute(CONTENT_ATTRIBUTE);
+            const htmlCharset = getCharset(contentType);
+            if (htmlCharset && htmlCharset !== UTF8_CHARSET && htmlCharset !== charset) {
+                resource.data = decodeString(resource.rawData, htmlCharset);
+                dom = parseDOM(resource.data, contentType, DOMParser);
+                metaElement = getMetaContentTypeElement(dom.document.documentElement);
+            }
+            if (metaElement) {
+                metaElement.remove();
+            }
+            resource.data = dom.serialize();
+        }
+    }
+}
+
+function getMetaCharsetElement(document) {
+    const metaElements = document.getElementsByTagName(META_TAG);
+    return Array.from(metaElements).find(metaElement => metaElement.getAttribute(CHARSET_ATTRIBUTE));
+}
+
+function getMetaContentTypeElement(document) {
+    const metaElements = document.getElementsByTagName(META_TAG);
+    return Array.from(metaElements).find(metaElement => metaElement.getAttribute(HTTP_EQUIV_ATTRIBUTE)
+        && metaElement.getAttribute(HTTP_EQUIV_ATTRIBUTE).toLowerCase() === CONTENT_TYPE_HEADER.toLowerCase());
+}

+ 355 - 0
src/lib/mhtml-to-html/srcset-parser.js

@@ -0,0 +1,355 @@
+// deno-lint-ignore-file no-control-regex
+
+// derived from https://github.com/albell/parse-srcset
+
+/*
+ * The MIT License (MIT)
+ * 
+ * Author: Gildas Lormeau
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/**
+ * Srcset Parser
+ *
+ * By Alex Bell |  MIT License
+ *
+ * JS Parser for the string value that appears in markup <img srcset="here">
+ *
+ * @returns Array [{url: _, d: _, w: _, h:_}, ...]
+ *
+ * Based super duper closely on the reference algorithm at:
+ * https://html.spec.whatwg.org/multipage/embedded-content.html#parse-a-srcset-attribute
+ *
+ * Most comments are copied in directly from the spec
+ * (except for comments in parens).
+ */
+
+export { parse, serialize };
+
+// 1. Let input be the value passed to this algorithm.
+/**
+ * Parses the value of a `srcset` attribute into a list of image candidates.
+ *
+ * Candidates whose descriptors are invalid are not returned; a message is logged with
+ * `console.log()` for each of them.
+ *
+ * @param {string} input - value of the `srcset` attribute
+ * @returns {Array<{url: string, w?: number, d?: number, h?: number}>} the candidates, in
+ *     the order they appear in `input`
+ */
+function parse(input) {
+
+	// UTILITY FUNCTIONS
+
+	// Manual is faster than RegEx
+	// http://bjorn.tipling.com/state-and-regular-expressions-in-javascript
+	// http://jsperf.com/whitespace-character/5
+	function isSpace(c) {
+		return (c === "\u0020" || // space
+			c === "\u0009" || // horizontal tab
+			c === "\u000A" || // new line
+			c === "\u000C" || // form feed
+			c === "\u000D");  // carriage return
+	}
+
+	// Matches regEx against the input starting at pos; on success, advances pos past the
+	// match and returns the matched characters (returns undefined otherwise).
+	function collectCharacters(regEx) {
+		let chars;
+		const match = regEx.exec(input.substring(pos));
+		if (match) {
+			chars = match[0];
+			pos += chars.length;
+			return chars;
+		}
+	}
+
+	const inputLength = input.length;
+
+	// (Don't use \s, to avoid matching non-breaking space)
+	/* eslint-disable no-control-regex */
+	const regexLeadingSpaces = /^[ \t\n\r\u000c]+/;
+	const regexLeadingCommasOrSpaces = /^[, \t\n\r\u000c]+/;
+	const regexLeadingNotSpaces = /^[^ \t\n\r\u000c]+/;
+	const regexTrailingCommas = /[,]+$/;
+	const regexNonNegativeInteger = /^\d+$/;
+	/* eslint-enable no-control-regex */
+
+	// ( Positive or negative or unsigned integers or decimals, with or without exponents.
+	// Must include at least one digit.
+	// According to spec tests any decimal point must be followed by a digit.
+	// No leading plus sign is allowed.)
+	// https://html.spec.whatwg.org/multipage/infrastructure.html#valid-floating-point-number
+	const regexFloatingPoint = /^-?(?:[0-9]+|[0-9]*\.[0-9]+)(?:[eE][+-]?[0-9]+)?$/;
+
+	let url, descriptors, currentDescriptor, state, c,
+		// 2. Let position be a pointer into input, initially pointing at the start
+		//    of the string.
+		pos = 0;
+	// 3. Let candidates be an initially empty source set.
+	const candidates = [];
+
+	// 4. Splitting loop: Collect a sequence of characters that are space
+	//    characters or U+002C COMMA characters. If any U+002C COMMA characters
+	//    were collected, that is a parse error.
+	while (true) {
+		collectCharacters(regexLeadingCommasOrSpaces);
+
+		// 5. If position is past the end of input, return candidates and abort these steps.
+		if (pos >= inputLength) {
+			return candidates; // (we're done, this is the sole return path)
+		}
+
+		// 6. Collect a sequence of characters that are not space characters,
+		//    and let that be url.
+		url = collectCharacters(regexLeadingNotSpaces);
+
+		// 7. Let descriptors be a new empty list.
+		descriptors = [];
+
+		// 8. If url ends with a U+002C COMMA character (,), follow these substeps:
+		//		(1). Remove all trailing U+002C COMMA characters from url. If this removed
+		//         more than one character, that is a parse error.
+		if (url.slice(-1) === ",") {
+			url = url.replace(regexTrailingCommas, "");
+			// (Jump ahead to step 9 to skip tokenization and just push the candidate).
+			parseDescriptors();
+
+			//	Otherwise, follow these substeps:
+		} else {
+			tokenize();
+		} // (close else of step 8)
+
+		// 16. Return to the step labeled splitting loop.
+	} // (Close of big while loop.)
+
+	/**
+	 * Tokenizes descriptor properties prior to parsing
+	 * Returns undefined.
+	 */
+	function tokenize() {
+
+		// 8.1. Descriptor tokeniser: Skip whitespace
+		collectCharacters(regexLeadingSpaces);
+
+		// 8.2. Let current descriptor be the empty string.
+		currentDescriptor = "";
+
+		// 8.3. Let state be in descriptor.
+		state = "in descriptor";
+
+		while (true) {
+
+			// 8.4. Let c be the character at position.
+			c = input.charAt(pos);
+
+			//  Do the following depending on the value of state.
+			//  For the purpose of this step, "EOF" is a special character representing
+			//  that position is past the end of input.
+
+			// In descriptor
+			if (state === "in descriptor") {
+				// Do the following, depending on the value of c:
+
+				// Space character
+				// If current descriptor is not empty, append current descriptor to
+				// descriptors and let current descriptor be the empty string.
+				// Set state to after descriptor.
+				if (isSpace(c)) {
+					if (currentDescriptor) {
+						descriptors.push(currentDescriptor);
+						currentDescriptor = "";
+						state = "after descriptor";
+					}
+
+					// U+002C COMMA (,)
+					// Advance position to the next character in input. If current descriptor
+					// is not empty, append current descriptor to descriptors. Jump to the step
+					// labeled descriptor parser.
+				} else if (c === ",") {
+					pos += 1;
+					if (currentDescriptor) {
+						descriptors.push(currentDescriptor);
+					}
+					parseDescriptors();
+					return;
+
+					// U+0028 LEFT PARENTHESIS (()
+					// Append c to current descriptor. Set state to in parens.
+				} else if (c === "\u0028") {
+					currentDescriptor = currentDescriptor + c;
+					state = "in parens";
+
+					// EOF
+					// If current descriptor is not empty, append current descriptor to
+					// descriptors. Jump to the step labeled descriptor parser.
+				} else if (c === "") {
+					if (currentDescriptor) {
+						descriptors.push(currentDescriptor);
+					}
+					parseDescriptors();
+					return;
+
+					// Anything else
+					// Append c to current descriptor.
+				} else {
+					currentDescriptor = currentDescriptor + c;
+				}
+				// (end "in descriptor")
+
+				// In parens
+			} else if (state === "in parens") {
+
+				// U+0029 RIGHT PARENTHESIS ())
+				// Append c to current descriptor. Set state to in descriptor.
+				if (c === ")") {
+					currentDescriptor = currentDescriptor + c;
+					state = "in descriptor";
+
+					// EOF
+					// Append current descriptor to descriptors. Jump to the step labeled
+					// descriptor parser.
+				} else if (c === "") {
+					descriptors.push(currentDescriptor);
+					parseDescriptors();
+					return;
+
+					// Anything else
+					// Append c to current descriptor.
+				} else {
+					currentDescriptor = currentDescriptor + c;
+				}
+
+				// After descriptor
+			} else if (state === "after descriptor") {
+
+				// Do the following, depending on the value of c:
+				// Space character: Stay in this state.
+				if (isSpace(c)) {
+
+					// EOF: Jump to the step labeled descriptor parser.
+				} else if (c === "") {
+					parseDescriptors();
+					return;
+
+					// Anything else
+					// Set state to in descriptor. Set position to the previous character in input.
+				} else {
+					state = "in descriptor";
+					pos -= 1;
+
+				}
+			}
+
+			// Advance position to the next character in input.
+			pos += 1;
+
+			// Repeat this step.
+		} // (close while true loop)
+	}
+
+	/**
+	 * Adds descriptor properties to a candidate, pushes to the candidates array
+	 * @return undefined
+	 */
+	// Declared outside of the while loop so that it's only created once.
+	function parseDescriptors() {
+
+		// 9. Descriptor parser: Let error be no.
+		let pError = false,
+
+			// 10. Let width be absent.
+			// 11. Let density be absent.
+			// 12. Let future-compat-h be absent. (We're implementing it now as h)
+			w, d, h, i,
+			desc, lastChar, value, intVal, floatVal;
+		const candidate = {};
+
+		// 13. For each descriptor in descriptors, run the appropriate set of steps
+		// from the following list:
+		for (i = 0; i < descriptors.length; i++) {
+			desc = descriptors[i];
+
+			// (The last character is the unit, the characters before it are the value.)
+			lastChar = desc[desc.length - 1];
+			value = desc.substring(0, desc.length - 1);
+			intVal = parseInt(value, 10);
+			floatVal = parseFloat(value);
+
+			// If the descriptor consists of a valid non-negative integer followed by
+			// a U+0077 LATIN SMALL LETTER W character
+			if (regexNonNegativeInteger.test(value) && (lastChar === "w")) {
+
+				// If width and density are not both absent, then let error be yes.
+				if (w || d) { pError = true; }
+
+				// Apply the rules for parsing non-negative integers to the descriptor.
+				// If the result is zero, let error be yes.
+				// Otherwise, let width be the result.
+				if (intVal === 0) { pError = true; } else { w = intVal; }
+
+				// If the descriptor consists of a valid floating-point number followed by
+				// a U+0078 LATIN SMALL LETTER X character
+			} else if (regexFloatingPoint.test(value) && (lastChar === "x")) {
+
+				// If width, density and future-compat-h are not all absent, then let error
+				// be yes.
+				if (w || d || h) { pError = true; }
+
+				// Apply the rules for parsing floating-point number values to the descriptor.
+				// If the result is less than zero, let error be yes. Otherwise, let density
+				// be the result.
+				if (floatVal < 0) { pError = true; } else { d = floatVal; }
+
+				// If the descriptor consists of a valid non-negative integer followed by
+				// a U+0068 LATIN SMALL LETTER H character
+			} else if (regexNonNegativeInteger.test(value) && (lastChar === "h")) {
+
+				// If height and density are not both absent, then let error be yes.
+				if (h || d) { pError = true; }
+
+				// Apply the rules for parsing non-negative integers to the descriptor.
+				// If the result is zero, let error be yes. Otherwise, let future-compat-h
+				// be the result.
+				if (intVal === 0) { pError = true; } else { h = intVal; }
+
+				// Anything else, Let error be yes.
+			} else { pError = true; }
+		} // (close step 13 for loop)
+
+		// 15. If error is still no, then append a new image source to candidates whose
+		// URL is url, associated with a width width if not absent and a pixel
+		// density density if not absent. Otherwise, there is a parse error.
+		if (!pError) {
+			candidate.url = url;
+			if (w) { candidate.w = w; }
+			if (d) { candidate.d = d; }
+			if (h) { candidate.h = h; }
+			candidates.push(candidate);
+		} else if (console && console.log) {  // eslint-disable-line no-console
+			console.log("Invalid srcset descriptor found in \"" + input + "\" at \"" + desc + "\"."); // eslint-disable-line no-console
+		}
+	} // (close parseDescriptors fn)
+
+}
+
+function serialize(srcset) {
+	return srcset.map(function (candidate) {
+		let descriptor = "";
+		if (candidate.w) {
+			descriptor += candidate.w + "w";
+		}
+		if (candidate.h) {
+			descriptor += candidate.h + "h";
+		}
+		if (candidate.d) {
+			descriptor += candidate.d + "x";
+		}
+		return candidate.url + (descriptor ? " " + descriptor : "");
+	}).join(", ");
+}

+ 346 - 0
src/lib/mhtml-to-html/util.js

@@ -0,0 +1,346 @@
+/* global URL, TextDecoder, TextEncoder, btoa, atob */
+
+const EVENT_HANDLER_ATTRIBUTES = [ // Names of inline event-handler ("on*") attributes; exported for use by other modules
+    "onafterprint",
+    "onbeforeprint",
+    "onbeforeunload",
+    "onhashchange",
+    "onlanguagechange",
+    "onmessage",
+    "onmessageerror",
+    "onoffline",
+    "ononline",
+    "onpagehide",
+    "onpageshow",
+    "onpopstate",
+    "onrejectionhandled",
+    "onstorage",
+    "onunhandledrejection",
+    "onunload",
+    "ongamepadconnected",
+    "ongamepaddisconnected",
+    "onabort",
+    "onblur",
+    "onfocus",
+    "oncancel",
+    "onauxclick",
+    "onbeforeinput",
+    "onbeforetoggle",
+    "oncanplay",
+    "oncanplaythrough",
+    "onchange",
+    "onclick",
+    "onclose",
+    "oncontentvisibilityautostatechange",
+    "oncontextlost",
+    "oncontextmenu",
+    "oncontextrestored",
+    "oncopy",
+    "oncuechange",
+    "oncut",
+    "ondblclick",
+    "ondrag",
+    "ondragend",
+    "ondragenter",
+    "ondragleave",
+    "ondragover",
+    "ondragstart",
+    "ondrop",
+    "ondurationchange",
+    "onemptied",
+    "onended",
+    "onformdata",
+    "oninput",
+    "oninvalid",
+    "onkeydown",
+    "onkeypress",
+    "onkeyup",
+    "onload",
+    "onloadeddata",
+    "onloadedmetadata",
+    "onloadstart",
+    "onmousedown",
+    "onmouseenter",
+    "onmouseleave",
+    "onmousemove",
+    "onmouseout",
+    "onmouseover",
+    "onmouseup",
+    "onwheel",
+    "onpaste",
+    "onpause",
+    "onplay",
+    "onplaying",
+    "onprogress",
+    "onratechange",
+    "onreset",
+    "onresize",
+    "onscroll",
+    "onscrollend",
+    "onsecuritypolicyviolation",
+    "onseeked",
+    "onseeking",
+    "onselect",
+    "onslotchange",
+    "onstalled",
+    "onsubmit",
+    "onsuspend",
+    "ontimeupdate",
+    "onvolumechange",
+    "onwaiting",
+    "onselectstart",
+    "onselectionchange",
+    "ontoggle",
+    "onpointercancel",
+    "onpointerdown",
+    "onpointerup",
+    "onpointermove",
+    "onpointerout",
+    "onpointerover",
+    "onpointerenter",
+    "onpointerleave",
+    "ongotpointercapture",
+    "onlostpointercapture",
+    "onanimationcancel",
+    "onanimationend",
+    "onanimationiteration",
+    "onanimationstart",
+    "ontransitioncancel",
+    "ontransitionend",
+    "ontransitionrun",
+    "ontransitionstart",
+    "onerror",
+    "onfullscreenchange",
+    "onfullscreenerror"
+];
+
+export { // Public API of this module; the listed functions are declared further down (function declarations are hoisted)
+    EVENT_HANDLER_ATTRIBUTES,
+    decodeQuotedPrintable,
+    decodeBinary,
+    decodeMimeHeader,
+    parseDOM,
+    decodeBase64,
+    decodeString,
+    encodeString,
+    getCharset,
+    replaceCharset,
+    isDocument,
+    isStylesheet,
+    isText,
+    isMultipartAlternative,
+    getBoundary,
+    indexOf,
+    startsWithBoundary,
+    isLineFeed,
+    endsWithCRLF,
+    endsWithLF,
+    getResourceURI,
+    resolvePath
+};
+
+function decodeQuotedPrintable(array) {
+    const result = [];
+    for (let i = 0; i < array.length; i++) {
+        if (array[i] === 0x3D) {
+            if (isHex(array[i + 1]) && isHex(array[i + 2])) {
+                const hex = parseInt(String.fromCharCode(array[i + 1], array[i + 2]), 16);
+                result.push(hex);
+                i += 2;
+            } else {
+                result.push(array[i]);
+            }
+        } else {
+            result.push(array[i]);
+        }
+    }
+    return new Uint8Array(result);
+
+    function isHex(value) {
+        return value >= 0x30 && value <= 0x39 || value >= 0x41 && value <= 0x46;
+    }
+}
+
+function decodeBinary(array) {
+    let data = "";
+    for (let indexData = 0; indexData < array.length; indexData++) {
+        data += String.fromCharCode(array[indexData]);
+    }
+    return btoa(data);
+}
+
+function decodeBase64(value, charset) {
+    const decodedData = new Uint8Array(atob(value).split("").map(char => char.charCodeAt(0)));
+    return new TextDecoder(charset).decode(decodedData);
+}
+
+function decodeMimeHeader(encodedSubject) {
+    if (encodedSubject && encodedSubject.startsWith("=?") && encodedSubject.endsWith("?=")) {
+        const encodedSubjectParts = [];
+        let index = 0;
+        while (index < encodedSubject.length) {
+            const start = encodedSubject.indexOf("=?", index);
+            if (start === -1) {
+                break;
+            }
+            const endCharset = encodedSubject.indexOf("?", start + 2);
+            if (endCharset === -1) {
+                break;
+            }
+            const charset = encodedSubject.substring(start + 2, endCharset);
+            const endEncoding = encodedSubject.indexOf("?", endCharset + 1);
+            if (endEncoding === -1) {
+                break;
+            }
+            const encoding = encodedSubject.substring(endCharset + 1, endEncoding);
+            const endValue = encodedSubject.indexOf("?=", endEncoding + 1);
+            if (endValue === -1) {
+                break;
+            }
+            const value = encodedSubject.substring(endEncoding + 1, endValue);
+            index = endValue + 2;
+            if (encoding === "Q") {
+                encodedSubjectParts.push(new TextDecoder(charset).decode(decodeQuotedPrintable(new TextEncoder().encode(value))));
+            } else if (encoding === "B") {
+                encodedSubjectParts.push(decodeBase64(value, charset));
+            }
+        }
+        encodedSubject = encodedSubjectParts.join("");
+    }
+    return encodedSubject || "";
+}
+
+function parseDOM(asset, contentType = "text/html", DOMParser = globalThis.DOMParser) {
+    let document;
+    try {
+        document = new DOMParser().parseFromString(asset, contentType);
+        // eslint-disable-next-line no-unused-vars
+    } catch (_) {
+        document = new DOMParser().parseFromString(asset, "text/html");
+    }
+    return {
+        document,
+        serialize() {
+            let result = "";
+            if (this.document.doctype) {
+                result += serializeDocType(this.document.doctype) + "\n";
+            }
+            result += this.document.documentElement.outerHTML;
+            return result;
+        }
+    };
+}
+
+function serializeDocType(doctype) {
+    return `<!DOCTYPE ${doctype.name}${(doctype.publicId ? ` PUBLIC "${doctype.publicId}"` : "")}${(doctype.systemId ? ` "${doctype.systemId}"` : "")}>`;
+}
+
+function decodeString(array, charset) {
+    return new TextDecoder(charset).decode(array);
+}
+
+function encodeString(string, charset) {
+    return new TextEncoder(charset).encode(string);
+}
+
+function getCharset(contentType) {
+    const charsetMatch = contentType.match(/charset=([^;]+)/);
+    if (charsetMatch) {
+        return removeQuotes(charsetMatch[1]).toLowerCase();
+    }
+}
+
+function removeQuotes(value) {
+    return value.replace(/^"(.*)"$/, "$1").replace(/^'(.*)'$/, "$1").trim();
+}
+
+function replaceCharset(contentType, charset) {
+    return contentType.replace(/charset=([^;]+)/, `charset=${charset}`);
+}
+
+function isDocument(contentType) {
+    return contentType.startsWith("text/html") || contentType.startsWith("application/xhtml+xml");
+}
+
+function isStylesheet(contentType) {
+    return contentType.startsWith("text/css");
+}
+
+function isText(contentType) {
+    return contentType.startsWith("text/");
+}
+
+function isMultipartAlternative(contentType) {
+    return contentType.startsWith("multipart/alternative");
+}
+
+function getBoundary(contentType) {
+    const contentTypeParams = contentType.split(";");
+    contentTypeParams.shift();
+    const boundaryParam = contentTypeParams.find(param => param.startsWith("boundary="));
+    if (boundaryParam) {
+        return removeQuotes(boundaryParam.substring(9));
+    }
+}
+
+function indexOf(array, string) {
+    const stringBytes = new TextEncoder().encode(string);
+    for (let i = 0; i < array.length; i++) {
+        if (array[i] === stringBytes[0]) {
+            let match = true;
+            for (let j = 1; j < stringBytes.length; j++) {
+                if (array[i + j] !== stringBytes[j]) {
+                    match = false;
+                    break;
+                }
+            }
+            if (match) {
+                // return index
+                return i;
+            }
+        }
+    }
+    return -1;
+}
+
+function isLineFeed(array) {
+    return array.length == 2 ? array[0] == 0x0D && array[1] == 0x0A : array.length == 1 ? array[0] == 0x0A : false;
+}
+
+function endsWithCRLF(array) {
+    return array.length >= 2 ? array[array.length - 2] == 0x0D && array[array.length - 1] == 0x0A : array.length >= 1 ? array[array.length - 1] == 0x0D : false;
+}
+
+function endsWithLF(array) {
+    return array.length >= 1 ? array[array.length - 1] == 0x0A : false;
+}
+
+function startsWithBoundary(array) {
+    return array.length >= 2 ? array[0] == 0x2D && array[1] == 0x2D : false;
+}
+
+function getResourceURI({ contentType, transferEncoding, data }) {
+    return `data:${contentType};${"base64"},${transferEncoding === "base64" ? data : btoa(unescape(encodeURIComponent(data)))}`;
+}
+
+function resolvePath(path, base) {
+    if (base && !path.startsWith("data:")) {
+        try {
+            return new URL(path, base).href;
+            // eslint-disable-next-line no-unused-vars
+        } catch (_) {
+            if (path.startsWith("//")) {
+                const protocol = base.match(/^[^:]+/);
+                if (protocol) {
+                    return `${protocol[0]}:${path}`;
+                } else {
+                    return path;
+                }
+            } else {
+                return path;
+            }
+        }
+    } else {
+        return path;
+    }
+}

+ 19 - 0
src/lib/mhtml-to-html/vendor/LICENSE.txt

@@ -0,0 +1,19 @@
+Copyright (C) 2016-2024 by Roman Dvornov
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.

+ 1 - 0
src/lib/mhtml-to-html/vendor/README.md

@@ -0,0 +1 @@
+This folder contains the distributable file of https://github.com/csstree/csstree

Fichier diff supprimé car celui-ci est trop grand
+ 24 - 0
src/lib/mhtml-to-html/vendor/csstree.esm.js


+ 29 - 5
src/ui/bg/ui-editor.js

@@ -21,7 +21,7 @@
  *   Source.
  */
 
-/* global browser, document, matchMedia, addEventListener, navigator, prompt, URL, MouseEvent, Blob, setInterval, DOMParser, fetch, singlefile */
+/* global browser, document, matchMedia, addEventListener, navigator, prompt, URL, MouseEvent, Blob, setInterval, DOMParser, fetch, TextDecoder, singlefile */
 
 import * as download from "../../core/common/download.js";
 import { onError } from "./../common/common-content-ui.js";
@@ -60,6 +60,7 @@ const undoAllCutPageButton = document.querySelector(".undo-all-cut-page-button")
 const redoCutPageButton = document.querySelector(".redo-cut-page-button");
 const savePageButton = document.querySelector(".save-page-button");
 const printPageButton = document.querySelector(".print-page-button");
+const importMhtButton = document.querySelector(".import-mht-button");
 const lastButton = toolbarElement.querySelector(".buttons:last-of-type [type=button]:last-of-type");
 
 let tabData, tabDataContents = [], downloadParser;
@@ -84,6 +85,7 @@ undoAllCutPageButton.title = browser.i18n.getMessage("editorUndoAllCutPage");
 redoCutPageButton.title = browser.i18n.getMessage("editorRedoCutPage");
 savePageButton.title = browser.i18n.getMessage("editorSavePage");
 printPageButton.title = browser.i18n.getMessage("editorPrintPage");
+importMhtButton.title = browser.i18n.getMessage("editorImportMht");
 
 addYellowNoteButton.onmouseup = () => editorElement.contentWindow.postMessage(JSON.stringify({ method: "addNote", color: "note-yellow" }), "*");
 addPinkNoteButton.onmouseup = () => editorElement.contentWindow.postMessage(JSON.stringify({ method: "addNote", color: "note-pink" }), "*");
@@ -218,6 +220,28 @@ if (typeof print == "function") {
 } else {
 	printPageButton.remove();
 }
+importMhtButton.onmouseup = async () => {
+	const fileInput = document.createElement("input");
+	fileInput.type = "file";
+	fileInput.accept = ".mht,.mhtml";
+	fileInput.onchange = async () => {
+		if (fileInput.files && fileInput.files[0]) {
+			const file = fileInput.files[0];
+			let filename = file.name || "Untitled.mht";
+			filename = filename.replace(/(\.mhtml|\.mht)$/i, ".html");
+			if (!filename.endsWith(".html")) {
+				filename += ".html";
+			}
+			let content = new TextDecoder().decode(await file.arrayBuffer());
+			editorElement.contentWindow.postMessage(JSON.stringify({
+				method: "importMht",
+				content,
+				filename
+			}), "*");
+		}
+	};
+	fileInput.click();
+};
 
 let toolbarPositionPointer, toolbarMoving, toolbarTranslateMax;
 let orientationPortrait = matchMedia("(orientation: portrait)").matches;
@@ -438,10 +462,10 @@ async function onMessage(message) {
 			tabData = JSON.parse(tabDataContents.join(""));
 			tabData.options = message.options;
 			tabDataContents = [];
-			editorElement.contentWindow.postMessage(JSON.stringify({ 
-				method: "init", 
-				content: tabData.content, 
-				password: tabData.options.password, 
+			editorElement.contentWindow.postMessage(JSON.stringify({
+				method: "init",
+				content: tabData.content,
+				password: tabData.options.password,
 				compressContent: message.compressContent,
 				url: tabData.url
 			}), "*");

+ 25 - 6
src/ui/content/content-ui-editor-web.js

@@ -25,6 +25,7 @@
 
 import { setLabels } from "./../../ui/common/common-content-ui.js";
 import { downloadPageForeground } from "../../core/common/download.js";
+import { convert } from "../../lib/mhtml-to-html/mod.js";
 
 (globalThis => {
 
@@ -225,6 +226,12 @@ import { downloadPageForeground } from "../../core/common/download.js";
 			if (message.method == "printPage") {
 				printPage();
 			}
+			if (message.method == "importMht") {
+				let { content, filename } = message;
+				const { data } = await convert(content, { DOMParser: globalThis.DOMParser });
+				content = data;
+				await init({ content }, { filename, reset: true, isMHTML: true });
+			}
 			if (message.method == "displayInfobar") {
 				singlefile.helper.displayIcon(document, true, {
 					openInfobar: message.openInfobar,
@@ -262,18 +269,28 @@ import { downloadPageForeground } from "../../core/common/download.js";
 			if (event.dataTransfer.files && event.dataTransfer.files[0]) {
 				const file = event.dataTransfer.files[0];
 				event.preventDefault();
-				const content = new TextDecoder().decode(await file.arrayBuffer());
+				let content = new TextDecoder().decode(await file.arrayBuffer());
 				const compressContent = /<html[^>]* data-sfz[^>]*>/i.test(content);
 				if (compressContent) {
 					await init({ content: file, compressContent }, { filename: file.name });
 				} else {
-					await init({ content }, { filename: file.name });
+					const isMHTML = /\.mhtml?$|\.mht$/i.test(file.name);
+					let filename = file.name || "Untitled.html";
+					filename = filename.replace(/(\.mhtml|\.mht)$/i, ".html");
+					if (!filename.endsWith(".html")) {
+						filename += ".html";
+					}
+					if (isMHTML) {
+						const { data } = await convert(content, { DOMParser: globalThis.DOMParser });
+						content = data;
+					}
+					await init({ content }, { filename, isMHTML });
 				}
 			}
 		};
 	}
 
-	async function init({ content, password, compressContent }, { filename, reset } = {}) {
+	async function init({ content, password, compressContent }, { filename, reset, isMHTML } = {}) {
 		await initConstants();
 		if (compressContent) {
 			const zipOptions = {
@@ -333,9 +350,11 @@ import { downloadPageForeground } from "../../core/common/download.js";
 			}
 		} else {
 			const contentDocument = (new DOMParser()).parseFromString(content, "text/html");
-			if (detectSavedPage(contentDocument)) {
-				const { saveUrl } = singlefile.helper.extractInfobarData(contentDocument);
-				pageUrl = saveUrl;
+			if (detectSavedPage(contentDocument) || isMHTML) {
+				if (!isMHTML) {
+					const { saveUrl } = singlefile.helper.extractInfobarData(contentDocument);
+					pageUrl = saveUrl;
+				}
 				if (contentDocument.doctype) {
 					if (document.doctype) {
 						document.replaceChild(contentDocument.doctype, document.doctype);

+ 2 - 0
src/ui/pages/editor.html

@@ -61,6 +61,8 @@
 			<div class="separator"></div>
 		</div>
 		<div class="buttons">
+			<img type="button" class="import-mht-button" src="/src/ui/resources/button_import_mht.png"
+				draggable="false">
 			<img type="button" class="print-page-button" src="/src/ui/resources/button_print.png"
 				draggable="false">
 			<img type="button" class="save-page-button" src="/src/ui/resources/button_download.png"

BIN
src/ui/resources/button_import_mht.png


Certains fichiers n'ont pas été affichés car il y a eu trop de fichiers modifiés dans ce diff