| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107 |
- /*
- * Copyright 2018 Gildas Lormeau
- * contact : gildas.lormeau <at> gmail.com
- *
- * This file is part of SingleFile.
- *
- * SingleFile is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * SingleFile is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with SingleFile. If not, see <http://www.gnu.org/licenses/>.
- */
- /* global SingleFileCore, btoa, DOMParser, getComputedStyle, TextDecoder, window, fetch */
- this.SingleFile = (() => {
- // --------
- // Download
- // --------
// Value sent in the "User-Agent" request header of every download.
const USER_AGENT = "Mozilla/5.0 (compatible; SingleFile Bot/1.0)";

/**
 * Resource downloader handed to SingleFileCore.
 */
class Download {
	/**
	 * Fetches a resource and returns its content either as text or as a
	 * base64 data URI.
	 *
	 * The request goes through `window.superFetch` when it is defined and
	 * through the global `fetch` otherwise.
	 *
	 * @param {string} resourceURL - URL of the resource to fetch.
	 * @param {boolean} asDataURI - When truthy, return a base64 data URI
	 *     instead of decoded text.
	 * @returns {Promise<string>} The resource content. When the request
	 *     fails, resolves to "data:base64," (data URI mode) or "" (text mode).
	 *     In data URI mode, a failure while reading or encoding the body also
	 *     resolves to "data:base64,".
	 */
	static async getContent(resourceURL, asDataURI) {
		const requestOptions = {
			method: "GET",
			headers: {
				"User-Agent": USER_AGENT
			}
		};
		let resourceContent;
		try {
			resourceContent = await (window.superFetch || fetch)(resourceURL, requestOptions);
		} catch (e) {
			// Best effort: an unreachable resource yields an empty placeholder.
			return asDataURI ? "data:base64," : "";
		}
		// `headers.get` returns null when the response has no content-type.
		const contentType = resourceContent.headers.get("content-type");
		if (asDataURI) {
			try {
				const buffer = await resourceContent.arrayBuffer();
				const bytes = new Uint8Array(buffer);
				// btoa expects a "binary string": one character per byte.
				let binaryString = "";
				bytes.forEach(byte => binaryString += String.fromCharCode(byte));
				return "data:" + (contentType || "") + ";" + "base64," + btoa(binaryString);
			} catch (e) {
				return "data:base64,";
			}
		}
		// Extract only the charset parameter value: stop at the next ";" and
		// drop optional surrounding quotes so that headers such as
		// `text/html; charset="utf-8"; foo=bar` yield a usable label.
		const matchCharset = contentType && contentType.match(/;\s*charset\s*=\s*"?([^";]*)"?/i);
		const charSet = matchCharset ? matchCharset[1].trim().toLowerCase() : "";
		let decoder = null;
		if (charSet && charSet !== "utf-8") {
			try {
				// Created before the body is read so that the body stream is
				// still available for text() if the label is rejected.
				decoder = new TextDecoder(charSet);
			} catch (e) {
				// Unsupported charset label (RangeError): fall back to the
				// default decoding performed by text() below.
				decoder = null;
			}
		}
		if (decoder) {
			const arrayBuffer = await resourceContent.arrayBuffer();
			return decoder.decode(arrayBuffer);
		}
		return resourceContent.text();
	}
}
- // ---
- // DOM
- // ---
/**
 * Builds the window-like object handed to SingleFileCore from raw page HTML.
 */
class DOM {
	/**
	 * Parses `pageContent` as "text/html" and wraps the resulting document.
	 *
	 * @param {string} pageContent - HTML source of the page.
	 * @returns {{DOMParser: Function, getComputedStyle: Function, document: Document, serialize: Function}}
	 *     The global `DOMParser` and `getComputedStyle`, the parsed document,
	 *     and a `serialize` function returning the doctype string followed by
	 *     the outer HTML of the document element.
	 */
	static create(pageContent/*, url*/) {
		const parser = new DOMParser();
		const parsedDocument = parser.parseFromString(pageContent, "text/html");
		const serialize = () => {
			const doctypeString = getDoctype(parsedDocument);
			return doctypeString + parsedDocument.documentElement.outerHTML;
		};
		return {
			DOMParser: DOMParser,
			getComputedStyle: getComputedStyle,
			document: parsedDocument,
			serialize: serialize
		};
	}
}
/**
 * Rebuilds the `<!DOCTYPE ...>` declaration of a document as a string.
 *
 * @param {Document} doc - Document whose `doctype` property is read.
 * @returns {string} The declaration followed by a newline, or "" when the
 *     document has no doctype.
 */
function getDoctype(doc) {
	const docType = doc.doctype;
	if (!docType) {
		return "";
	}
	const { nodeName, publicId, systemId, internalSubset } = docType;
	const pieces = [`<!DOCTYPE ${nodeName}`];
	if (publicId) {
		pieces.push(` PUBLIC "${publicId}"`);
		// The system identifier follows the public one without a keyword.
		if (systemId) {
			pieces.push(` "${systemId}"`);
		}
	} else if (systemId) {
		pieces.push(` SYSTEM "${systemId}"`);
	}
	if (internalSubset) {
		pieces.push(` [${internalSubset}]`);
	}
	pieces.push(">\n");
	return pieces.join("");
}
- return SingleFileCore(Download, DOM, URL);
- })();
|