single-file-browser.js 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216
  1. /*
  2. * Copyright 2018 Gildas Lormeau
  3. * contact : gildas.lormeau <at> gmail.com
  4. *
  5. * This file is part of SingleFile.
  6. *
  7. * SingleFile is free software: you can redistribute it and/or modify
  8. * it under the terms of the GNU Lesser General Public License as published by
  9. * the Free Software Foundation, either version 3 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * SingleFile is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public License
  18. * along with SingleFile. If not, see <http://www.gnu.org/licenses/>.
  19. */
  20. /* global SingleFileCore, DOMParser, TextDecoder, Blob, fetch, base64, superFetch, parseSrcset, uglifycss, htmlmini, cssMinifier, fontsMinifier, lazyLoader, serializer, docHelper, mediasMinifier */
  21. this.SingleFile = this.SingleFile || (() => {
  22. const ONE_MB = 1024 * 1024;
  23. // --------
  24. // Download
  25. // --------
  26. let fetchResource;
  // Fallback serializer: installed only when `this.serializer` is undefined.
  if (this.serializer === undefined) {
    /**
     * Builds the doctype declaration of a document.
     *
     * @param {DocumentType|null} docType - the `doctype` property of the document.
     * @returns {string} "" when there is no doctype, otherwise the declaration
     *   followed by a single space.
     */
    const serializeDocType = docType => {
      if (!docType) {
        return "";
      }
      const { publicId, systemId, internalSubset } = docType;
      // A system identifier alone requires the SYSTEM keyword; after a PUBLIC
      // identifier it is appended as a second quoted string.
      const externalId = publicId
        ? ` PUBLIC "${publicId}"` + (systemId ? ` "${systemId}"` : "")
        : (systemId ? ` SYSTEM "${systemId}"` : "");
      const subset = internalSubset ? ` [${internalSubset}]` : "";
      return `<!DOCTYPE ${docType.nodeName}${externalId}${subset}> `;
    };

    this.serializer = {
      /**
       * Serializes a document as its doctype declaration (if any) followed by
       * the outer HTML of its root element.
       *
       * @param {Document} doc - document to serialize.
       * @returns {string} the serialized document.
       */
      process(doc) {
        return serializeDocType(doc.doctype) + doc.documentElement.outerHTML;
      }
    };
  }
  /**
   * Downloads resources and converts them to text or to base64 data URIs.
   */
  class Download {
    /**
     * Fetches a resource and returns its content as a string.
     *
     * Expected failures are reported by returning an empty placeholder rather
     * than by rejecting: "data:base64," in data-URI mode, "" in text mode.
     * This covers a fetch that throws, an HTTP status >= 400, a body that
     * cannot be read or encoded, and a resource exceeding the size limit.
     *
     * @param {string} resourceURL - URL passed to the fetch implementation.
     * @param {Object} [options] - optional settings.
     * @param {boolean} [options.asDataURI] - return a base64 data URI instead
     *   of decoded text.
     * @param {boolean} [options.maxResourceSizeEnabled] - enforce the size limit.
     * @param {number} [options.maxResourceSize] - size limit in megabytes;
     *   compared with the byte length in data-URI mode and with the decoded
     *   string length in text mode.
     * @returns {Promise<string>} the content, or the empty placeholder.
     */
    static async getContent(resourceURL, options) {
      // `options` is optional: normalize once so no branch dereferences undefined.
      const settings = options || {};
      const asDataURI = Boolean(settings.asDataURI);
      const emptyContent = asDataURI ? "data:base64," : "";
      const isTooLarge = size =>
        Boolean(settings.maxResourceSizeEnabled) && size > settings.maxResourceSize * ONE_MB;

      // Resolve the fetch implementation lazily, preferring superFetch when present.
      if (!fetchResource) {
        fetchResource = typeof superFetch === "undefined" ? fetch : superFetch.fetch;
      }

      let response;
      try {
        response = await fetchResource(resourceURL);
      } catch (error) {
        return emptyContent;
      }
      if (response.status >= 400) {
        return emptyContent;
      }

      // Keep the complete header value: the MIME type is its first segment, but
      // the charset lives in the parameters that follow the first ";".
      const contentTypeHeader = (response.headers && response.headers.get("content-type")) || "";

      let buffer;
      try {
        buffer = await response.arrayBuffer();
      } catch (error) {
        return emptyContent;
      }

      if (asDataURI) {
        // Check the size before encoding so oversized resources cost nothing.
        if (isTooLarge(buffer.byteLength)) {
          return emptyContent;
        }
        const mimeType = contentTypeHeader.split(";")[0].trim();
        try {
          return "data:" + mimeType + ";base64," + base64.fromByteArray(new Uint8Array(buffer));
        } catch (error) {
          return emptyContent;
        }
      }

      const matchCharset = contentTypeHeader.match(/;\s*charset\s*=\s*"?([^";]*)"?/i);
      const charSet = (matchCharset && matchCharset[1].trim().toLowerCase()) || "utf-8";
      let decoder;
      try {
        decoder = new TextDecoder(charSet);
      } catch (error) {
        // Unsupported charset label: fall back to UTF-8 instead of dropping the resource.
        decoder = new TextDecoder("utf-8");
      }
      let textContent;
      try {
        textContent = decoder.decode(buffer);
      } catch (error) {
        return emptyContent;
      }
      return isTooLarge(textContent.length) ? emptyContent : textContent;
    }
  }
  101. // ---
  102. // DOM
  103. // ---
  /**
   * DOM-related operations. Most methods forward their arguments unchanged to
   * helper objects that are expected to exist as globals (htmlmini,
   * lazyLoader, cssMinifier, fontsMinifier, uglifycss, mediasMinifier,
   * parseSrcset, docHelper, serializer).
   */
  class DOM {
    /**
     * Parses HTML text into a document and makes sure it has a <base> element
     * with a non-empty "href" attribute.
     *
     * @param {string} pageContent - HTML source to parse as "text/html".
     * @param {string} baseURI - value used for the inserted <base href>.
     * @returns {Document} the parsed document.
     */
    static createDoc(pageContent, baseURI) {
      const parser = new DOMParser();
      const doc = parser.parseFromString(pageContent, "text/html");
      const existingBase = doc.querySelector("base");
      if (existingBase && existingBase.getAttribute("href")) {
        return doc;
      }
      // The first <base> is missing or has no usable href: drop it and insert
      // a new one as the first child of <head>.
      if (existingBase) {
        existingBase.remove();
      }
      const insertedBase = doc.createElement("base");
      insertedBase.setAttribute("href", baseURI);
      doc.head.insertBefore(insertedBase, doc.head.firstChild);
      return doc;
    }

    /**
     * Lists the enumerable property names (own and inherited) of a freshly
     * created <div> that start with "on".
     *
     * @param {Document} doc - document used to create the probe element.
     * @returns {string[]} the matching property names, in enumeration order.
     */
    static getOnEventAttributeNames(doc) {
      const probe = doc.createElement("div");
      const enumerableNames = [];
      // for...in is required here: the properties of interest are inherited.
      for (const name in probe) {
        enumerableNames.push(name);
      }
      return enumerableNames.filter(name => name.startsWith("on"));
    }

    /** @returns {Function} the DOMParser constructor. */
    static getParser() {
      return DOMParser;
    }

    /**
     * @param {*} content - single part wrapped in a Blob.
     * @returns {number} the `size` of that Blob.
     */
    static getContentSize(content) {
      const blob = new Blob([content]);
      return blob.size;
    }

    /** Forwards to htmlmini.process(doc, options) and returns its result. */
    static minifyHTML(doc, options) {
      return htmlmini.process(doc, options);
    }

    /** Forwards to htmlmini.postProcess(doc) and returns its result. */
    static postMinifyHTML(doc) {
      return htmlmini.postProcess(doc);
    }

    /** Forwards to lazyLoader.process(doc) and returns its result. */
    static lazyLoad(doc) {
      return lazyLoader.process(doc);
    }

    /** Forwards to cssMinifier.process(doc) and returns its result. */
    static minifyCSS(doc) {
      return cssMinifier.process(doc);
    }

    /** Forwards to fontsMinifier.process(doc, secondPass) and returns its result. */
    static minifyFonts(doc, secondPass) {
      return fontsMinifier.process(doc, secondPass);
    }

    /** Forwards to uglifycss.processString(content, options) and returns its result. */
    static compressCSS(content, options) {
      return uglifycss.processString(content, options);
    }

    /** Forwards to mediasMinifier.process(doc) and returns its result. */
    static minifyMedias(doc) {
      return mediasMinifier.process(doc);
    }

    /**
     * Forwards to the global parseSrcset helper (the identifier below does not
     * refer to this static method) and returns its result.
     */
    static parseSrcset(srcset) {
      return parseSrcset.process(srcset);
    }

    /** Forwards to docHelper.preProcessDoc(doc, win, options) and returns its result. */
    static preProcessDoc(doc, win, options) {
      return docHelper.preProcessDoc(doc, win, options);
    }

    /** Calls docHelper.postProcessDoc(doc, options); the helper's result is not returned. */
    static postProcessDoc(doc, options) {
      docHelper.postProcessDoc(doc, options);
    }

    /** Forwards to serializer.process(doc, compressHTML) and returns its result. */
    static serialize(doc, compressHTML) {
      return serializer.process(doc, compressHTML);
    }

    /** @returns {*} the current value of lazyLoader.imageSelectors. */
    static lazyLoaderImageSelectors() {
      return lazyLoader.imageSelectors;
    }

    /** Forwards to docHelper.windowIdAttributeName(sessionId) and returns its result. */
    static windowIdAttributeName(sessionId) {
      return docHelper.windowIdAttributeName(sessionId);
    }

    /** Forwards to docHelper.preservedSpaceAttributeName(sessionId) and returns its result. */
    static preservedSpaceAttributeName(sessionId) {
      return docHelper.preservedSpaceAttributeName(sessionId);
    }

    /** Forwards to docHelper.removedContentAttributeName(sessionId) and returns its result. */
    static removedContentAttributeName(sessionId) {
      return docHelper.removedContentAttributeName(sessionId);
    }

    /** Forwards to docHelper.responsiveImagesAttributeName(sessionId) and returns its result. */
    static responsiveImagesAttributeName(sessionId) {
      return docHelper.responsiveImagesAttributeName(sessionId);
    }

    /** Forwards to docHelper.inputValueAttributeName(sessionId) and returns its result. */
    static inputValueAttributeName(sessionId) {
      return docHelper.inputValueAttributeName(sessionId);
    }
  }
  186. return { getClass: () => SingleFileCore.getClass(Download, DOM, URL) };
  187. })();