single-file-browser.js 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297
  1. /*
  2. * Copyright 2018 Gildas Lormeau
  3. * contact : gildas.lormeau <at> gmail.com
  4. *
  5. * This file is part of SingleFile.
  6. *
  7. * SingleFile is free software: you can redistribute it and/or modify
  8. * it under the terms of the GNU Lesser General Public License as published by
  9. * the Free Software Foundation, either version 3 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * SingleFile is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public License
  18. * along with SingleFile. If not, see <http://www.gnu.org/licenses/>.
  19. */
  20. /* global SingleFileCore, DOMParser, TextDecoder, Blob, fetch, base64, superFetch, parseSrcset, uglifycss, htmlmini, cssMinifier, fontsMinifier, serializer, docHelper, mediasMinifier, TextEncoder, crypto, RulesMatcher, altImages */
  21. this.SingleFile = this.SingleFile || (() => {
  // Number of bytes in one megabyte (2^20); multiplied with the "maxResourceSize" option.
  const ONE_MB = 1 << 20;
  // When true, download start/end traces are written through log().
  const DEBUG = false;
  // Content types starting with this prefix are the ones decoded as text.
  const PREFIX_CONTENT_TYPE_TEXT = "text/";
  // --------
  // Download
  // --------
  // Fetch implementation; left unset here and resolved on the first download.
  let fetchResource;
  // Install a default serializer only when the host did not already provide one.
  if (this.serializer === undefined) {
    /**
     * Builds the "<!DOCTYPE ...> " prefix for a document type node.
     *
     * @param {?DocumentType} docType doctype node of the document, if any
     * @returns {string} the doctype declaration followed by a space, or "" when there is no doctype
     */
    const serializeDocType = docType => {
      if (!docType) {
        return "";
      }
      let declaration = "<!DOCTYPE " + docType.nodeName;
      if (docType.publicId) {
        declaration += " PUBLIC \"" + docType.publicId + "\"";
        if (docType.systemId) {
          declaration += " \"" + docType.systemId + "\"";
        }
      } else if (docType.systemId) {
        declaration += " SYSTEM \"" + docType.systemId + "\"";
      }
      if (docType.internalSubset) {
        declaration += " [" + docType.internalSubset + "]";
      }
      return declaration + "> ";
    };
    this.serializer = {
      /**
       * Serializes a document as its doctype declaration followed by the outer HTML of its root element.
       *
       * @param {Document} doc document to serialize
       * @returns {string} the serialized document
       */
      process(doc) {
        const docTypeString = serializeDocType(doc.doctype);
        return docTypeString + doc.documentElement.outerHTML;
      }
    };
  }
  // Placeholder returned instead of a data URI when a resource cannot be embedded.
  const EMPTY_DATA_URI = "data:base64,";

  /**
   * Extracts the MIME type and the charset from a Content-Type header value.
   *
   * @param {?string} headerValue raw header value, or a nullish value when the header is missing
   * @returns {{mimeType: ?string, charSet: (string|undefined)}} the lower-cased MIME type (null when
   * missing or malformed) and the lower-cased, unquoted charset (undefined when none is declared)
   */
  function parseContentType(headerValue) {
    const parsed = { mimeType: null, charSet: undefined };
    if (!headerValue) {
      return parsed;
    }
    const [rawMimeType, ...parameters] = headerValue.toLowerCase().split(";");
    const mimeType = rawMimeType.trim();
    // A usable MIME type needs a non-empty type in front of the "/".
    if (mimeType.indexOf("/") > 0) {
      parsed.mimeType = mimeType;
    }
    // The charset may be any of the parameters, not necessarily the first one.
    for (const parameter of parameters) {
      const matchCharSet = parameter.trim().match(/^charset\s*=\s*(.*)$/);
      if (matchCharSet) {
        const charSet = removeQuotes(matchCharSet[1]);
        if (charSet) {
          parsed.charSet = charSet;
          break;
        }
      }
    }
    return parsed;
  }

  /**
   * Downloads resources and returns them either as text or as base64 data URIs.
   */
  class Download {
    /**
     * Fetches a resource and returns its content.
     *
     * Failures never reject: a network error, an HTTP status >= 400, an unreadable body or a
     * resource over the size limit yield "data:base64," in data URI mode and "" in text mode.
     * In text mode, "" is also returned when the response is not served as "text/*"
     * (including when it has no usable Content-Type header).
     *
     * @param {string} resourceURL URL of the resource to download
     * @param {Object} [options] download options
     * @param {boolean} [options.asDataURI] return a base64 data URI instead of decoded text
     * @param {boolean} [options.maxResourceSizeEnabled] enforce the "maxResourceSize" limit
     * @param {number} [options.maxResourceSize] size limit, in MB
     * @param {string} [options.charSet] charset used in text mode when the response declares none
     * @returns {Promise<string>} the text content or the data URI of the resource
     */
    static async getContent(resourceURL, options) {
      // Callers may omit the options entirely; normalize once instead of guarding every access.
      const settings = options || {};
      const asDataURI = Boolean(settings.asDataURI);
      const emptyContent = asDataURI ? EMPTY_DATA_URI : "";
      let startTime;
      if (DEBUG) {
        startTime = Date.now();
        log(" // STARTED download url =", resourceURL, "asDataURI =", settings.asDataURI);
      }
      if (!fetchResource) {
        fetchResource = typeof superFetch == "undefined" ? fetch : superFetch.fetch;
      }
      let resourceContent;
      try {
        resourceContent = await fetchResource(resourceURL);
      } catch (error) {
        return emptyContent;
      }
      if (resourceContent.status >= 400) {
        return emptyContent;
      }
      const contentTypeHeader = resourceContent.headers && resourceContent.headers.get("content-type");
      const { mimeType, charSet: declaredCharSet } = parseContentType(contentTypeHeader);
      // Only responses explicitly served as text are decoded; an unknown type is not assumed to be text.
      if (!asDataURI && !(mimeType && mimeType.startsWith(PREFIX_CONTENT_TYPE_TEXT))) {
        return "";
      }
      let buffer;
      try {
        buffer = await resourceContent.arrayBuffer();
      } catch (error) {
        return emptyContent;
      }
      if (DEBUG) {
        log(" // ENDED download url =", resourceURL, "delay =", Date.now() - startTime);
      }
      if (asDataURI) {
        // Check the limit first so that oversized resources are never base64-encoded.
        if (settings.maxResourceSizeEnabled && buffer.byteLength > settings.maxResourceSize * ONE_MB) {
          return EMPTY_DATA_URI;
        }
        try {
          return "data:" + (mimeType || "") + ";" + "base64," + base64.fromByteArray(new Uint8Array(buffer));
        } catch (error) {
          return EMPTY_DATA_URI;
        }
      }
      const charSet = declaredCharSet || settings.charSet || "utf-8";
      try {
        return getTextContent(charSet, buffer, settings);
      } catch (error) {
        // Unknown or unsupported charset label: retry with UTF-8 before giving up.
        try {
          return getTextContent("utf-8", buffer, settings);
        } catch (fallbackError) {
          return "";
        }
      }
    }
  }
  /**
   * Decodes a buffer as text, discarding the result when it exceeds the configured limit.
   *
   * @param {string} charSet encoding label handed to TextDecoder
   * @param {ArrayBuffer} arrayBuffer raw content to decode
   * @param {Object} options options providing "maxResourceSizeEnabled" and "maxResourceSize" (in MB)
   * @returns {string} the decoded text, or "" when the limit is enabled and the text is longer than it
   * @throws {RangeError} when TextDecoder does not support the given encoding label
   */
  function getTextContent(charSet, arrayBuffer, options) {
    const decoder = new TextDecoder(charSet);
    const decodedText = decoder.decode(arrayBuffer);
    // The limit is compared with the length of the decoded string.
    const isTooLarge = options.maxResourceSizeEnabled && decodedText.length > options.maxResourceSize * ONE_MB;
    return isTooLarge ? "" : decodedText;
  }
  // Match a whole string wrapped in single (resp. double) quotes and capture what is inside.
  const REGEXP_SIMPLE_QUOTES_STRING = /^'(.*?)'$/;
  const REGEXP_DOUBLE_QUOTES_STRING = /^"(.*?)"$/;
  /**
   * Lower-cases and trims a string, then strips one pair of surrounding quotes if present.
   *
   * @param {string} string value to normalize
   * @returns {string} the lower-cased value without its surrounding quotes and outer whitespace
   */
  function removeQuotes(string) {
    const normalized = string.toLowerCase().trim();
    // Single quotes take precedence; the double-quote pattern is a no-op on unquoted values.
    const quotesPattern = REGEXP_SIMPLE_QUOTES_STRING.test(normalized)
      ? REGEXP_SIMPLE_QUOTES_STRING
      : REGEXP_DOUBLE_QUOTES_STRING;
    return normalized.replace(quotesPattern, "$1").trim();
  }
  // https://developer.mozilla.org/en-US/docs/Web/API/SubtleCrypto/digest
  /**
   * Converts a binary buffer to its lower-case hexadecimal representation.
   *
   * Bytes are converted one at a time, in buffer order, so buffers of any length are
   * supported (reading 32-bit words would fail when the length is not a multiple of 4).
   *
   * @param {ArrayBuffer} buffer bytes to convert
   * @returns {string} two hexadecimal digits per byte
   */
  function hex(buffer) {
    const bytes = new Uint8Array(buffer);
    return Array.from(bytes, byte => byte.toString(16).padStart(2, "0")).join("");
  }
  // ---
  // DOM
  // ---
  /**
   * Browser implementation of the DOM-related helpers handed to SingleFileCore.getClass().
   * Most methods simply delegate to the helper scripts declared as globals of this file.
   */
  class DOM {
    /**
     * Parses HTML content and makes sure the resulting document has a <base> element with a "href".
     *
     * @param {string} pageContent HTML source to parse
     * @param {string} baseURI value given to the "href" of the inserted <base> element
     * @returns {Document} the parsed document
     */
    static createDoc(pageContent, baseURI) {
      const parser = new DOMParser();
      const doc = parser.parseFromString(pageContent, "text/html");
      const existingBase = doc.querySelector("base");
      if (existingBase && existingBase.getAttribute("href")) {
        return doc;
      }
      // A <base> element without "href" is replaced by a new one at the top of <head>.
      if (existingBase) {
        existingBase.remove();
      }
      const insertedBase = doc.createElement("base");
      insertedBase.setAttribute("href", baseURI);
      doc.head.insertBefore(insertedBase, doc.head.firstChild);
      return doc;
    }

    /**
     * Lists the enumerable property names of a <div> element that start with "on".
     *
     * @param {Document} doc document used to create the probe element
     * @returns {string[]} the matching property names
     */
    static getOnEventAttributeNames(doc) {
      const probe = doc.createElement("div");
      const names = [];
      // for...in is required here: it also enumerates inherited properties.
      for (const name in probe) {
        if (!name.startsWith("on")) {
          continue;
        }
        names.push(name);
      }
      return names;
    }

    /**
     * @returns {Function} the DOMParser constructor
     */
    static getParser() {
      return DOMParser;
    }

    /**
     * Hashes a text with crypto.subtle.digest().
     *
     * @param {string} algo name of the digest algorithm
     * @param {string} text text to hash
     * @returns {Promise<string>} the digest converted with hex()
     */
    static async digest(algo, text) {
      const encodedText = new TextEncoder("utf-8").encode(text);
      const hashBuffer = await crypto.subtle.digest(algo, encodedText);
      return hex(hashBuffer);
    }

    /**
     * @param {*} content content wrapped into a Blob
     * @returns {number} the size of that Blob
     */
    static getContentSize(content) {
      const blob = new Blob([content]);
      return blob.size;
    }

    /** Delegates to htmlmini.process(). */
    static minifyHTML(doc, options) {
      return htmlmini.process(doc, options);
    }

    /** Delegates to htmlmini.postProcess(). */
    static postMinifyHTML(doc) {
      return htmlmini.postProcess(doc);
    }

    /** Delegates to cssMinifier.process(). */
    static minifyCSS(doc, mediaAllInfo) {
      return cssMinifier.process(doc, mediaAllInfo);
    }

    /** Delegates to fontsMinifier.removeUnusedFonts(). */
    static removeUnusedFonts(doc, options) {
      return fontsMinifier.removeUnusedFonts(doc, options);
    }

    /** Delegates to fontsMinifier.removeAlternativeFonts(). */
    static removeAlternativeFonts(doc) {
      return fontsMinifier.removeAlternativeFonts(doc);
    }

    /** Creates a RulesMatcher for the document and returns its getMediaAllInfo() result. */
    static getMediaAllInfo(doc) {
      return RulesMatcher.create(doc).getMediaAllInfo();
    }

    /** Delegates to uglifycss.processString(). */
    static compressCSS(content, options) {
      return uglifycss.processString(content, options);
    }

    /** Delegates to mediasMinifier.process(). */
    static minifyMedias(doc) {
      return mediasMinifier.process(doc);
    }

    /** Delegates to altImages.process(). */
    static removeAlternativeImages(doc, options) {
      return altImages.process(doc, options);
    }

    /** Delegates to parseSrcset.process(). */
    static parseSrcset(srcset) {
      return parseSrcset.process(srcset);
    }

    /** Delegates to docHelper.preProcessDoc() and returns its result. */
    static preProcessDoc(doc, win, options) {
      return docHelper.preProcessDoc(doc, win, options);
    }

    /** Delegates to docHelper.postProcessDoc(); nothing is returned. */
    static postProcessDoc(doc, options) {
      docHelper.postProcessDoc(doc, options);
    }

    /** Delegates to serializer.process(). */
    static serialize(doc, compressHTML) {
      return serializer.process(doc, compressHTML);
    }

    /** Delegates to docHelper.windowIdAttributeName(). */
    static windowIdAttributeName(sessionId) {
      return docHelper.windowIdAttributeName(sessionId);
    }

    /** Delegates to docHelper.preservedSpaceAttributeName(). */
    static preservedSpaceAttributeName(sessionId) {
      return docHelper.preservedSpaceAttributeName(sessionId);
    }

    /** Delegates to docHelper.removedContentAttributeName(). */
    static removedContentAttributeName(sessionId) {
      return docHelper.removedContentAttributeName(sessionId);
    }

    /** Delegates to docHelper.imagesAttributeName(). */
    static imagesAttributeName(sessionId) {
      return docHelper.imagesAttributeName(sessionId);
    }

    /** Delegates to docHelper.inputValueAttributeName(). */
    static inputValueAttributeName(sessionId) {
      return docHelper.inputValueAttributeName(sessionId);
    }

    /** Delegates to docHelper.sheetAttributeName(). */
    static sheetAttributeName(sessionId) {
      return docHelper.sheetAttributeName(sessionId);
    }
  }
  /**
   * Writes a trace to the console, prefixed with "S-File <browser>".
   *
   * @param {...*} args values to log after the prefix
   */
  function log(...args) {
    const values = ["S-File <browser>", ...args];
    console.log(...values); // eslint-disable-line no-console
  }
  259. return { getClass: () => SingleFileCore.getClass(Download, DOM, URL) };
  260. })();