single-file-browser.js 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299
  1. /*
  2. * Copyright 2018 Gildas Lormeau
  3. * contact : gildas.lormeau <at> gmail.com
  4. *
  5. * This file is part of SingleFile.
  6. *
  7. * SingleFile is free software: you can redistribute it and/or modify
  8. * it under the terms of the GNU Lesser General Public License as published by
  9. * the Free Software Foundation, either version 3 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * SingleFile is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public License
  18. * along with SingleFile. If not, see <http://www.gnu.org/licenses/>.
  19. */
  20. /* global SingleFileCore, DOMParser, TextDecoder, Blob, fetch, base64, superFetch, parseSrcset, uglifycss, htmlmini, cssMinifier, fontsMinifier, lazyLoader, serializer, docHelper, mediasMinifier, TextEncoder, crypto, RulesMatcher */
  21. this.SingleFile = this.SingleFile || (() => {
  // Number of bytes in one megabyte; the resource size limit option is expressed in MB.
  const ONE_MB = 2 ** 20;
  // When true, Download.getContent reports download start/end timings through log().
  const DEBUG = false;
  // --------
  // Download
  // --------
  // Fetch implementation used by Download.getContent; it is selected lazily on the first download.
  let fetchResource;
  // Install a minimal fallback serializer only when no `serializer` is already defined on `this`.
  if (this.serializer === undefined) {
    this.serializer = {
      /**
       * Serializes a document as its doctype declaration (when it has one) followed by the
       * outer HTML of the root element.
       *
       * @param {Document} doc - Document to serialize.
       * @returns {string} The serialized document.
       */
      process(doc) {
        const rootHTML = () => doc.documentElement.outerHTML;
        const { doctype } = doc;
        if (!doctype) {
          return "" + rootHTML();
        }
        const parts = ["<!DOCTYPE " + doctype.nodeName];
        if (doctype.publicId) {
          parts.push(" PUBLIC \"" + doctype.publicId + "\"");
          if (doctype.systemId) {
            parts.push(" \"" + doctype.systemId + "\"");
          }
        } else if (doctype.systemId) {
          // A system identifier without a public one needs the SYSTEM keyword.
          parts.push(" SYSTEM \"" + doctype.systemId + "\"");
        }
        if (doctype.internalSubset) {
          parts.push(" [" + doctype.internalSubset + "]");
        }
        parts.push("> ");
        return parts.join("") + rootHTML();
      }
    };
  }
  /**
   * Resource downloader: fetches a URL and hands the result back as text or as a data URI.
   */
  class Download {
    /**
     * Fetches a resource and returns it either as decoded text or as a base64 data URI.
     *
     * The request is made with `superFetch.fetch` when a `superFetch` global exists and with the
     * global `fetch` otherwise. When the response status is >= 400 and `superFetch.hostFetch`
     * is available, the request is retried once through it.
     *
     * @param {string} resourceURL - URL handed to the fetch function.
     * @param {Object} [options] - Download options; a missing value is treated as `{}`.
     * @param {boolean} [options.asDataURI] - Return a `data:` URI instead of decoded text.
     * @param {string} [options.charSet] - Charset used for text when the response declares none
     * (falls back to "utf-8").
     * @param {boolean} [options.maxResourceSizeEnabled] - Enforce `options.maxResourceSize`.
     * @param {number} [options.maxResourceSize] - Size limit in MB, compared to the byte length
     * of the downloaded resource.
     * @returns {Promise<Object>} `{ content, hash }` on success, where `hash` is the SHA-256
     * digest (`ArrayBuffer`) of the downloaded bytes. On a network error, a status >= 400, an
     * oversize resource or a decoding error: `{ empty: true }` in data URI mode and
     * `{ content: "" }` in text mode.
     */
    static async getContent(resourceURL, options) {
      const settings = options || {};
      // Every failure path returns the same shape so callers can always read `.content`/`.empty`.
      const emptyResult = () => (settings.asDataURI ? { empty: true } : { content: "" });
      let startTime;
      if (DEBUG) {
        startTime = Date.now();
        log(" // STARTED download url =", resourceURL, "asDataURI =", settings.asDataURI);
      }
      const hasSuperFetch = typeof superFetch != "undefined";
      if (!fetchResource) {
        fetchResource = hasSuperFetch ? superFetch.fetch : fetch;
      }
      let response;
      try {
        response = await fetchResource(resourceURL);
      } catch (error) {
        return emptyResult();
      }
      // `superFetch` may be undeclared (plain `fetch` fallback), so it is only read when it exists.
      if (response.status >= 400 && hasSuperFetch && superFetch.hostFetch) {
        try {
          response = await superFetch.hostFetch(resourceURL);
        } catch (error) {
          return emptyResult();
        }
      }
      if (response.status >= 400) {
        return emptyResult();
      }
      let contentType = response.headers && response.headers.get("content-type");
      let charSet;
      if (contentType) {
        const [mediaType, ...parameters] = contentType.toLowerCase().split(";");
        contentType = mediaType.trim();
        if (contentType.indexOf("/") <= 0) {
          contentType = null;
        }
        // The charset may be any of the parameters, not only the first one.
        for (const parameter of parameters) {
          const matchCharSet = parameter.trim().match(/^charset=(.*)/);
          if (matchCharSet) {
            charSet = removeQuotes(matchCharSet[1]);
            break;
          }
        }
      }
      try {
        const buffer = await response.arrayBuffer();
        if (DEBUG) {
          log(" // ENDED download url =", resourceURL, "delay =", Date.now() - startTime);
        }
        // Check the size first so oversize resources are neither encoded nor decoded.
        if (settings.maxResourceSizeEnabled && buffer.byteLength > settings.maxResourceSize * ONE_MB) {
          return emptyResult();
        }
        if (settings.asDataURI) {
          const bytes = new Uint8Array(buffer);
          const dataURI = "data:" + (contentType || "") + ";" + "base64," + base64.fromByteArray(bytes);
          const hash = await crypto.subtle.digest("SHA-256", bytes);
          return { content: dataURI, hash };
        }
        const textContent = new TextDecoder(charSet || settings.charSet || "utf-8").decode(buffer);
        const hash = await crypto.subtle.digest("SHA-256", buffer);
        return { content: textContent, hash };
      } catch (error) {
        // Reading the body, an unknown charset label or the digest may fail: report an empty resource.
        return emptyResult();
      }
    }
  }
  const REGEXP_SIMPLE_QUOTES_STRING = /^'(.*?)'$/;
  const REGEXP_DOUBLE_QUOTES_STRING = /^"(.*?)"$/;
  /**
   * Lower-cases and trims a string, then strips one pair of enclosing single or double quotes.
   *
   * @param {string} string - Raw value, e.g. a charset label taken from a Content-Type header.
   * @returns {string} The normalized value, trimmed again after the quotes are removed.
   */
  function removeQuotes(string) {
    const normalized = string.toLowerCase().trim();
    // Single quotes take precedence; anything else goes through the double-quote pattern.
    const quotesPattern = REGEXP_SIMPLE_QUOTES_STRING.test(normalized)
      ? REGEXP_SIMPLE_QUOTES_STRING
      : REGEXP_DOUBLE_QUOTES_STRING;
    return normalized.replace(quotesPattern, "$1").trim();
  }
  // https://developer.mozilla.org/en-US/docs/Web/API/SubtleCrypto/digest
  /**
   * Converts a binary buffer into its lower-case hexadecimal representation.
   *
   * Bytes are converted one at a time, two hex digits each and in buffer order, so buffers of
   * any length (including empty ones and lengths that are not a multiple of 4) are supported.
   *
   * @param {ArrayBuffer} buffer - Bytes to convert, e.g. the result of `crypto.subtle.digest`.
   * @returns {string} Hexadecimal string, two characters per byte.
   */
  function hex(buffer) {
    const bytes = new Uint8Array(buffer);
    return Array.from(bytes, (byte) => byte.toString(16).padStart(2, "0")).join("");
  }
  159. // ---
  160. // DOM
  161. // ---
  /**
   * DOM-related helpers: document creation, hashing, and thin delegations to the helper
   * globals (htmlmini, lazyLoader, cssMinifier, fontsMinifier, RulesMatcher, uglifycss,
   * mediasMinifier, parseSrcset, docHelper, serializer).
   */
  class DOM {
    /**
     * Parses `pageContent` as "text/html" and makes sure the document has a <base> element
     * with an `href` attribute; when it does not, a <base href=baseURI> element is inserted as
     * the first child of <head> (an existing <base> without `href` is removed first).
     *
     * @param {string} pageContent - HTML source.
     * @param {string} baseURI - Value for the `href` attribute of the inserted <base> element.
     * @returns {Document} The parsed document.
     */
    static createDoc(pageContent, baseURI) {
      const parser = new DOMParser();
      const doc = parser.parseFromString(pageContent, "text/html");
      const existingBase = doc.querySelector("base");
      if (existingBase && existingBase.getAttribute("href")) {
        return doc;
      }
      if (existingBase) {
        existingBase.remove();
      }
      const newBase = doc.createElement("base");
      newBase.setAttribute("href", baseURI);
      doc.head.insertBefore(newBase, doc.head.firstChild);
      return doc;
    }

    /**
     * Lists the enumerable property names (own and inherited) of a fresh <div> element that
     * start with "on".
     *
     * @param {Document} doc - Document used to create the probe element.
     * @returns {string[]} Matching property names.
     */
    static getOnEventAttributeNames(doc) {
      const probe = doc.createElement("div");
      const names = [];
      // for...in is required here: the handler properties are inherited, not own.
      for (const name in probe) {
        if (!name.startsWith("on")) {
          continue;
        }
        names.push(name);
      }
      return names;
    }

    /** @returns {Function} The `DOMParser` constructor. */
    static getParser() {
      return DOMParser;
    }

    /**
     * Hashes the encoded bytes of `text` and returns the digest as a hexadecimal string.
     *
     * @param {string} algo - Algorithm name passed to `crypto.subtle.digest`.
     * @param {string} text - Text to hash.
     * @returns {Promise<string>} Hexadecimal digest.
     */
    static async digest(algo, text) {
      const encoded = new TextEncoder("utf-8").encode(text);
      const digestBuffer = await crypto.subtle.digest(algo, encoded);
      return hex(digestBuffer);
    }

    /** @returns {number} Size of a Blob wrapping `content`. */
    static getContentSize(content) {
      const blob = new Blob([content]);
      return blob.size;
    }

    /** Delegates to `htmlmini.process`. */
    static minifyHTML(doc, options) {
      return htmlmini.process(doc, options);
    }

    /** Delegates to `htmlmini.postProcess`. */
    static postMinifyHTML(doc) {
      return htmlmini.postProcess(doc);
    }

    /** Delegates to `lazyLoader.process`. */
    static lazyLoad(doc) {
      return lazyLoader.process(doc);
    }

    /** Delegates to `cssMinifier.process`. */
    static minifyCSS(doc, mediaAllInfo) {
      return cssMinifier.process(doc, mediaAllInfo);
    }

    /** Delegates to `fontsMinifier.process`. */
    static minifyFonts(doc, secondPass) {
      return fontsMinifier.process(doc, secondPass);
    }

    /** Creates a `RulesMatcher` for `doc` and returns its `getMediaAllInfo()` result. */
    static getMediaAllInfo(doc) {
      return RulesMatcher.create(doc).getMediaAllInfo();
    }

    /** Delegates to `uglifycss.processString`. */
    static compressCSS(content, options) {
      return uglifycss.processString(content, options);
    }

    /** Delegates to `mediasMinifier.process`. */
    static minifyMedias(doc) {
      return mediasMinifier.process(doc);
    }

    /** Delegates to the global `parseSrcset.process`. */
    static parseSrcset(srcset) {
      return parseSrcset.process(srcset);
    }

    /** Delegates to `docHelper.preProcessDoc`. */
    static preProcessDoc(doc, win, options) {
      return docHelper.preProcessDoc(doc, win, options);
    }

    /** Calls `docHelper.postProcessDoc`; its result is not returned. */
    static postProcessDoc(doc, options) {
      docHelper.postProcessDoc(doc, options);
    }

    /** Delegates to `serializer.process`. */
    static serialize(doc, compressHTML) {
      return serializer.process(doc, compressHTML);
    }

    /** @returns {*} The `lazyLoader.imageSelectors` value. */
    static lazyLoaderImageSelectors() {
      return lazyLoader.imageSelectors;
    }

    /** Delegates to `docHelper.windowIdAttributeName`. */
    static windowIdAttributeName(sessionId) {
      return docHelper.windowIdAttributeName(sessionId);
    }

    /** Delegates to `docHelper.preservedSpaceAttributeName`. */
    static preservedSpaceAttributeName(sessionId) {
      return docHelper.preservedSpaceAttributeName(sessionId);
    }

    /** Delegates to `docHelper.removedContentAttributeName`. */
    static removedContentAttributeName(sessionId) {
      return docHelper.removedContentAttributeName(sessionId);
    }

    /** Delegates to `docHelper.responsiveImagesAttributeName`. */
    static responsiveImagesAttributeName(sessionId) {
      return docHelper.responsiveImagesAttributeName(sessionId);
    }

    /** Delegates to `docHelper.imagesAttributeName`. */
    static imagesAttributeName(sessionId) {
      return docHelper.imagesAttributeName(sessionId);
    }

    /** Delegates to `docHelper.inputValueAttributeName`. */
    static inputValueAttributeName(sessionId) {
      return docHelper.inputValueAttributeName(sessionId);
    }

    /** Delegates to `docHelper.sheetAttributeName`. */
    static sheetAttributeName(sessionId) {
      return docHelper.sheetAttributeName(sessionId);
    }
  }
  /**
   * Writes a message to the console, prefixed with the tag identifying this browser build.
   *
   * @param {...*} args - Values forwarded to `console.log` after the prefix.
   */
  function log(...args) {
    const prefixedArgs = ["S-File <browser>"].concat(args);
    console.log.apply(console, prefixedArgs); // eslint-disable-line no-console
  }
  261. return { getClass: () => SingleFileCore.getClass(Download, DOM, URL) };
  262. })();