single-file-browser.js 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328
  1. /*
  2. * Copyright 2018 Gildas Lormeau
  3. * contact : gildas.lormeau <at> gmail.com
  4. *
  5. * This file is part of SingleFile.
  6. *
  7. * SingleFile is free software: you can redistribute it and/or modify
  8. * it under the terms of the GNU Lesser General Public License as published by
  9. * the Free Software Foundation, either version 3 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * SingleFile is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public License
  18. * along with SingleFile. If not, see <http://www.gnu.org/licenses/>.
  19. */
  20. /* global
  21. crypto,
  22. cssMinifier,
  23. cssRulesMinifier,
  24. cssTree,
  25. docHelper,
  26. fetch,
  27. fontsAltMinifier,
  28. fontsMinifier,
  29. htmlMinifier,
  30. imagesAltMinifier,
  31. matchedRules,
  32. mediasMinifier,
  33. serializer,
  34. setTimeout,
  35. srcsetParser,
  36. superFetch,
  37. Blob,
  38. DOMParser,
  39. FileReader,
  40. FontFace,
  41. SingleFileCore,
  42. TextDecoder,
  43. TextEncoder,
  44. URL */
  45. this.SingleFileBrowser = this.SingleFileBrowser || (() => {
  // Number of bytes in one megabyte; getContent() multiplies options.maxResourceSize by it.
  46. const ONE_MB = 1024 * 1024;
  // When true, getContent() reports download start/end through log().
  47. const DEBUG = false;
  // Prefix a content type must start with to pass the options.validateTextContentType check.
  48. const PREFIX_CONTENT_TYPE_TEXT = "text/";
  // Delay handed to setTimeout() in DocUtil.validFont() before the font test stops waiting.
  49. const FONT_FACE_TEST_MAX_DELAY = 1000;
  50. // --------
  51. // Download
  52. // --------
  // Fetch implementation used by getContent(); assigned lazily on its first call
  // (superFetch.fetch when superFetch is defined, otherwise the global fetch).
  53. let fetchResource;
  // Install a minimal fallback serializer only when none has been provided yet.
  if (this.serializer === undefined) {
    this.serializer = {
      /**
       * Serializes a document as its doctype declaration (when present)
       * followed by the outer HTML of its root element.
       *
       * @param {Document} doc - document to serialize
       * @returns {string} the serialized markup
       */
      process(doc) {
        const { doctype } = doc;
        if (!doctype) {
          return doc.documentElement.outerHTML;
        }
        const { nodeName, publicId, systemId, internalSubset } = doctype;
        const pieces = [`<!DOCTYPE ${nodeName}`];
        if (publicId) {
          pieces.push(` PUBLIC "${publicId}"`);
          if (systemId) {
            pieces.push(` "${systemId}"`);
          }
        } else if (systemId) {
          pieces.push(` SYSTEM "${systemId}"`);
        }
        if (internalSubset) {
          pieces.push(` [${internalSubset}]`);
        }
        // The declaration is closed with "> " (trailing space included).
        pieces.push("> ");
        return pieces.join("") + doc.documentElement.outerHTML;
      }
    };
  }
  /**
   * Splits a raw Content-Type header value into its media type and charset.
   *
   * Every parameter after the media type is inspected, so the charset is found
   * even when it is not the first parameter.
   *
   * @param {string} headerValue - non-empty value of the "content-type" header
   * @returns {{contentType: (string|null), charset: (string|undefined)}} the
   *   lower-cased media type (null when it contains no "/") and the charset
   *   value after docHelper.removeQuotes(), if a charset parameter was present
   */
  function parseContentTypeHeader(headerValue) {
    const [mediaType, ...parameters] = headerValue.toLowerCase().split(";").map(part => part.trim());
    let charset;
    for (const parameter of parameters) {
      const matchCharset = parameter.match(/^charset=(.*)/);
      if (matchCharset) {
        charset = docHelper.removeQuotes(matchCharset[1]);
        break;
      }
    }
    return { contentType: mediaType.includes("/") ? mediaType : null, charset };
  }

  /**
   * Reads a Blob with a FileReader and resolves with the resulting data URI.
   *
   * @param {Blob} blob - content to encode
   * @returns {Promise<string>} resolves with reader.result, rejects on a read error
   */
  function readBlobAsDataURL(blob) {
    return new Promise((resolve, reject) => {
      const reader = new FileReader();
      reader.addEventListener("load", () => resolve(reader.result), false);
      reader.addEventListener("error", reject, false);
      reader.readAsDataURL(blob);
    });
  }

  /**
   * Downloads a resource and returns it either as a data URI or as decoded text.
   *
   * Failures never reject: they yield an empty payload ("data:base64," in
   * data-URI mode, "" in text mode).
   *
   * @param {string} resourceURL - URL of the resource to download
   * @param {Object} [options] - download options; may be omitted
   * @param {boolean} [options.asDataURI] - return the content as a data URI instead of text
   * @param {boolean} [options.maxResourceSizeEnabled] - enforce options.maxResourceSize
   * @param {number} [options.maxResourceSize] - size limit, multiplied by ONE_MB
   * @param {boolean} [options.validateTextContentType] - in text mode, reject content
   *   types that do not start with PREFIX_CONTENT_TYPE_TEXT
   * @param {string} [options.charset] - charset used when the response declares none
   * @returns {Promise<{data: string, resourceURL: string, charset: (string|undefined)}>}
   *   the payload, the final URL (response.url when the response provides one) and,
   *   in text mode once past the status/content-type check, the charset used
   */
  async function getContent(resourceURL, options) {
    // Callers may omit options; normalize once instead of guarding every access.
    const opts = options || {};
    const emptyData = opts.asDataURI ? "data:base64," : "";
    let resourceContent, startTime;
    if (DEBUG) {
      startTime = Date.now();
      log(" // STARTED download url =", resourceURL, "asDataURI =", opts.asDataURI);
    }
    if (!fetchResource) {
      fetchResource = typeof superFetch === "undefined" ? fetch : superFetch.fetch;
    }
    try {
      resourceContent = await fetchResource(resourceURL);
      if (resourceContent.url) {
        resourceURL = resourceContent.url;
      }
    } catch (error) {
      // Best effort: an unreachable resource is reported as empty content.
      return { data: emptyData, resourceURL };
    }
    let contentType = resourceContent.headers && resourceContent.headers.get("content-type");
    let charset;
    if (contentType) {
      ({ contentType, charset } = parseContentTypeHeader(contentType));
    }
    if (opts.asDataURI) {
      try {
        const buffer = await resourceContent.arrayBuffer();
        if (DEBUG) {
          log(" // ENDED download url =", resourceURL, "delay =", Date.now() - startTime);
        }
        if (opts.maxResourceSizeEnabled && buffer.byteLength > opts.maxResourceSize * ONE_MB) {
          return { data: emptyData, resourceURL };
        }
        const dataURI = await readBlobAsDataURL(new Blob([buffer], { type: contentType }));
        return { data: dataURI, resourceURL };
      } catch (error) {
        return { data: emptyData, resourceURL };
      }
    }
    if (resourceContent.status >= 400 || (opts.validateTextContentType && contentType && !contentType.startsWith(PREFIX_CONTENT_TYPE_TEXT))) {
      return { data: "", resourceURL };
    }
    if (!charset) {
      // No charset in the response header: use the caller's hint, then UTF-8.
      charset = opts.charset || "utf-8";
    }
    let buffer;
    try {
      buffer = await resourceContent.arrayBuffer();
    } catch (error) {
      return { data: "", resourceURL, charset };
    }
    if (DEBUG) {
      log(" // ENDED download url =", resourceURL, "delay =", Date.now() - startTime);
    }
    if (opts.maxResourceSizeEnabled && buffer.byteLength > opts.maxResourceSize * ONE_MB) {
      return { data: "", resourceURL, charset };
    }
    try {
      return { data: new TextDecoder(charset).decode(buffer), resourceURL, charset };
    } catch (error) {
      // TextDecoder rejects unknown encoding labels; retry with UTF-8.
      charset = "utf-8";
    }
    try {
      return { data: new TextDecoder(charset).decode(buffer), resourceURL, charset };
    } catch (error) {
      return { data: "", resourceURL, charset };
    }
  }
  166. // https://developer.mozilla.org/en-US/docs/Web/API/SubtleCrypto/digest
  /**
   * Converts a binary buffer to its lowercase hexadecimal representation.
   *
   * Bytes are converted one at a time, in buffer order, so buffers of any
   * length are supported (including lengths that are not a multiple of 4).
   *
   * @param {ArrayBuffer} buffer - bytes to encode
   * @returns {string} two hexadecimal digits per byte, "" for an empty buffer
   */
  function hex(buffer) {
    return Array.from(new Uint8Array(buffer), byte => byte.toString(16).padStart(2, "0")).join("");
  }
  179. // ---
  180. // DocUtil
  181. // ---
  /**
   * Browser implementation of the utility class passed to
   * SingleFileCore.getClass(). Most members simply delegate to the helper
   * modules listed in the file's global declarations.
   */
  class DocUtil {
    /** Delegates to the module-level getContent(). */
    static async getContent(resourceURL, options) {
      return getContent(resourceURL, options);
    }

    /**
     * Builds a URL object from a (possibly relative) URL and a base URI.
     * @returns {URL}
     */
    static parseURL(resourceURL, baseURI) {
      const parsedURL = new URL(resourceURL, baseURI);
      return parsedURL;
    }

    /**
     * Resolves a (possibly relative) URL against a base URI.
     * @returns {string} the href of the URL built by parseURL()
     */
    static resolveURL(resourceURL, baseURI) {
      const { href } = this.parseURL(resourceURL, baseURI);
      return href;
    }

    /**
     * Parses HTML into a document and ensures it has a <base> element with an
     * href: when the first <base> is missing or has no href, it is removed (if
     * present) and a new one pointing to baseURI is inserted first in <head>.
     * @returns {Document}
     */
    static createDoc(pageContent, baseURI) {
      const parser = new DOMParser();
      const doc = parser.parseFromString(pageContent, "text/html");
      const existingBase = doc.querySelector("base");
      if (existingBase && existingBase.getAttribute("href")) {
        return doc;
      }
      if (existingBase) {
        existingBase.remove();
      }
      const newBase = doc.createElement("base");
      newBase.setAttribute("href", baseURI);
      doc.head.insertBefore(newBase, doc.head.firstChild);
      return doc;
    }

    /** Returns the DOMParser constructor. */
    static getParser() {
      return DOMParser;
    }

    /**
     * Hashes a text with crypto.subtle.digest() and hex-encodes the result.
     * @returns {Promise<string>}
     */
    static async digest(algo, text) {
      const encodedText = new TextEncoder("utf-8").encode(text);
      const hashBuffer = await crypto.subtle.digest(algo, encodedText);
      return hex(hashBuffer);
    }

    /** Returns the size of a Blob built from the given content. */
    static getContentSize(content) {
      const blob = new Blob([content]);
      return blob.size;
    }

    /**
     * Tests a font source by loading it through a FontFace.
     * @returns {Promise<boolean>} false when creating or loading the font
     *   fails; true when it loads or when FONT_FACE_TEST_MAX_DELAY elapses first
     */
    static async validFont(urlFunction) {
      try {
        const font = new FontFace("font-test", urlFunction);
        const loading = font.load();
        const timeout = new Promise(resolve => setTimeout(() => resolve(true), FONT_FACE_TEST_MAX_DELAY));
        await Promise.race([loading, timeout]);
        return true;
      } catch (error) {
        return false;
      }
    }

    /** Delegates to htmlMinifier.process(). */
    static minifyHTML(doc, options) {
      return htmlMinifier.process(doc, options);
    }

    /** Delegates to htmlMinifier.postProcess(). */
    static postMinifyHTML(doc) {
      return htmlMinifier.postProcess(doc);
    }

    /** Delegates to cssRulesMinifier.process(). */
    static minifyCSSRules(stylesheets, styles, mediaAllInfo) {
      return cssRulesMinifier.process(stylesheets, styles, mediaAllInfo);
    }

    /** Delegates to fontsMinifier.process(). */
    static removeUnusedFonts(doc, stylesheets, styles, options) {
      return fontsMinifier.process(doc, stylesheets, styles, options);
    }

    /** Delegates to fontsAltMinifier.process(). */
    static removeAlternativeFonts(doc, stylesheets) {
      return fontsAltMinifier.process(doc, stylesheets);
    }

    /** Delegates to matchedRules.getMediaAllInfo(). */
    static getMediaAllInfo(doc, stylesheets, styles) {
      return matchedRules.getMediaAllInfo(doc, stylesheets, styles);
    }

    /** Delegates to cssMinifier.processString(). */
    static compressCSS(content, options) {
      return cssMinifier.processString(content, options);
    }

    /** Delegates to mediasMinifier.process(). */
    static minifyMedias(stylesheets) {
      return mediasMinifier.process(stylesheets);
    }

    /** Delegates to imagesAltMinifier.process(). */
    static removeAlternativeImages(doc, options) {
      return imagesAltMinifier.process(doc, options);
    }

    /** Delegates to srcsetParser.process(). */
    static parseSrcset(srcset) {
      return srcsetParser.process(srcset);
    }

    /** Delegates to docHelper.preProcessDoc(). */
    static preProcessDoc(doc, win, options) {
      return docHelper.preProcessDoc(doc, win, options);
    }

    /** Calls docHelper.postProcessDoc(); nothing is returned. */
    static postProcessDoc(doc, options) {
      docHelper.postProcessDoc(doc, options);
    }

    /** Delegates to the global serializer's process(). */
    static serialize(doc, compressHTML) {
      return serializer.process(doc, compressHTML);
    }

    /** Delegates to docHelper.removeQuotes(). */
    static removeQuotes(string) {
      return docHelper.removeQuotes(string);
    }

    /** Delegates to docHelper.windowIdAttributeName(). */
    static windowIdAttributeName(sessionId) {
      return docHelper.windowIdAttributeName(sessionId);
    }

    /** Delegates to docHelper.preservedSpaceAttributeName(). */
    static preservedSpaceAttributeName(sessionId) {
      return docHelper.preservedSpaceAttributeName(sessionId);
    }

    /** Delegates to docHelper.removedContentAttributeName(). */
    static removedContentAttributeName(sessionId) {
      return docHelper.removedContentAttributeName(sessionId);
    }

    /** Delegates to docHelper.imagesAttributeName(). */
    static imagesAttributeName(sessionId) {
      return docHelper.imagesAttributeName(sessionId);
    }

    /** Delegates to docHelper.inputValueAttributeName(). */
    static inputValueAttributeName(sessionId) {
      return docHelper.inputValueAttributeName(sessionId);
    }

    /** Delegates to docHelper.shadowRootAttributeName(). */
    static shadowRootAttributeName(sessionId) {
      return docHelper.shadowRootAttributeName(sessionId);
    }
  }
  /**
   * Writes a message to the console, tagged with the "S-File <browser>" prefix.
   *
   * @param {...*} args - values forwarded to console.log after the prefix
   */
  function log(...args) {
    const prefixedArgs = ["S-File <browser>", ...args];
    console.log(...prefixedArgs); // eslint-disable-line no-console
  }
  // Value assigned to this.SingleFileBrowser: getClass() hands DocUtil and cssTree to
  // SingleFileCore.getClass() and returns its result.
  288. return { getClass: () => SingleFileCore.getClass(DocUtil, cssTree) };
  289. })();