single-file-browser.js 9.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314
  1. /*
  2. * Copyright 2018 Gildas Lormeau
  3. * contact : gildas.lormeau <at> gmail.com
  4. *
  5. * This file is part of SingleFile.
  6. *
  7. * SingleFile is free software: you can redistribute it and/or modify
  8. * it under the terms of the GNU Lesser General Public License as published by
  9. * the Free Software Foundation, either version 3 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * SingleFile is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public License
  18. * along with SingleFile. If not, see <http://www.gnu.org/licenses/>.
  19. */
  20. /* global SingleFileCore, DOMParser, URL, setTimeout, TextDecoder, Blob, fetch, FileReader, superFetch, srcsetParser, cssMinifier, htmlMinifier, cssRulesMinifier, fontsMinifier, fontsAltMinifier, serializer, docHelper, mediasMinifier, TextEncoder, crypto, matchedRules, imagesAltMinifier, FontFace, cssTree */
  21. this.SingleFileBrowser = this.SingleFileBrowser || (() => {
  // Number of bytes in one megabyte; getContent multiplies options.maxResourceSize by it
  // before comparing against a downloaded buffer's byteLength.
  const ONE_MB = 1024 * 1024;
  // When true, getContent reports the start and end of each download through log().
  const DEBUG = false;
  // Prefix a content type must start with to pass the options.validateTextContentType check.
  const PREFIX_CONTENT_TYPE_TEXT = "text/";
  // Delay, in milliseconds, after which docUtil.validFont stops waiting for FontFace.load().
  const FONT_FACE_TEST_MAX_DELAY = 1000;
  // --------
  // Download
  // --------
  // Fetch implementation, chosen on the first getContent call: superFetch.fetch when
  // superFetch is defined, the global fetch otherwise.
  let fetchResource;
  // Install a fallback serializer only when the enclosing scope does not already provide one.
  if (this.serializer === undefined) {
    this.serializer = {
      /**
       * Serializes a document to a string: the doctype declaration (when the
       * document has one) followed by the outerHTML of the root element.
       *
       * @param {Document} doc - document to serialize
       * @returns {string} doctype declaration (if any) + root element markup
       */
      process(doc) {
        const { doctype } = doc;
        let declaration = "";
        if (doctype) {
          const { publicId, systemId, internalSubset } = doctype;
          const pieces = [`<!DOCTYPE ${doctype.nodeName}`];
          if (publicId) {
            pieces.push(` PUBLIC "${publicId}"`);
            if (systemId) {
              pieces.push(` "${systemId}"`);
            }
          } else if (systemId) {
            pieces.push(` SYSTEM "${systemId}"`);
          }
          if (internalSubset) {
            pieces.push(` [${internalSubset}]`);
          }
          // The declaration is closed with "> " (note the trailing space).
          pieces.push("> ");
          declaration = pieces.join("");
        }
        return declaration + doc.documentElement.outerHTML;
      }
    };
  }
  /**
   * Splits a Content-Type header value into its media type and charset.
   *
   * The charset is searched in every parameter following the media type, not
   * only the first one, so "text/css; foo=bar; charset=utf-8" is handled.
   *
   * @param {?string} headerValue - raw header value; falsy values are passed
   *   through unchanged as the content type
   * @returns {{contentType: ?string, charset: (string|undefined)}} lower-cased
   *   media type (null when it has no "type/subtype" shape) and the charset
   *   value after docHelper.removeQuotes, or undefined when none is declared
   */
  function parseContentTypeHeader(headerValue) {
    if (!headerValue) {
      return { contentType: headerValue, charset: undefined };
    }
    const [mediaType, ...parameters] = headerValue.toLowerCase().split(";");
    const trimmedMediaType = mediaType.trim();
    // A usable media type needs a "/" that is not its first character.
    const contentType = trimmedMediaType.indexOf("/") > 0 ? trimmedMediaType : null;
    let charset;
    for (const parameter of parameters) {
      const matchCharset = parameter.trim().match(/^charset\s*=\s*(.*)$/);
      if (matchCharset) {
        charset = docHelper.removeQuotes(matchCharset[1]);
        break;
      }
    }
    return { contentType, charset };
  }

  /**
   * Downloads a resource and returns it either as a data URI or as decoded text.
   *
   * Failures never throw: they resolve to an empty payload ("data:base64," in
   * data-URI mode, "" in text mode).
   *
   * @param {string} resourceURL - URL of the resource to download
   * @param {Object} [options] - optional settings; a missing or null value is
   *   treated as an empty object
   * @param {boolean} [options.asDataURI] - return the content as a data URI
   *   instead of text
   * @param {boolean} [options.maxResourceSizeEnabled] - enforce maxResourceSize
   * @param {number} [options.maxResourceSize] - size limit, in megabytes
   * @param {boolean} [options.validateTextContentType] - in text mode, reject
   *   responses whose content type does not start with "text/"
   * @param {string} [options.charset] - charset used in text mode when the
   *   response does not declare one
   * @returns {Promise<{data: string, resourceURL: string}>} the content and the
   *   response URL when the response exposes one (the requested URL otherwise)
   */
  async function getContent(resourceURL, options) {
    // Several callers' paths used to dereference options unguarded; normalise once.
    const settings = options || {};
    let resourceContent, startTime;
    if (DEBUG) {
      startTime = Date.now();
      log("  // STARTED download url =", resourceURL, "asDataURI =", settings.asDataURI);
    }
    if (!fetchResource) {
      fetchResource = typeof superFetch == "undefined" ? fetch : superFetch.fetch;
    }
    try {
      resourceContent = await fetchResource(resourceURL);
      if (resourceContent.url) {
        resourceURL = resourceContent.url;
      }
    } catch (error) {
      // Best effort: an unreachable resource yields an empty payload.
      return { data: settings.asDataURI ? "data:base64," : "", resourceURL };
    }
    const headerValue = resourceContent.headers && resourceContent.headers.get("content-type");
    const { contentType, charset } = parseContentTypeHeader(headerValue);
    const exceedsMaxSize = buffer => settings.maxResourceSizeEnabled && buffer.byteLength > settings.maxResourceSize * ONE_MB;
    if (settings.asDataURI) {
      try {
        const buffer = await resourceContent.arrayBuffer();
        // Logged once the body has been read, as in the text branch below.
        if (DEBUG) {
          log("  // ENDED   download url =", resourceURL, "delay =", Date.now() - startTime);
        }
        if (exceedsMaxSize(buffer)) {
          return { data: "data:base64,", resourceURL };
        }
        const reader = new FileReader();
        const dataURI = await new Promise((resolve, reject) => {
          reader.addEventListener("load", () => resolve(reader.result), false);
          reader.addEventListener("error", reject, false);
          reader.readAsDataURL(new Blob([buffer], { type: contentType }));
        });
        return { data: dataURI, resourceURL };
      } catch (error) {
        return { data: "data:base64,", resourceURL };
      }
    }
    if (resourceContent.status >= 400 || (settings.validateTextContentType && contentType && !contentType.startsWith(PREFIX_CONTENT_TYPE_TEXT))) {
      return { data: "", resourceURL };
    }
    // Charset precedence: response header, then caller-supplied default, then UTF-8.
    const textCharset = charset || settings.charset || "utf-8";
    let buffer;
    try {
      buffer = await resourceContent.arrayBuffer();
    } catch (error) {
      return { data: "", resourceURL };
    }
    if (DEBUG) {
      log("  // ENDED   download url =", resourceURL, "delay =", Date.now() - startTime);
    }
    if (exceedsMaxSize(buffer)) {
      return { data: "", resourceURL };
    }
    try {
      return { data: new TextDecoder(textCharset).decode(buffer), resourceURL };
    } catch (error) {
      // TextDecoder throws on an unsupported label; retry with UTF-8.
      try {
        return { data: new TextDecoder("utf-8").decode(buffer), resourceURL };
      } catch (error) {
        return { data: "", resourceURL };
      }
    }
  }
  141. // https://developer.mozilla.org/en-US/docs/Web/API/SubtleCrypto/digest
  /**
   * Converts a buffer to its lower-case hexadecimal representation, two
   * characters per byte, in byte order.
   *
   * Works byte by byte so buffers whose length is not a multiple of four are
   * supported as well (reading 32-bit words would throw a RangeError on them).
   *
   * @param {ArrayBuffer} buffer - bytes to convert, e.g. a digest
   * @returns {string} hexadecimal string ("" for an empty buffer)
   */
  function hex(buffer) {
    return Array.from(new Uint8Array(buffer), byte => byte.toString(16).padStart(2, "0")).join("");
  }
  154. // ---
  155. // docUtil
  156. // ---
  /**
   * Browser implementation of the document utilities handed to SingleFileCore.
   * Most members simply forward to the helper objects available in the page
   * (docHelper, htmlMinifier, cssMinifier, ...).
   */
  class docUtil {
    /** Downloads a resource; forwards to the getContent function of this file. */
    static async getContent(resourceURL, options) {
      return getContent(resourceURL, options);
    }

    /** Builds a URL object from resourceURL, resolved against baseURI. */
    static parseURL(resourceURL, baseURI) {
      const url = new URL(resourceURL, baseURI);
      return url;
    }

    /** Returns the href of resourceURL resolved against baseURI. */
    static resolveURL(resourceURL, baseURI) {
      const { href } = this.parseURL(resourceURL, baseURI);
      return href;
    }

    /**
     * Parses pageContent as "text/html". When the result has no <base> element
     * with an href attribute, any existing <base> is removed and a new one
     * pointing at baseURI is inserted as the first child of <head>.
     */
    static createDoc(pageContent, baseURI) {
      const parser = new DOMParser();
      const doc = parser.parseFromString(pageContent, "text/html");
      const existingBase = doc.querySelector("base");
      if (existingBase && existingBase.getAttribute("href")) {
        return doc;
      }
      if (existingBase) {
        existingBase.remove();
      }
      const newBase = doc.createElement("base");
      newBase.setAttribute("href", baseURI);
      doc.head.insertBefore(newBase, doc.head.firstChild);
      return doc;
    }

    /** Lists the enumerable property names of a <div> element that start with "on". */
    static getOnEventAttributeNames(doc) {
      const probe = doc.createElement("div");
      const propertyNames = [];
      // for...in on purpose: inherited enumerable properties must be visited too.
      for (const name in probe) {
        propertyNames.push(name);
      }
      return propertyNames.filter(name => name.startsWith("on"));
    }

    /** Returns the DOMParser constructor. */
    static getParser() {
      return DOMParser;
    }

    /** Hashes text with crypto.subtle.digest(algo, ...) and returns the result of hex(). */
    static async digest(algo, text) {
      const encodedText = new TextEncoder("utf-8").encode(text);
      const hashBuffer = await crypto.subtle.digest(algo, encodedText);
      return hex(hashBuffer);
    }

    /** Returns the size of content once wrapped in a Blob. */
    static getContentSize(content) {
      const blob = new Blob([content]);
      return blob.size;
    }

    /**
     * Resolves to false when creating or loading the FontFace fails, true
     * otherwise — including when loading is still pending after
     * FONT_FACE_TEST_MAX_DELAY milliseconds.
     */
    static async validFont(urlFunction) {
      try {
        const fontFace = new FontFace("font-test", urlFunction);
        const loading = fontFace.load();
        const timeout = new Promise(resolve => {
          setTimeout(() => resolve(true), FONT_FACE_TEST_MAX_DELAY);
        });
        await Promise.race([loading, timeout]);
      } catch (error) {
        return false;
      }
      return true;
    }

    /** Forwards to htmlMinifier.process. */
    static minifyHTML(doc, options) {
      return htmlMinifier.process(doc, options);
    }

    /** Forwards to htmlMinifier.postProcess. */
    static postMinifyHTML(doc) {
      return htmlMinifier.postProcess(doc);
    }

    /** Forwards to cssRulesMinifier.process. */
    static minifyCSSRules(stylesheets, styles, mediaAllInfo) {
      return cssRulesMinifier.process(stylesheets, styles, mediaAllInfo);
    }

    /** Forwards to fontsMinifier.process. */
    static removeUnusedFonts(doc, stylesheets, styles, options) {
      return fontsMinifier.process(doc, stylesheets, styles, options);
    }

    /** Forwards to fontsAltMinifier.process. */
    static removeAlternativeFonts(doc, stylesheets) {
      return fontsAltMinifier.process(doc, stylesheets);
    }

    /** Forwards to matchedRules.getMediaAllInfo. */
    static getMediaAllInfo(doc, stylesheets, styles) {
      return matchedRules.getMediaAllInfo(doc, stylesheets, styles);
    }

    /** Forwards to cssMinifier.processString. */
    static compressCSS(content, options) {
      return cssMinifier.processString(content, options);
    }

    /** Forwards to mediasMinifier.process. */
    static minifyMedias(stylesheets) {
      return mediasMinifier.process(stylesheets);
    }

    /** Forwards to imagesAltMinifier.process. */
    static removeAlternativeImages(doc, options) {
      return imagesAltMinifier.process(doc, options);
    }

    /** Forwards to srcsetParser.process. */
    static parseSrcset(srcset) {
      return srcsetParser.process(srcset);
    }

    /** Forwards to docHelper.preProcessDoc and returns its result. */
    static preProcessDoc(doc, win, options) {
      return docHelper.preProcessDoc(doc, win, options);
    }

    /** Forwards to docHelper.postProcessDoc; nothing is returned. */
    static postProcessDoc(doc, options) {
      docHelper.postProcessDoc(doc, options);
    }

    /** Forwards to serializer.process. */
    static serialize(doc, compressHTML) {
      return serializer.process(doc, compressHTML);
    }

    /** Forwards to docHelper.removeQuotes. */
    static removeQuotes(string) {
      return docHelper.removeQuotes(string);
    }

    // The remaining members forward the session id to the docHelper function of the same name.

    static windowIdAttributeName(sessionId) {
      return docHelper.windowIdAttributeName(sessionId);
    }

    static preservedSpaceAttributeName(sessionId) {
      return docHelper.preservedSpaceAttributeName(sessionId);
    }

    static removedContentAttributeName(sessionId) {
      return docHelper.removedContentAttributeName(sessionId);
    }

    static imagesAttributeName(sessionId) {
      return docHelper.imagesAttributeName(sessionId);
    }

    static inputValueAttributeName(sessionId) {
      return docHelper.inputValueAttributeName(sessionId);
    }

    static shadowRootAttributeName(sessionId) {
      return docHelper.shadowRootAttributeName(sessionId);
    }
  }
  /**
   * Writes a message to the console, prefixed with the "S-File <browser>" tag.
   *
   * @param {...*} args - values to log after the tag
   */
  function log(...args) {
    const taggedArgs = ["S-File <browser>", ...args];
    console.log(...taggedArgs); // eslint-disable-line no-console
  }
  // Public API of SingleFileBrowser: getClass() returns whatever
  // SingleFileCore.getClass produces when given docUtil and cssTree.
  return { getClass: () => SingleFileCore.getClass(docUtil, cssTree) };
  274. })();