single-file-util.js 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311
  1. /*
  2. * Copyright 2010-2019 Gildas Lormeau
  3. * contact : gildas.lormeau <at> gmail.com
  4. *
  5. * This file is part of SingleFile.
  6. *
  7. * The code in this file is free software: you can redistribute it and/or
  8. * modify it under the terms of the GNU Affero General Public License
  9. * (GNU AGPL) as published by the Free Software Foundation, either version 3
  10. * of the License, or (at your option) any later version.
  11. *
  12. * The code in this file is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero
  15. * General Public License for more details.
  16. *
  17. * As additional permission under GNU AGPL version 3 section 7, you may
  18. * distribute UNMODIFIED VERSIONS OF THIS file without the copy of the GNU
  19. * AGPL normally required by section 4, provided you include this license
  20. * notice and a URL through which recipients can access the Corresponding
  21. * Source.
  22. */
  23. /* global window */
  24. this.singlefile.lib.util = this.singlefile.lib.util || (() => {
  // When true, getContent reports the start and end of each download through log().
  const DEBUG = false;
  // Multiplier applied to options.maxResourceSize before comparing it with a byte length.
  const ONE_MB = 1024 * 1024;
  // Content types starting with this prefix pass the validateTextContentType check.
  const PREFIX_CONTENT_TYPE_TEXT = "text/";
  // Each entry is inserted into a character class ("[" + entry + "]+") by getValidFilename.
  const DEFAULT_REPLACED_CHARACTERS = ["~", "\\\\", "?", "%", "*", ":", "|", "\"", "<", ">", "\x00-\x1f", "\x7F"];
  // Capture the platform APIs once; the rest of the module only uses these local bindings.
  const { URL, DOMParser, Blob, FileReader, fetch, crypto, TextDecoder, TextEncoder } = window;
  const { singlefile } = this;
  38. return {
  39. getInstance(utilOptions) {
  // Namespaces the returned utility object delegates to.
  const { modules, vendor, helper } = singlefile.lib;
  // Register a default serializer only when no other code has provided one.
  if (modules.serializer === undefined) {
    modules.serializer = {
      /**
       * Serializes a document to a string: the doctype declaration (when the
       * document has a doctype) followed by the outerHTML of the root element.
       */
      process(doc) {
        const { doctype } = doc;
        let declaration = "";
        if (doctype) {
          declaration = `<!DOCTYPE ${doctype.nodeName}`;
          if (doctype.publicId) {
            declaration += ` PUBLIC "${doctype.publicId}"`;
            if (doctype.systemId) {
              declaration += ` "${doctype.systemId}"`;
            }
          } else if (doctype.systemId) {
            declaration += ` SYSTEM "${doctype.systemId}"`;
          }
          if (doctype.internalSubset) {
            declaration += ` [${doctype.internalSubset}]`;
          }
          declaration += "> ";
        }
        return declaration + doc.documentElement.outerHTML;
      }
    };
  }
  // Normalize the options: fetch falls back to the platform fetch, and
  // frameFetch falls back to the (now defined) fetch option.
  if (!utilOptions) {
    utilOptions = {};
  }
  if (!utilOptions.fetch) {
    utilOptions.fetch = fetch;
  }
  if (!utilOptions.frameFetch) {
    utilOptions.frameFetch = utilOptions.fetch || fetch;
  }
  67. return {
  68. getContent,
  69. parseURL(resourceURL, baseURI) {
  70. if (baseURI === undefined) {
  71. return new URL(resourceURL);
  72. } else {
  73. return new URL(resourceURL, baseURI);
  74. }
  75. },
  76. resolveURL(resourceURL, baseURI) {
  77. return this.parseURL(resourceURL, baseURI).href;
  78. },
  79. getValidFilename(filename, replacedCharacters = DEFAULT_REPLACED_CHARACTERS, replacementCharacter) {
  80. replacedCharacters.forEach(replacedCharacter => filename = filename.replace(new RegExp("[" + replacedCharacter + "]+", "g"), replacementCharacter));
  81. filename = filename
  82. .replace(/\.\.\//g, "")
  83. .replace(/^\/+/, "")
  84. .replace(/\/+/g, "/")
  85. .replace(/\/$/, "")
  86. .replace(/\.$/, "")
  87. .replace(/\.\//g, "." + replacementCharacter)
  88. .replace(/\/\./g, "/" + replacementCharacter);
  89. return filename;
  90. },
  91. parseDocContent(content, baseURI) {
  92. const doc = (new DOMParser()).parseFromString(content, "text/html");
  93. if (!doc.head) {
  94. doc.documentElement.insertBefore(doc.createElement("HEAD"), doc.body);
  95. }
  96. let baseElement = doc.querySelector("base");
  97. if (!baseElement || !baseElement.getAttribute("href")) {
  98. if (baseElement) {
  99. baseElement.remove();
  100. }
  101. baseElement = doc.createElement("base");
  102. baseElement.setAttribute("href", baseURI);
  103. doc.head.insertBefore(baseElement, doc.head.firstChild);
  104. }
  105. return doc;
  106. },
  107. parseXMLContent(content) {
  108. return (new DOMParser()).parseFromString(content, "text/xml");
  109. },
  110. parseSVGContent(content) {
  111. return (new DOMParser()).parseFromString(content, "image/svg+xml");
  112. },
  113. async digest(algo, text) {
  114. const hash = await crypto.subtle.digest(algo, new TextEncoder("utf-8").encode(text));
  115. return hex(hash);
  116. },
  117. getContentSize(content) {
  118. return new Blob([content]).size;
  119. },
  120. async truncateText(content, maxSize) {
  121. const blob = new Blob([content]);
  122. const reader = new FileReader();
  123. reader.readAsText(blob.slice(0, maxSize));
  124. return new Promise((resolve, reject) => {
  125. reader.addEventListener("load", () => {
  126. if (content.startsWith(reader.result)) {
  127. resolve(reader.result);
  128. } else {
  129. this.truncateText(content, maxSize - 1).then(resolve).catch(reject);
  130. }
  131. }, false);
  132. reader.addEventListener("error", reject, false);
  133. });
  134. },
  135. minifyHTML(doc, options) {
  136. return modules.htmlMinifier.process(doc, options);
  137. },
  138. postMinifyHTML(doc) {
  139. return modules.htmlMinifier.postProcess(doc);
  140. },
  141. minifyCSSRules(stylesheets, styles, mediaAllInfo) {
  142. return modules.cssRulesMinifier.process(stylesheets, styles, mediaAllInfo);
  143. },
  144. removeUnusedFonts(doc, stylesheets, styles, options) {
  145. return modules.fontsMinifier.process(doc, stylesheets, styles, options);
  146. },
  147. removeAlternativeFonts(doc, stylesheets) {
  148. return modules.fontsAltMinifier.process(doc, stylesheets);
  149. },
  150. getMediaAllInfo(doc, stylesheets, styles) {
  151. return modules.matchedRules.getMediaAllInfo(doc, stylesheets, styles);
  152. },
  153. compressCSS(content, options) {
  154. return vendor.cssMinifier.processString(content, options);
  155. },
  156. minifyMedias(stylesheets) {
  157. return modules.mediasAltMinifier.process(stylesheets);
  158. },
  159. removeAlternativeImages(doc) {
  160. return modules.imagesAltMinifier.process(doc);
  161. },
  162. parseSrcset(srcset) {
  163. return vendor.srcsetParser.process(srcset);
  164. },
  165. preProcessDoc(doc, win, options) {
  166. return helper.preProcessDoc(doc, win, options);
  167. },
  168. postProcessDoc(doc, markedElements) {
  169. helper.postProcessDoc(doc, markedElements);
  170. },
  171. serialize(doc, compressHTML) {
  172. return modules.serializer.process(doc, compressHTML);
  173. },
  174. removeQuotes(string) {
  175. return helper.removeQuotes(string);
  176. },
  177. async waitForUserScript(eventPrefixName) {
  178. if (helper.waitForUserScript) {
  179. return helper.waitForUserScript(eventPrefixName);
  180. } else {
  181. return Promise.resolve();
  182. }
  183. },
  184. ON_BEFORE_CAPTURE_EVENT_NAME: helper.ON_BEFORE_CAPTURE_EVENT_NAME,
  185. ON_AFTER_CAPTURE_EVENT_NAME: helper.ON_AFTER_CAPTURE_EVENT_NAME,
  186. WIN_ID_ATTRIBUTE_NAME: helper.WIN_ID_ATTRIBUTE_NAME,
  187. REMOVED_CONTENT_ATTRIBUTE_NAME: helper.REMOVED_CONTENT_ATTRIBUTE_NAME,
  188. HIDDEN_CONTENT_ATTRIBUTE_NAME: helper.HIDDEN_CONTENT_ATTRIBUTE_NAME,
  189. IMAGE_ATTRIBUTE_NAME: helper.IMAGE_ATTRIBUTE_NAME,
  190. POSTER_ATTRIBUTE_NAME: helper.POSTER_ATTRIBUTE_NAME,
  191. CANVAS_ATTRIBUTE_NAME: helper.CANVAS_ATTRIBUTE_NAME,
  192. HTML_IMPORT_ATTRIBUTE_NAME: helper.HTML_IMPORT_ATTRIBUTE_NAME,
  193. INPUT_VALUE_ATTRIBUTE_NAME: helper.INPUT_VALUE_ATTRIBUTE_NAME,
  194. SHADOW_ROOT_ATTRIBUTE_NAME: helper.SHADOW_ROOT_ATTRIBUTE_NAME,
  195. PRESERVED_SPACE_ELEMENT_ATTRIBUTE_NAME: helper.PRESERVED_SPACE_ELEMENT_ATTRIBUTE_NAME,
  196. STYLESHEET_ATTRIBUTE_NAME: helper.STYLESHEET_ATTRIBUTE_NAME,
  197. SELECTED_CONTENT_ATTRIBUTE_NAME: helper.SELECTED_CONTENT_ATTRIBUTE_NAME
  198. };
  /**
   * Downloads a resource and returns its content, either as a data URI
   * (options.asBinary) or as decoded text.
   *
   * Failures are reported as empty content rather than as rejections: a
   * failed fetch, an unreadable response body, a resource above the size
   * limit, a FileReader/decoding error, or (text mode only) an HTTP status
   * >= 400 or a rejected content type all resolve with "data:base64,"
   * (binary mode) or "" (text mode).
   *
   * @param resourceURL URL handed to utilOptions.fetch, or to
   *   utilOptions.frameFetch together with options.frameId when that is set.
   * @param options optional settings; the properties read here are frameId,
   *   asBinary, charset (used when the response declares none),
   *   maxResourceSizeEnabled, maxResourceSize (multiplied by ONE_MB) and
   *   validateTextContentType.
   * @returns a promise resolving with { data, resourceURL } and, in text mode
   *   once the status/content-type checks passed, charset. resourceURL is
   *   response.url when the response provides one.
   */
  async function getContent(resourceURL, options = {}) {
    const fetchResource = utilOptions.fetch;
    const fetchFrameResource = utilOptions.frameFetch;
    const emptyData = options.asBinary ? "data:base64," : "";
    let response;
    let buffer;
    let startTime;
    if (DEBUG) {
      startTime = Date.now();
      log(" // STARTED download url =", resourceURL, "asBinary =", options.asBinary);
    }
    try {
      if (options.frameId) {
        response = await fetchFrameResource(resourceURL, options.frameId);
      } else {
        response = await fetchResource(resourceURL);
      }
      // Reading the body can fail as well (e.g. the transfer is interrupted);
      // treat it like a failed fetch instead of rejecting.
      buffer = await response.arrayBuffer();
    } catch (fetchError) {
      // Best effort by design: an unavailable resource yields empty content.
      return { data: emptyData, resourceURL };
    }
    resourceURL = response.url || resourceURL;
    let contentType = response.headers.get("content-type");
    let charset;
    if (contentType) {
      const [mediaType, ...parameters] = contentType.toLowerCase().split(";");
      contentType = mediaType.trim();
      if (!contentType.includes("/")) {
        contentType = null;
      }
      // The charset parameter is not necessarily the first one, so look at all of them.
      for (const parameter of parameters) {
        const matchCharset = parameter.trim().match(/^charset=(.*)/);
        if (matchCharset && matchCharset[1]) {
          charset = helper.removeQuotes(matchCharset[1].trim());
          break;
        }
      }
    }
    if (!charset && options.charset) {
      charset = options.charset;
    }
    const exceedsMaxSize = () => options.maxResourceSizeEnabled && buffer.byteLength > options.maxResourceSize * ONE_MB;
    if (options.asBinary) {
      try {
        if (DEBUG) {
          log(" // ENDED download url =", resourceURL, "delay =", Date.now() - startTime);
        }
        if (exceedsMaxSize()) {
          return { data: "data:base64,", resourceURL };
        }
        const reader = new FileReader();
        reader.readAsDataURL(new Blob([buffer], { type: contentType }));
        const dataUri = await new Promise((resolve, reject) => {
          reader.addEventListener("load", () => resolve(reader.result), false);
          reader.addEventListener("error", reject, false);
        });
        return { data: dataUri, resourceURL };
      } catch (readError) {
        return { data: "data:base64,", resourceURL };
      }
    }
    if (response.status >= 400 || (options.validateTextContentType && contentType && !contentType.startsWith(PREFIX_CONTENT_TYPE_TEXT))) {
      return { data: "", resourceURL };
    }
    if (!charset) {
      charset = "utf-8";
    }
    if (DEBUG) {
      log(" // ENDED download url =", resourceURL, "delay =", Date.now() - startTime);
    }
    if (exceedsMaxSize()) {
      return { data: "", resourceURL, charset };
    }
    try {
      return { data: new TextDecoder(charset).decode(buffer), resourceURL, charset };
    } catch (decodeError) {
      // The declared charset could not be used: retry once with utf-8.
      try {
        charset = "utf-8";
        return { data: new TextDecoder(charset).decode(buffer), resourceURL, charset };
      } catch (fallbackError) {
        return { data: "", resourceURL, charset };
      }
    }
  }
  282. }
  283. };
  // https://developer.mozilla.org/en-US/docs/Web/API/SubtleCrypto/digest
  /**
   * Converts an ArrayBuffer to its lowercase hexadecimal representation, two
   * digits per byte in buffer order.
   *
   * Works byte by byte, so buffers whose length is not a multiple of 4 are
   * supported (reading 32-bit words would throw a RangeError on them).
   *
   * @param buffer the ArrayBuffer to format (e.g. a digest).
   * @returns the hexadecimal string; "" for an empty buffer.
   */
  function hex(buffer) {
    const bytes = new Uint8Array(buffer);
    return Array.from(bytes, byte => byte.toString(16).padStart(2, "0")).join("");
  }
  /** Writes the given values to the console, preceded by the "S-File <browser>" tag. */
  function log(...args) {
    const taggedArgs = ["S-File <browser>", ...args];
    console.log.apply(console, taggedArgs); // eslint-disable-line no-console
  }
  300. })();