single-file-util.js 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382
  1. /*
  2. * Copyright 2010-2020 Gildas Lormeau
  3. * contact : gildas.lormeau <at> gmail.com
  4. *
  5. * This file is part of SingleFile.
  6. *
  7. * The code in this file is free software: you can redistribute it and/or
  8. * modify it under the terms of the GNU Affero General Public License
  9. * (GNU AGPL) as published by the Free Software Foundation, either version 3
  10. * of the License, or (at your option) any later version.
  11. *
  12. * The code in this file is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero
  15. * General Public License for more details.
  16. *
  17. * As additional permission under GNU AGPL version 3 section 7, you may
  18. * distribute UNMODIFIED VERSIONS OF THIS file without the copy of the GNU
  19. * AGPL normally required by section 4, provided you include this license
  20. * notice and a URL through which recipients can access the Corresponding
  21. * Source.
  22. */
  23. /* global window, globalThis */
  24. this.singlefile.lib.util = this.singlefile.lib.util || (globalThis => {
  // When true, getContent() records a start time and reports progress through log().
  25. const DEBUG = false;
  // Bytes per megabyte; getContent() multiplies options.maxResourceSize by this before comparing it with the downloaded size.
  26. const ONE_MB = 1024 * 1024;
  // Prefix getContent() requires of the content type when options.validateTextContentType is set.
  27. const PREFIX_CONTENT_TYPE_TEXT = "text/";
  // Defaults for getValidFilename(): each entry is placed between "[" and "]" in a RegExp, so an entry may be a range.
  28. const DEFAULT_REPLACED_CHARACTERS = ["~", "+", "\\\\", "?", "%", "*", ":", "|", "\"", "<", ">", "\x00-\x1f", "\x7F"];
  // Default string substituted for every run of replaced characters.
  29. const DEFAULT_REPLACEMENT_CHARACTER = "_";
  // Platform APIs are read once from the global object handed to this closure.
  30. const URL = globalThis.URL;
  31. const DOMParser = globalThis.DOMParser;
  32. const Blob = globalThis.Blob;
  33. const FileReader = globalThis.FileReader;
  // NOTE(review): this default wrapper forwards only the URL; the second argument getContent() passes
  // (referrer / frameId) is dropped here - confirm that this is intended.
  34. const fetch = url => globalThis.fetch(url);
  35. const crypto = globalThis.crypto;
  36. const TextDecoder = globalThis.TextDecoder;
  37. const TextEncoder = globalThis.TextEncoder;
  // The wrapper is an arrow function, so `this` is the `this` of the enclosing scope.
  38. const singlefile = this.singlefile;
  // The module exposes a single factory function.
  39. return {
  40. getInstance
  41. };
  42. function getInstance(utilOptions) {
  const modules = singlefile.lib.modules;
  const vendor = singlefile.lib.vendor;
  const helper = singlefile.lib.helper;
  // Register a fallback serializer only when no serializer module is present.
  if (modules.serializer === undefined) {
    modules.serializer = {
      /**
       * Serializes a document as its doctype declaration (when there is
       * one) followed by the outer HTML of the root element.
       * @param {Document} doc
       * @returns {string}
       */
      process(doc) {
        const parts = [];
        const { doctype } = doc;
        if (doctype) {
          parts.push(`<!DOCTYPE ${doctype.nodeName}`);
          if (doctype.publicId) {
            parts.push(` PUBLIC "${doctype.publicId}"`);
            if (doctype.systemId) {
              parts.push(` "${doctype.systemId}"`);
            }
          } else if (doctype.systemId) {
            parts.push(` SYSTEM "${doctype.systemId}"`);
          }
          if (doctype.internalSubset) {
            parts.push(` [${doctype.internalSubset}]`);
          }
          parts.push("> ");
        }
        return parts.join("") + doc.documentElement.outerHTML;
      }
    };
  }
  // Fill in the fetch implementations; the frame fetcher falls back to the
  // regular fetcher, which itself falls back to the module-level default.
  utilOptions = utilOptions || {};
  utilOptions.fetch = utilOptions.fetch || fetch;
  utilOptions.frameFetch = utilOptions.frameFetch || utilOptions.fetch || fetch;
  70. return {
  71. getContent,
  72. parseURL(resourceURL, baseURI) {
  73. if (baseURI === undefined) {
  74. return new URL(resourceURL);
  75. } else {
  76. return new URL(resourceURL, baseURI);
  77. }
  78. },
  79. resolveURL(resourceURL, baseURI) {
  80. return this.parseURL(resourceURL, baseURI).href;
  81. },
  82. getValidFilename(filename, replacedCharacters = DEFAULT_REPLACED_CHARACTERS, replacementCharacter = DEFAULT_REPLACEMENT_CHARACTER) {
  83. replacedCharacters.forEach(replacedCharacter => filename = filename.replace(new RegExp("[" + replacedCharacter + "]+", "g"), replacementCharacter));
  84. filename = filename
  85. .replace(/\.\.\//g, "")
  86. .replace(/^\/+/, "")
  87. .replace(/\/+/g, "/")
  88. .replace(/\/$/, "")
  89. .replace(/\.$/, "")
  90. .replace(/\.\//g, "." + replacementCharacter)
  91. .replace(/\/\./g, "/" + replacementCharacter);
  92. return filename;
  93. },
  94. parseDocContent(content, baseURI) {
  95. const doc = (new DOMParser()).parseFromString(content, "text/html");
  96. if (!doc.head) {
  97. doc.documentElement.insertBefore(doc.createElement("HEAD"), doc.body);
  98. }
  99. let baseElement = doc.querySelector("base");
  100. if (!baseElement || !baseElement.getAttribute("href")) {
  101. if (baseElement) {
  102. baseElement.remove();
  103. }
  104. baseElement = doc.createElement("base");
  105. baseElement.setAttribute("href", baseURI);
  106. doc.head.insertBefore(baseElement, doc.head.firstChild);
  107. }
  108. return doc;
  109. },
  110. parseXMLContent(content) {
  111. return (new DOMParser()).parseFromString(content, "text/xml");
  112. },
  113. parseSVGContent(content) {
  114. return (new DOMParser()).parseFromString(content, "image/svg+xml");
  115. },
  116. async digest(algo, text) {
  117. try {
  118. const hash = await crypto.subtle.digest(algo, new TextEncoder("utf-8").encode(text));
  119. return hex(hash);
  120. } catch (error) {
  121. return "";
  122. }
  123. },
  124. getContentSize(content) {
  125. return new Blob([content]).size;
  126. },
  127. truncateText(content, maxSize) {
  128. const blob = new Blob([content]);
  129. const reader = new FileReader();
  130. reader.readAsText(blob.slice(0, maxSize));
  131. return new Promise((resolve, reject) => {
  132. reader.addEventListener("load", () => {
  133. if (content.startsWith(reader.result)) {
  134. resolve(reader.result);
  135. } else {
  136. this.truncateText(content, maxSize - 1).then(resolve).catch(reject);
  137. }
  138. }, false);
  139. reader.addEventListener("error", reject, false);
  140. });
  141. },
  142. minifyHTML(doc, options) {
  143. return modules.htmlMinifier.process(doc, options);
  144. },
  145. minifyCSSRules(stylesheets, styles, mediaAllInfo) {
  146. return modules.cssRulesMinifier.process(stylesheets, styles, mediaAllInfo);
  147. },
  148. removeUnusedFonts(doc, stylesheets, styles, options) {
  149. return modules.fontsMinifier.process(doc, stylesheets, styles, options);
  150. },
  151. removeAlternativeFonts(doc, stylesheets, fontURLs, fontTests) {
  152. return modules.fontsAltMinifier.process(doc, stylesheets, fontURLs, fontTests);
  153. },
  154. getMediaAllInfo(doc, stylesheets, styles) {
  155. return modules.matchedRules.getMediaAllInfo(doc, stylesheets, styles);
  156. },
  157. compressCSS(content, options) {
  158. return vendor.cssMinifier.processString(content, options);
  159. },
  160. minifyMedias(stylesheets) {
  161. return modules.mediasAltMinifier.process(stylesheets);
  162. },
  163. removeAlternativeImages(doc) {
  164. return modules.imagesAltMinifier.process(doc);
  165. },
  166. parseSrcset(srcset) {
  167. return vendor.srcsetParser.process(srcset);
  168. },
  169. preProcessDoc(doc, win, options) {
  170. return helper.preProcessDoc(doc, win, options);
  171. },
  172. postProcessDoc(doc, markedElements) {
  173. helper.postProcessDoc(doc, markedElements);
  174. },
  175. serialize(doc, compressHTML) {
  176. return modules.serializer.process(doc, compressHTML);
  177. },
  178. removeQuotes(string) {
  179. return helper.removeQuotes(string);
  180. },
  181. waitForUserScript(eventPrefixName) {
  182. if (helper.waitForUserScript) {
  183. return helper.waitForUserScript(eventPrefixName);
  184. }
  185. },
  186. ON_BEFORE_CAPTURE_EVENT_NAME: helper.ON_BEFORE_CAPTURE_EVENT_NAME,
  187. ON_AFTER_CAPTURE_EVENT_NAME: helper.ON_AFTER_CAPTURE_EVENT_NAME,
  188. WIN_ID_ATTRIBUTE_NAME: helper.WIN_ID_ATTRIBUTE_NAME,
  189. REMOVED_CONTENT_ATTRIBUTE_NAME: helper.REMOVED_CONTENT_ATTRIBUTE_NAME,
  190. HIDDEN_CONTENT_ATTRIBUTE_NAME: helper.HIDDEN_CONTENT_ATTRIBUTE_NAME,
  191. HIDDEN_FRAME_ATTRIBUTE_NAME: helper.HIDDEN_FRAME_ATTRIBUTE_NAME,
  192. IMAGE_ATTRIBUTE_NAME: helper.IMAGE_ATTRIBUTE_NAME,
  193. POSTER_ATTRIBUTE_NAME: helper.POSTER_ATTRIBUTE_NAME,
  194. CANVAS_ATTRIBUTE_NAME: helper.CANVAS_ATTRIBUTE_NAME,
  195. HTML_IMPORT_ATTRIBUTE_NAME: helper.HTML_IMPORT_ATTRIBUTE_NAME,
  196. INPUT_VALUE_ATTRIBUTE_NAME: helper.INPUT_VALUE_ATTRIBUTE_NAME,
  197. SHADOW_ROOT_ATTRIBUTE_NAME: helper.SHADOW_ROOT_ATTRIBUTE_NAME,
  198. PRESERVED_SPACE_ELEMENT_ATTRIBUTE_NAME: helper.PRESERVED_SPACE_ELEMENT_ATTRIBUTE_NAME,
  199. STYLESHEET_ATTRIBUTE_NAME: helper.STYLESHEET_ATTRIBUTE_NAME,
  200. SELECTED_CONTENT_ATTRIBUTE_NAME: helper.SELECTED_CONTENT_ATTRIBUTE_NAME,
  201. COMMENT_HEADER: helper.COMMENT_HEADER,
  202. COMMENT_HEADER_LEGACY: helper.COMMENT_HEADER_LEGACY,
  203. SINGLE_FILE_UI_ELEMENT_CLASS: helper.SINGLE_FILE_UI_ELEMENT_CLASS
  204. };
  /**
   * Downloads a resource and returns it either as a data URI or as text.
   * Failures never reject for fetch/read/decode errors: an empty payload
   * ("data:null;base64," in binary mode, "" in text mode) is returned instead.
   *
   * Options read by this function:
   *  - asBinary: return a data URI instead of decoded text
   *  - frameId / resourceReferrer: passed to the fetch implementations
   *  - expectedType: hint given to guessMIMEType() when the response has no usable content type
   *  - charset: fallback charset for decoding; also appended to the data URI type
   *  - maxResourceSizeEnabled / maxResourceSize: size limit, in megabytes
   *  - validateTextContentType: in text mode, reject content types not starting with "text/"
   *
   * @param {string} resourceURL
   * @param {Object} options
   * @returns {Promise<{data: string, resourceURL: string, charset?: string}>}
   *   `resourceURL` is the response URL when the response provides one.
   */
  async function getContent(resourceURL, options) {
    const EMPTY_DATA_URI = "data:null;base64,";
    const fetchResource = utilOptions.fetch;
    const fetchFrameResource = utilOptions.frameFetch;
    let startTime;
    if (DEBUG) {
      startTime = Date.now();
      log(" // STARTED download url =", resourceURL, "asBinary =", options.asBinary);
    }
    let response;
    let buffer;
    try {
      if (options.frameId) {
        try {
          response = await fetchFrameResource(resourceURL, { frameId: options.frameId, referrer: options.resourceReferrer });
        } catch (error) {
          // The frame fetch failed: retry with the regular fetch implementation.
          response = await fetchResource(resourceURL);
        }
      } else {
        response = await fetchResource(resourceURL, { referrer: options.resourceReferrer });
      }
      buffer = await response.arrayBuffer();
    } catch (error) {
      return { data: options.asBinary ? EMPTY_DATA_URI : "", resourceURL };
    }
    resourceURL = response.url || resourceURL;
    let contentType = "";
    let charset;
    try {
      const mimeType = new vendor.MIMEType(response.headers.get("content-type"));
      contentType = mimeType.type + "/" + mimeType.subtype;
      charset = mimeType.parameters.get("charset");
    } catch (error) {
      // Missing or unparsable header: the type is sniffed from the content below.
    }
    if (!contentType) {
      // guessMIMEType() returns undefined when nothing matches; normalize it
      // so the string "undefined" never ends up as the MIME type of the Blob.
      contentType = guessMIMEType(options.expectedType, buffer) || "";
    }
    if (!charset && options.charset) {
      charset = options.charset;
    }
    const exceedsMaxSize = () => options.maxResourceSizeEnabled && buffer.byteLength > options.maxResourceSize * ONE_MB;
    if (options.asBinary) {
      try {
        if (DEBUG) {
          log(" // ENDED download url =", resourceURL, "delay =", Date.now() - startTime);
        }
        if (exceedsMaxSize()) {
          return { data: EMPTY_DATA_URI, resourceURL };
        }
        const blobType = contentType + (options.charset ? ";charset=" + options.charset : "");
        const reader = new FileReader();
        reader.readAsDataURL(new Blob([buffer], { type: blobType }));
        const dataUri = await new Promise((resolve, reject) => {
          reader.addEventListener("load", () => resolve(reader.result), false);
          reader.addEventListener("error", reject, false);
        });
        return { data: dataUri, resourceURL };
      } catch (error) {
        return { data: EMPTY_DATA_URI, resourceURL };
      }
    }
    const hasUnexpectedType = options.validateTextContentType && contentType && !contentType.startsWith(PREFIX_CONTENT_TYPE_TEXT);
    if (response.status >= 400 || hasUnexpectedType) {
      return { data: "", resourceURL };
    }
    if (!charset) {
      charset = "utf-8";
    }
    if (DEBUG) {
      log(" // ENDED download url =", resourceURL, "delay =", Date.now() - startTime);
    }
    if (exceedsMaxSize()) {
      return { data: "", resourceURL, charset };
    }
    try {
      return { data: new TextDecoder(charset).decode(buffer), resourceURL, charset };
    } catch (error) {
      // Unknown charset label or decoding failure: retry once as UTF-8.
      try {
        charset = "utf-8";
        return { data: new TextDecoder(charset).decode(buffer), resourceURL, charset };
      } catch (error) {
        return { data: "", resourceURL, charset };
      }
    }
  }
  292. }
  /**
   * Guesses a MIME type from the leading bytes of a resource.
   * Each signature is a (mask, pattern) pair: byte i of the buffer matches
   * when `(byte & mask[i]) == pattern[i]`, so a 0 mask byte is a wildcard.
   *
   * @param {string} expectedType - "image" or "font"; any other value yields no guess
   * @param {ArrayBuffer} buffer - content of the resource
   * @returns {string|undefined} the guessed MIME type, or undefined when no signature matches
   */
  function guessMIMEType(expectedType, buffer) {
    const imageSignatures = [
      { mimeType: "image/x-icon", mask: [255, 255, 255, 255], pattern: [0, 0, 1, 0] },
      { mimeType: "image/x-icon", mask: [255, 255, 255, 255], pattern: [0, 0, 2, 0] },
      // "BM" (the original tested 78, 77 = "NM", which never matches a bitmap)
      { mimeType: "image/bmp", mask: [255, 255], pattern: [66, 77] },
      // "GIF89a"
      { mimeType: "image/gif", mask: [255, 255, 255, 255, 255, 255], pattern: [71, 73, 70, 56, 57, 97] },
      // "GIF87a" (the original tested 59 = ";" instead of 55 = "7")
      { mimeType: "image/gif", mask: [255, 255, 255, 255, 255, 255], pattern: [71, 73, 70, 56, 55, 97] },
      // "RIFF" + 4 ignored bytes + "WEBPVP"
      { mimeType: "image/webp", mask: [255, 255, 255, 255, 0, 0, 0, 0, 255, 255, 255, 255, 255, 255], pattern: [82, 73, 70, 70, 0, 0, 0, 0, 87, 69, 66, 80, 86, 80] },
      { mimeType: "image/png", mask: [255, 255, 255, 255, 255, 255, 255, 255], pattern: [137, 80, 78, 71, 13, 10, 26, 10] },
      { mimeType: "image/jpeg", mask: [255, 255, 255], pattern: [255, 216, 255] }
    ];
    const fontSignatures = [
      // 34 ignored bytes followed by "LP"
      { mimeType: "application/vnd.ms-fontobject", mask: [...new Array(34).fill(0), 255, 255], pattern: [...new Array(34).fill(0), 76, 80] },
      { mimeType: "font/ttf", mask: [255, 255, 255, 255], pattern: [0, 1, 0, 0] },
      // "OTTO"
      { mimeType: "font/otf", mask: [255, 255, 255, 255], pattern: [79, 84, 84, 79] },
      // "ttcf"
      { mimeType: "font/collection", mask: [255, 255, 255, 255], pattern: [116, 116, 99, 102] },
      // "wOFF"
      { mimeType: "font/woff", mask: [255, 255, 255, 255], pattern: [119, 79, 70, 70] },
      // "wOF2"
      { mimeType: "font/woff2", mask: [255, 255, 255, 255], pattern: [119, 79, 70, 50] }
    ];
    let signatures = [];
    if (expectedType == "image") {
      signatures = imageSignatures;
    } else if (expectedType == "font") {
      signatures = fontSignatures;
    }
    const match = signatures.find(({ mask, pattern }) => compareBytes(mask, pattern));
    return match ? match.mimeType : undefined;

    // Tests the start of `buffer` against one masked signature.
    function compareBytes(mask, pattern) {
      if (buffer.byteLength < pattern.length) {
        return false;
      }
      const bytes = new Uint8Array(buffer, 0, mask.length);
      return pattern.every((expected, index) => (bytes[index] & mask[index]) === expected);
    }
  }
  /**
   * Converts a binary buffer to a lowercase hexadecimal string, two digits
   * per byte. Works for any byte length (the previous 32-bit-word version
   * threw a RangeError when the length was not a multiple of 4).
   * See https://developer.mozilla.org/en-US/docs/Web/API/SubtleCrypto/digest
   *
   * @param {ArrayBuffer} buffer
   * @returns {string}
   */
  function hex(buffer) {
    const bytes = new Uint8Array(buffer);
    return Array.from(bytes, byte => byte.toString(16).padStart(2, "0")).join("");
  }
  /**
   * Writes the given values to the console, preceded by the module tag.
   * @param {...*} messages - values forwarded to console.log
   */
  function log(...messages) {
    const taggedMessages = ["S-File <browser>"].concat(messages);
    console.log.apply(console, taggedMessages); // eslint-disable-line no-console
  }
  368. })(typeof globalThis == "object" ? globalThis : window);