single-file-util.js 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249
  1. /*
  2. * Copyright 2010-2019 Gildas Lormeau
  3. * contact : gildas.lormeau <at> gmail.com
  4. *
  5. * This file is part of SingleFile.
  6. *
  7. * The code in this file is free software: you can redistribute it and/or
  8. * modify it under the terms of the GNU Affero General Public License
  9. * (GNU AGPL) as published by the Free Software Foundation, either version 3
  10. * of the License, or (at your option) any later version.
  11. *
  12. * The code in this file is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero
  15. * General Public License for more details.
  16. *
  17. * As additional permission under GNU AGPL version 3 section 7, you may
  18. * distribute UNMODIFIED VERSIONS OF THIS file without the copy of the GNU
  19. * AGPL normally required by section 4, provided you include this license
  20. * notice and a URL through which recipients can access the Corresponding
  21. * Source.
  22. */
  23. /* global window */
  24. this.singlefile.lib.util = this.singlefile.lib.util || (() => {
  // When true, getContent() traces the start and end of every download through log().
  const DEBUG = false;
  // Number of bytes in one megabyte; getContent() multiplies options.maxResourceSize by it.
  const ONE_MB = 1024 ** 2;
  // Content types starting with this prefix pass the validateTextContentType check in getContent().
  const PREFIX_CONTENT_TYPE_TEXT = "text/";
  // Browser APIs captured once from the global window object.
  const { URL, DOMParser, Blob, FileReader } = window;
  32. return {
  33. getInstance: (modules, util) => {
  // Install a default serializer only when the caller did not provide one.
  if (modules.serializer === undefined) {
    modules.serializer = {
      /**
       * Serializes a document to a string: the doctype declaration (when the
       * document has one) followed by the outer HTML of the root element.
       *
       * @param {Document} doc - document to serialize
       * @returns {string} the doctype declaration, if any, then `doc.documentElement.outerHTML`
       */
      process(doc) {
        const { doctype } = doc;
        let declaration = "";
        if (doctype) {
          const { nodeName, publicId, systemId, internalSubset } = doctype;
          const parts = [`<!DOCTYPE ${nodeName}`];
          if (publicId) {
            parts.push(` PUBLIC "${publicId}"`);
            if (systemId) {
              parts.push(` "${systemId}"`);
            }
          } else if (systemId) {
            // A system identifier without a public one needs the SYSTEM keyword.
            parts.push(` SYSTEM "${systemId}"`);
          }
          if (internalSubset) {
            parts.push(` [${internalSubset}]`);
          }
          parts.push("> ");
          declaration = parts.join("");
        }
        return declaration + doc.documentElement.outerHTML;
      }
    };
  }
  55. return {
  56. getContent,
  57. parseURL(resourceURL, baseURI) {
  58. if (baseURI === undefined) {
  59. return new URL(resourceURL);
  60. } else {
  61. return new URL(resourceURL, baseURI);
  62. }
  63. },
  64. resolveURL(resourceURL, baseURI) {
  65. return this.parseURL(resourceURL, baseURI).href;
  66. },
  67. parseDocContent(content, baseURI) {
  68. const doc = (new DOMParser()).parseFromString(content, "text/html");
  69. if (!doc.head) {
  70. doc.documentElement.insertBefore(doc.createElement("HEAD"), doc.body);
  71. }
  72. let baseElement = doc.querySelector("base");
  73. if (!baseElement || !baseElement.getAttribute("href")) {
  74. if (baseElement) {
  75. baseElement.remove();
  76. }
  77. baseElement = doc.createElement("base");
  78. baseElement.setAttribute("href", baseURI);
  79. doc.head.insertBefore(baseElement, doc.head.firstChild);
  80. }
  81. return doc;
  82. },
  83. parseXMLContent(content) {
  84. return (new DOMParser()).parseFromString(content, "text/xml");
  85. },
  86. parseSVGContent(content) {
  87. return (new DOMParser()).parseFromString(content, "image/svg+xml");
  88. },
  89. async digest(algo, text) {
  90. return util.digestText(algo, text);
  91. },
  92. getContentSize(content) {
  93. return new Blob([content]).size;
  94. },
  95. async truncateText(content, maxSize) {
  96. const blob = new Blob([content]);
  97. const reader = new FileReader();
  98. reader.readAsText(blob.slice(0, maxSize));
  99. return new Promise((resolve, reject) => {
  100. reader.addEventListener("load", () => {
  101. if (content.startsWith(reader.result)) {
  102. resolve(reader.result);
  103. } else {
  104. this.truncateText(content, maxSize - 1).then(resolve).catch(reject);
  105. }
  106. }, false);
  107. reader.addEventListener("error", reject, false);
  108. });
  109. },
  110. minifyHTML(doc, options) {
  111. return modules.htmlMinifier.process(doc, options);
  112. },
  113. postMinifyHTML(doc) {
  114. return modules.htmlMinifier.postProcess(doc);
  115. },
  116. minifyCSSRules(stylesheets, styles, mediaAllInfo) {
  117. return modules.cssRulesMinifier.process(stylesheets, styles, mediaAllInfo);
  118. },
  119. removeUnusedFonts(doc, stylesheets, styles, options) {
  120. return modules.fontsMinifier.process(doc, stylesheets, styles, options);
  121. },
  122. removeAlternativeFonts(doc, stylesheets) {
  123. return modules.fontsAltMinifier.process(doc, stylesheets);
  124. },
  125. getMediaAllInfo(doc, stylesheets, styles) {
  126. return modules.matchedRules.getMediaAllInfo(doc, stylesheets, styles);
  127. },
  128. compressCSS(content, options) {
  129. return modules.cssMinifier.processString(content, options);
  130. },
  131. minifyMedias(stylesheets) {
  132. return modules.mediasAltMinifier.process(stylesheets);
  133. },
  134. removeAlternativeImages(doc) {
  135. return modules.imagesAltMinifier.process(doc);
  136. },
  137. parseSrcset(srcset) {
  138. return modules.srcsetParser.process(srcset);
  139. },
  140. preProcessDoc(doc, win, options) {
  141. return modules.helper.preProcessDoc(doc, win, options);
  142. },
  143. postProcessDoc(doc, markedElements) {
  144. modules.helper.postProcessDoc(doc, markedElements);
  145. },
  146. serialize(doc, compressHTML) {
  147. return modules.serializer.process(doc, compressHTML);
  148. },
  149. removeQuotes(string) {
  150. return modules.helper.removeQuotes(string);
  151. },
  152. WIN_ID_ATTRIBUTE_NAME: modules.helper.WIN_ID_ATTRIBUTE_NAME,
  153. REMOVED_CONTENT_ATTRIBUTE_NAME: modules.helper.REMOVED_CONTENT_ATTRIBUTE_NAME,
  154. IMAGE_ATTRIBUTE_NAME: modules.helper.IMAGE_ATTRIBUTE_NAME,
  155. POSTER_ATTRIBUTE_NAME: modules.helper.POSTER_ATTRIBUTE_NAME,
  156. CANVAS_ATTRIBUTE_NAME: modules.helper.CANVAS_ATTRIBUTE_NAME,
  157. HTML_IMPORT_ATTRIBUTE_NAME: modules.helper.HTML_IMPORT_ATTRIBUTE_NAME,
  158. INPUT_VALUE_ATTRIBUTE_NAME: modules.helper.INPUT_VALUE_ATTRIBUTE_NAME,
  159. SHADOW_ROOT_ATTRIBUTE_NAME: modules.helper.SHADOW_ROOT_ATTRIBUTE_NAME,
  160. PRESERVED_SPACE_ELEMENT_ATTRIBUTE_NAME: modules.helper.PRESERVED_SPACE_ELEMENT_ATTRIBUTE_NAME,
  161. STYLESHEET_ATTRIBUTE_NAME: modules.helper.STYLESHEET_ATTRIBUTE_NAME
  162. };
  /**
   * Downloads a resource through `util.getResourceContent` and returns its
   * content, either as a data URI (`options.asBinary`) or as decoded text.
   *
   * The function is best-effort: download and decoding failures are reported
   * as empty content ("data:base64," in binary mode, "" in text mode) rather
   * than as a rejection.
   *
   * Options read by this function:
   * - `asBinary`: return a data URI instead of text;
   * - `charset`: charset used when the Content-Type header declares none;
   * - `maxResourceSizeEnabled` / `maxResourceSize`: when enabled, resources
   *   larger than `maxResourceSize` megabytes yield empty content;
   * - `validateTextContentType`: in text mode, yield empty content when the
   *   content type is known and does not start with "text/".
   *
   * @param {string} resourceURL - URL of the resource to download
   * @param {Object} options - see above
   * @returns {Promise<{data: string, resourceURL: string, charset: (string|undefined)}>}
   *   `resourceURL` is the URL reported by the downloaded resource (or the
   *   requested one when the download failed); `charset` is only present when
   *   text decoding was attempted or skipped because of the size limit.
   */
  async function getContent(resourceURL, options) {
    let startTime;
    if (DEBUG) {
      startTime = Date.now();
      log(" // STARTED download url =", resourceURL, "asBinary =", options.asBinary);
    }
    let resourceContent;
    try {
      resourceContent = await util.getResourceContent(resourceURL);
    } catch (error) {
      // Best-effort: an unreachable resource is replaced with empty content.
      return { data: options.asBinary ? "data:base64," : "", resourceURL };
    }
    const finalURL = resourceContent.getUrl();
    const parsedHeader = parseContentTypeHeader(resourceContent.getContentType());
    const contentType = parsedHeader.contentType;
    let charset = parsedHeader.charset;
    if (!charset && options.charset) {
      charset = options.charset;
    }
    if (options.asBinary) {
      try {
        if (DEBUG) {
          log(" // ENDED download url =", finalURL, "delay =", Date.now() - startTime);
        }
        if (exceedsMaxResourceSize(resourceContent, options)) {
          return { data: "data:base64,", resourceURL: finalURL };
        }
        const dataUri = await resourceContent.getDataUri(contentType);
        return { data: dataUri, resourceURL: finalURL };
      } catch (error) {
        return { data: "data:base64,", resourceURL: finalURL };
      }
    }
    const isTextTypeRejected = options.validateTextContentType && contentType && !contentType.startsWith(PREFIX_CONTENT_TYPE_TEXT);
    if (resourceContent.getStatusCode() >= 400 || isTextTypeRejected) {
      return { data: "", resourceURL: finalURL };
    }
    if (!charset) {
      charset = "utf-8";
    }
    if (DEBUG) {
      log(" // ENDED download url =", finalURL, "delay =", Date.now() - startTime);
    }
    if (exceedsMaxResourceSize(resourceContent, options)) {
      return { data: "", resourceURL: finalURL, charset };
    }
    try {
      return { data: resourceContent.getText(charset), resourceURL: finalURL, charset };
    } catch (error) {
      // Decoding with the declared charset failed: retry once with UTF-8.
      try {
        charset = "utf-8";
        return { data: resourceContent.getText(charset), resourceURL: finalURL, charset };
      } catch (error) {
        return { data: "", resourceURL: finalURL, charset };
      }
    }
  }

  /**
   * Splits a Content-Type header value into its media type and charset.
   *
   * Every parameter following the media type is inspected, so the charset is
   * found even when it is not the first parameter of the header.
   *
   * @param {string} [headerValue] - raw header value
   * @returns {{contentType: (string|null|undefined), charset: (string|undefined)}}
   *   `contentType` is lower-cased, `null` when it contains no "/", and the
   *   original falsy value when the header is missing.
   */
  function parseContentTypeHeader(headerValue) {
    if (!headerValue) {
      return { contentType: headerValue, charset: undefined };
    }
    const [mediaType, ...parameters] = headerValue.toLowerCase().split(";");
    let contentType = mediaType.trim();
    if (!contentType.includes("/")) {
      contentType = null;
    }
    let charset;
    for (const parameter of parameters) {
      const matchCharset = parameter.trim().match(/^charset=(.*)/);
      if (matchCharset && matchCharset[1]) {
        charset = modules.helper.removeQuotes(matchCharset[1].trim());
        break;
      }
    }
    return { contentType, charset };
  }

  /** Tells whether the size limit is enabled in `options` and exceeded by `resourceContent`. */
  function exceedsMaxResourceSize(resourceContent, options) {
    return Boolean(options.maxResourceSizeEnabled && resourceContent.getSize() > options.maxResourceSize * ONE_MB);
  }
  234. }
  235. };
  /**
   * Writes a debug message to the console, prefixed with the "S-File <browser>" tag.
   *
   * @param {...*} args - values forwarded to `console.log` after the tag
   */
  function log(...args) {
    const taggedArgs = ["S-File <browser>"].concat(args);
    console.log.apply(console, taggedArgs); // eslint-disable-line no-console
  }
  239. })();