doc-util.js 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236
  1. /*
  2. * Copyright 2010-2019 Gildas Lormeau
  3. * contact : gildas.lormeau <at> gmail.com
  4. *
  5. * This file is part of SingleFile.
  6. *
  7. * SingleFile is free software: you can redistribute it and/or modify
  8. * it under the terms of the GNU Lesser General Public License as published by
  9. * the Free Software Foundation, either version 3 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * SingleFile is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public License
  18. * along with SingleFile. If not, see <http://www.gnu.org/licenses/>.
  19. */
  20. /* global DOMParser, URL, Blob, FileReader */
  21. this.docUtil = this.docUtil || (() => {
  22. const DEBUG = false;
  23. const ONE_MB = 1024 * 1024;
  24. const PREFIX_CONTENT_TYPE_TEXT = "text/";
  25. return {
  26. getInstance: (modules, domUtil) => {
  // Install a fallback serializer only when the caller did not provide one.
  if (modules.serializer === undefined) {
    modules.serializer = {
      /**
       * Serializes a document as its doctype declaration (when present) followed
       * by the outer HTML of its root element.
       *
       * @param {Document} doc - document to serialize
       * @returns {string} the doctype declaration plus `doc.documentElement.outerHTML`
       */
      process(doc) {
        const { doctype } = doc;
        let declaration = "";
        if (doctype) {
          const { nodeName, publicId, systemId, internalSubset } = doctype;
          let externalId = "";
          if (publicId) {
            // The system identifier is only appended after a public identifier when it is set.
            externalId = ` PUBLIC "${publicId}"` + (systemId ? ` "${systemId}"` : "");
          } else if (systemId) {
            externalId = ` SYSTEM "${systemId}"`;
          }
          const subset = internalSubset ? ` [${internalSubset}]` : "";
          declaration = `<!DOCTYPE ${nodeName}${externalId}${subset}> `;
        }
        return declaration + doc.documentElement.outerHTML;
      }
    };
  }
  48. return {
  49. getContent,
  50. parseURL(resourceURL, baseURI) {
  51. return new URL(resourceURL, baseURI);
  52. },
  53. resolveURL(resourceURL, baseURI) {
  54. return this.parseURL(resourceURL, baseURI).href;
  55. },
  56. parseDocContent(content, baseURI) {
  57. const doc = (new DOMParser()).parseFromString(content, "text/html");
  58. if (!doc.head) {
  59. doc.documentElement.insertBefore(doc.createElement("HEAD"), doc.body);
  60. }
  61. let baseElement = doc.querySelector("base");
  62. if (!baseElement || !baseElement.getAttribute("href")) {
  63. if (baseElement) {
  64. baseElement.remove();
  65. }
  66. baseElement = doc.createElement("base");
  67. baseElement.setAttribute("href", baseURI);
  68. doc.head.insertBefore(baseElement, doc.head.firstChild);
  69. }
  70. return doc;
  71. },
  72. parseXMLContent(content) {
  73. return (new DOMParser()).parseFromString(content, "text/xml");
  74. },
  75. parseSVGContent(content) {
  76. return (new DOMParser()).parseFromString(content, "image/svg+xml");
  77. },
  78. async digest(algo, text) {
  79. return domUtil.digestText(algo, text);
  80. },
  81. async validFont(urlFunction) {
  82. return domUtil.isValidFontUrl(urlFunction);
  83. },
  84. getContentSize(content) {
  85. return new Blob([content]).size;
  86. },
  87. async truncateContent(content, maxSize) {
  88. const blob = new Blob([content]);
  89. const reader = new FileReader();
  90. reader.readAsText(blob.slice(0, maxSize));
  91. return await new Promise((resolve, reject) => {
  92. reader.addEventListener("load", () => {
  93. if (content.startsWith(reader.result)) {
  94. resolve(reader.result);
  95. } else {
  96. this.truncateContent(content, maxSize - 1).then(resolve).catch(reject);
  97. }
  98. }, false);
  99. reader.addEventListener("error", reject, false);
  100. });
  101. },
  102. minifyHTML(doc, options) {
  103. return modules.htmlMinifier.process(doc, options);
  104. },
  105. postMinifyHTML(doc) {
  106. return modules.htmlMinifier.postProcess(doc);
  107. },
  108. minifyCSSRules(stylesheets, styles, mediaAllInfo) {
  109. return modules.cssRulesMinifier.process(stylesheets, styles, mediaAllInfo);
  110. },
  111. removeUnusedFonts(doc, stylesheets, styles, options) {
  112. return modules.fontsMinifier.process(doc, stylesheets, styles, options);
  113. },
  114. removeAlternativeFonts(doc, stylesheets) {
  115. return modules.fontsAltMinifier.process(doc, stylesheets);
  116. },
  117. getMediaAllInfo(doc, stylesheets, styles) {
  118. return modules.matchedRules.getMediaAllInfo(doc, stylesheets, styles);
  119. },
  120. compressCSS(content, options) {
  121. return modules.cssMinifier.processString(content, options);
  122. },
  123. minifyMedias(stylesheets) {
  124. return modules.mediasAltMinifier.process(stylesheets);
  125. },
  126. removeAlternativeImages(doc, options) {
  127. return modules.imagesAltMinifier.process(doc, options);
  128. },
  129. parseSrcset(srcset) {
  130. return modules.srcsetParser.process(srcset);
  131. },
  132. preProcessDoc(doc, win, options) {
  133. return modules.docHelper.preProcessDoc(doc, win, options);
  134. },
  135. postProcessDoc(doc, options) {
  136. modules.docHelper.postProcessDoc(doc, options);
  137. },
  138. serialize(doc, compressHTML) {
  139. return modules.serializer.process(doc, compressHTML);
  140. },
  141. removeQuotes(string) {
  142. return modules.docHelper.removeQuotes(string);
  143. },
  // Attribute-name constants copied from `modules.docHelper`. The values are read
  // once, when this object literal is evaluated inside `getInstance`.
  WIN_ID_ATTRIBUTE_NAME: modules.docHelper.WIN_ID_ATTRIBUTE_NAME,
  REMOVED_CONTENT_ATTRIBUTE_NAME: modules.docHelper.REMOVED_CONTENT_ATTRIBUTE_NAME,
  IMAGE_ATTRIBUTE_NAME: modules.docHelper.IMAGE_ATTRIBUTE_NAME,
  INPUT_VALUE_ATTRIBUTE_NAME: modules.docHelper.INPUT_VALUE_ATTRIBUTE_NAME,
  SHADOW_ROOT_ATTRIBUTE_NAME: modules.docHelper.SHADOW_ROOT_ATTRIBUTE_NAME,
  PRESERVED_SPACE_ELEMENT_ATTRIBUTE_NAME: modules.docHelper.PRESERVED_SPACE_ELEMENT_ATTRIBUTE_NAME
  150. };
  /**
   * Splits a Content-Type header value into its media type and charset.
   *
   * Every parameter after the media type is inspected, so the charset is found
   * even when it is not the first parameter.
   *
   * @param {string|null|undefined} contentTypeHeader - raw header value
   * @returns {{contentType: (string|null|undefined), charset: (string|undefined)}}
   *   `contentType` is the lower-cased media type, `null` when it contains no "/",
   *   or the original falsy value when no header was supplied
   */
  function parseContentType(contentTypeHeader) {
    if (!contentTypeHeader) {
      // Hand back the falsy value untouched so callers see what the resource reported.
      return { contentType: contentTypeHeader, charset: undefined };
    }
    const [mediaType, ...parameters] = contentTypeHeader.toLowerCase().split(";");
    let contentType = mediaType.trim();
    if (!contentType.includes("/")) {
      contentType = null;
    }
    let charset;
    for (const parameter of parameters) {
      const matchCharset = parameter.trim().match(/^charset=(.*)/);
      if (matchCharset && matchCharset[1]) {
        charset = modules.docHelper.removeQuotes(matchCharset[1].trim());
        if (charset) {
          break;
        }
      }
    }
    return { contentType, charset };
  }

  /**
   * Tells whether the size limit is enabled and the resource exceeds it.
   *
   * @param {Object} resourceContent - object exposing `getSize()`
   * @param {Object} options - reads `maxResourceSizeEnabled` and `maxResourceSize` (in MB)
   * @returns {boolean} truthy when the resource must be discarded
   */
  function isResourceTooLarge(resourceContent, options) {
    return options.maxResourceSizeEnabled && resourceContent.getSize() > options.maxResourceSize * ONE_MB;
  }

  /**
   * Downloads a resource through `domUtil.getResourceContent` and returns its
   * content either as a data URI (`options.asDataURI`) or as decoded text.
   *
   * Download and decoding failures are not thrown: an empty placeholder
   * ("data:base64," for data URIs, "" for text) is returned instead. The same
   * placeholder is returned when the resource exceeds the configured maximum
   * size, and, in text mode, when the status code is >= 400 or
   * `options.validateTextContentType` is set and the media type does not start
   * with "text/".
   *
   * @param {string} resourceURL - URL of the resource to download
   * @param {Object} options
   * @param {boolean} [options.asDataURI] - return a data URI instead of text
   * @param {string} [options.charset] - charset used when the Content-Type declares none
   * @param {boolean} [options.maxResourceSizeEnabled] - enables the size limit
   * @param {number} [options.maxResourceSize] - size limit in MB
   * @param {boolean} [options.validateTextContentType] - reject non "text/" media types
   * @returns {Promise<{data: string, resourceURL: string, charset: (string|undefined)}>}
   *   `resourceURL` is the URL reported by the downloaded resource (or the
   *   requested one when the download failed); `charset` is only present on text
   *   results that passed the status/content-type check
   */
  async function getContent(resourceURL, options) {
    const emptyDataURI = "data:base64,";
    let resourceContent, startTime;
    if (DEBUG) {
      startTime = Date.now();
      log(" // STARTED download url =", resourceURL, "asDataURI =", options.asDataURI);
    }
    try {
      resourceContent = await domUtil.getResourceContent(resourceURL, options);
    } catch (error) {
      // Best effort: a failed download yields a placeholder rather than an exception.
      return { data: options.asDataURI ? emptyDataURI : "", resourceURL };
    }
    resourceURL = resourceContent.getUrl();
    const parsedContentType = parseContentType(resourceContent.getContentType());
    const contentType = parsedContentType.contentType;
    let charset = parsedContentType.charset;
    if (!charset && options.charset) {
      charset = options.charset;
    }
    if (options.asDataURI) {
      try {
        if (DEBUG) {
          log(" // ENDED download url =", resourceURL, "delay =", Date.now() - startTime);
        }
        if (isResourceTooLarge(resourceContent, options)) {
          return { data: emptyDataURI, resourceURL };
        }
        const dataUri = await resourceContent.getDataUri(contentType);
        return { data: dataUri, resourceURL };
      } catch (error) {
        return { data: emptyDataURI, resourceURL };
      }
    }
    const isErrorStatus = resourceContent.getStatusCode() >= 400;
    if (isErrorStatus || (options.validateTextContentType && contentType && !contentType.startsWith(PREFIX_CONTENT_TYPE_TEXT))) {
      return { data: "", resourceURL };
    }
    if (!charset) {
      charset = "utf-8";
    }
    if (DEBUG) {
      log(" // ENDED download url =", resourceURL, "delay =", Date.now() - startTime);
    }
    if (isResourceTooLarge(resourceContent, options)) {
      return { data: "", resourceURL, charset };
    }
    try {
      return { data: resourceContent.getText(charset), resourceURL, charset };
    } catch (error) {
      // Decoding with the declared charset failed: retry once with UTF-8.
      try {
        charset = "utf-8";
        return { data: resourceContent.getText(charset), resourceURL, charset };
      } catch (error) {
        return { data: "", resourceURL, charset };
      }
    }
  }
  222. }
  223. };
  /**
   * Writes a debug message to the console, prefixed with "S-File <browser>".
   *
   * @param {...*} args - values appended after the prefix
   */
  function log(...args) {
    const prefixedArgs = ["S-File <browser>", ...args];
    console.log(...prefixedArgs); // eslint-disable-line no-console
  }
  227. })();