doc-util-core.js 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249
  1. /*
  2. * Copyright 2010-2019 Gildas Lormeau
  3. * contact : gildas.lormeau <at> gmail.com
  4. *
  5. * This file is part of SingleFile.
  6. *
  7. * SingleFile is free software: you can redistribute it and/or modify
  8. * it under the terms of the GNU Lesser General Public License as published by
  9. * the Free Software Foundation, either version 3 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * SingleFile is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public License
  18. * along with SingleFile. If not, see <http://www.gnu.org/licenses/>.
  19. */
  20. this.DocUtilCore = this.DocUtilCore || (() => {
  // When true, DocUtil.getContent() traces the start and end of each download through log().
  const DEBUG = false;
  // Bytes per megabyte: options.maxResourceSize is multiplied by this before being
  // compared with the size reported for a downloaded resource.
  const ONE_MB = 2 ** 20;
  // Media-type prefix a resource must have when options.validateTextContentType is set.
  const PREFIX_CONTENT_TYPE_TEXT = "text/";
  24. return {
  25. getClass: (modules, domUtil) => {
  if (modules.serializer === undefined) {
    // No serializer was supplied by the caller: install a default one.
    modules.serializer = {
      /**
       * Serializes a document as its doctype declaration (when it has one)
       * followed by the outer HTML of its root element.
       *
       * @param {Document} doc - document exposing `doctype` and `documentElement`
       * @returns {string} the doctype declaration, a single space, then the root markup;
       * only the root markup when the document has no doctype
       */
      process(doc) {
        const { doctype } = doc;
        let declaration = "";
        if (doctype) {
          const { nodeName, publicId, systemId, internalSubset } = doctype;
          const pieces = [`<!DOCTYPE ${nodeName}`];
          if (publicId) {
            pieces.push(` PUBLIC "${publicId}"`);
            if (systemId) {
              pieces.push(` "${systemId}"`);
            }
          } else if (systemId) {
            // A system identifier without a public one needs the SYSTEM keyword.
            pieces.push(` SYSTEM "${systemId}"`);
          }
          if (internalSubset) {
            pieces.push(` [${internalSubset}]`);
          }
          pieces.push("> ");
          declaration = pieces.join("");
        }
        return declaration + doc.documentElement.outerHTML;
      }
    };
  }
  47. return class DocUtil {
  48. static async getContent(resourceURL, options) {
  49. let resourceContent, startTime;
  50. if (DEBUG) {
  51. startTime = Date.now();
  52. log(" // STARTED download url =", resourceURL, "asDataURI =", options.asDataURI);
  53. }
  54. try {
  55. resourceContent = await domUtil.getResourceContent(resourceURL, options);
  56. } catch (error) {
  57. return { data: options.asDataURI ? "data:base64," : "", resourceURL };
  58. }
  59. resourceURL = resourceContent.getUrl();
  60. let contentType = resourceContent.getContentType();
  61. let charset;
  62. if (contentType) {
  63. const matchContentType = contentType.toLowerCase().split(";");
  64. contentType = matchContentType[0].trim();
  65. if (!contentType.includes("/")) {
  66. contentType = null;
  67. }
  68. const charsetValue = matchContentType[1] && matchContentType[1].trim();
  69. if (charsetValue) {
  70. const matchCharset = charsetValue.match(/^charset=(.*)/);
  71. if (matchCharset && matchCharset[1]) {
  72. charset = modules.docHelper.removeQuotes(matchCharset[1].trim());
  73. }
  74. }
  75. }
  76. if (!charset && options.charset) {
  77. charset = options.charset;
  78. }
  79. if (options.asDataURI) {
  80. try {
  81. if (DEBUG) {
  82. log(" // ENDED download url =", resourceURL, "delay =", Date.now() - startTime);
  83. }
  84. if (options.maxResourceSizeEnabled && resourceContent.getSize() > options.maxResourceSize * ONE_MB) {
  85. return { data: "data:base64,", resourceURL };
  86. } else {
  87. const dataUri = await resourceContent.getDataUri(contentType);
  88. return { data: dataUri, resourceURL };
  89. }
  90. } catch (error) {
  91. return { data: "data:base64,", resourceURL };
  92. }
  93. } else {
  94. if (resourceContent.getStatusCode() >= 400 || (options.validateTextContentType && contentType && !contentType.startsWith(PREFIX_CONTENT_TYPE_TEXT))) {
  95. return { data: "", resourceURL };
  96. }
  97. if (!charset) {
  98. charset = "utf-8";
  99. }
  100. if (DEBUG) {
  101. log(" // ENDED download url =", resourceURL, "delay =", Date.now() - startTime);
  102. }
  103. if (options.maxResourceSizeEnabled && resourceContent.getSize() > options.maxResourceSize * ONE_MB) {
  104. return { data: "", resourceURL, charset };
  105. } else {
  106. try {
  107. return { data: resourceContent.getText(charset), resourceURL, charset };
  108. } catch (error) {
  109. try {
  110. charset = "utf-8";
  111. return { data: resourceContent.getText(charset), resourceURL, charset };
  112. } catch (error) {
  113. return { data: "", resourceURL, charset };
  114. }
  115. }
  116. }
  117. }
  118. }
  119. static parseURL(resourceURL, baseURI) {
  120. return domUtil.parseURL(resourceURL, baseURI);
  121. }
  122. static resolveURL(resourceURL, baseURI) {
  123. return this.parseURL(resourceURL, baseURI).href;
  124. }
  125. static parseDocContent(content, baseURI) {
  126. const doc = domUtil.parseDocContent(content);
  127. let baseElement = doc.querySelector("base");
  128. if (!baseElement || !baseElement.getAttribute("href")) {
  129. if (baseElement) {
  130. baseElement.remove();
  131. }
  132. baseElement = doc.createElement("base");
  133. baseElement.setAttribute("href", baseURI);
  134. doc.head.insertBefore(baseElement, doc.head.firstChild);
  135. }
  136. return doc;
  137. }
  138. static parseSVGContent(content) {
  139. return domUtil.parseSVGContent(content);
  140. }
  141. static async digest(algo, text) {
  142. return domUtil.digestText(algo, text);
  143. }
  144. static getContentSize(content) {
  145. return domUtil.getContentSize(content);
  146. }
  147. static async validFont(urlFunction) {
  148. return domUtil.isValidFontUrl(urlFunction);
  149. }
  150. static minifyHTML(doc, options) {
  151. return modules.htmlMinifier.process(doc, options);
  152. }
  153. static postMinifyHTML(doc) {
  154. return modules.htmlMinifier.postProcess(doc);
  155. }
  156. static minifyCSSRules(stylesheets, styles, mediaAllInfo) {
  157. return modules.cssRulesMinifier.process(stylesheets, styles, mediaAllInfo);
  158. }
  159. static removeUnusedFonts(doc, stylesheets, styles, options) {
  160. return modules.fontsMinifier.process(doc, stylesheets, styles, options);
  161. }
  162. static removeAlternativeFonts(doc, stylesheets) {
  163. return modules.fontsAltMinifier.process(doc, stylesheets);
  164. }
  165. static getMediaAllInfo(doc, stylesheets, styles) {
  166. return modules.matchedRules.getMediaAllInfo(doc, stylesheets, styles);
  167. }
  168. static compressCSS(content, options) {
  169. return modules.cssMinifier.processString(content, options);
  170. }
  171. static minifyMedias(stylesheets) {
  172. return modules.mediasMinifier.process(stylesheets);
  173. }
  174. static removeAlternativeImages(doc, options) {
  175. return modules.imagesAltMinifier.process(doc, options);
  176. }
  177. static parseSrcset(srcset) {
  178. return modules.srcsetParser.process(srcset);
  179. }
  180. static preProcessDoc(doc, win, options) {
  181. return modules.docHelper.preProcessDoc(doc, win, options);
  182. }
  183. static postProcessDoc(doc, options) {
  184. modules.docHelper.postProcessDoc(doc, options);
  185. }
  186. static serialize(doc, compressHTML) {
  187. return modules.serializer.process(doc, compressHTML);
  188. }
  189. static removeQuotes(string) {
  190. return modules.docHelper.removeQuotes(string);
  191. }
  192. static windowIdAttributeName(sessionId) {
  193. return modules.docHelper.windowIdAttributeName(sessionId);
  194. }
  195. static preservedSpaceAttributeName(sessionId) {
  196. return modules.docHelper.preservedSpaceAttributeName(sessionId);
  197. }
  198. static removedContentAttributeName(sessionId) {
  199. return modules.docHelper.removedContentAttributeName(sessionId);
  200. }
  201. static imagesAttributeName(sessionId) {
  202. return modules.docHelper.imagesAttributeName(sessionId);
  203. }
  204. static inputValueAttributeName(sessionId) {
  205. return modules.docHelper.inputValueAttributeName(sessionId);
  206. }
  207. static shadowRootAttributeName(sessionId) {
  208. return modules.docHelper.shadowRootAttributeName(sessionId);
  209. }
  210. };
  211. }
  212. };
  /**
   * Debug logger: writes its arguments to the console, preceded by the
   * "S-File <browser>" tag.
   *
   * @param {...*} args - values to print after the tag
   */
  function log(...args) {
    const message = ["S-File <browser>", ...args];
    console.log(...message); // eslint-disable-line no-console
  }
  216. })();