// doc-util-core.js 7.4 KB

  /*
   * Copyright 2010-2019 Gildas Lormeau
   * contact : gildas.lormeau <at> gmail.com
   *
   * This file is part of SingleFile.
   *
   * SingleFile is free software: you can redistribute it and/or modify
   * it under the terms of the GNU Lesser General Public License as published by
   * the Free Software Foundation, either version 3 of the License, or
   * (at your option) any later version.
   *
   * SingleFile is distributed in the hope that it will be useful,
   * but WITHOUT ANY WARRANTY; without even the implied warranty of
   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   * GNU Lesser General Public License for more details.
   *
   * You should have received a copy of the GNU Lesser General Public License
   * along with SingleFile. If not, see <http://www.gnu.org/licenses/>.
   */
  /**
   * DocUtilCore: factory for the `DocUtil` helper class.
   *
   * The value is created once and attached to the surrounding `this`; if
   * `this.DocUtilCore` already exists it is kept as is. It exposes a single
   * function, `getClass(modules, domUtil)`, which returns a class of static
   * helpers that delegate either to `domUtil` (resource loading, parsing,
   * hashing) or to entries of `modules` (minifiers, doc helper, serializer).
   */
  this.DocUtilCore = this.DocUtilCore || (() => {

    // When true, getContent() reports the start and end of each download through log().
    const DEBUG = false;
    // options.maxResourceSize is multiplied by this value before being compared with getSize().
    const ONE_MB = 1024 * 1024;
    const PREFIX_CONTENT_TYPE_TEXT = "text/";
    // Returned as `data` when a data URI was requested but cannot be produced.
    const EMPTY_DATA_URI = "data:base64,";
    const DEFAULT_CHARSET = "utf-8";
    const CHARSET_PARAMETER_REGEXP = /^charset=(.*)/;

    return {
      /**
       * Builds the DocUtil class bound to the given collaborators.
       *
       * @param {Object} modules - processing modules (docHelper, htmlMinifier, ...).
       *   When `modules.serializer` is undefined a default serializer is installed
       *   on this object (the argument is mutated).
       * @param {Object} domUtil - environment-specific implementation used for
       *   resource loading, URL/document parsing, hashing and size computation.
       * @returns {Function} the DocUtil class (static methods only).
       */
      getClass: (modules, domUtil) => {
        if (modules.serializer === undefined) {
          modules.serializer = {
            /**
             * Serializes a document as its doctype declaration (if any)
             * followed by the outer HTML of the root element.
             */
            process(doc) {
              return serializeDocType(doc.doctype) + doc.documentElement.outerHTML;
            }
          };
        }

        return class DocUtil {

          /**
           * Downloads a resource and returns it as a data URI or as text.
           *
           * Failures are deliberately not propagated on the download and
           * data-URI paths: the method answers with an empty payload instead
           * ("data:base64," when `options.asDataURI` is set, "" otherwise).
           *
           * @param {string} resourceURL - URL of the resource to load.
           * @param {Object} options - read here: `asDataURI`, `charset`,
           *   `maxResourceSizeEnabled`, `maxResourceSize`, `validateTextContentType`.
           *   The whole object is also forwarded to `domUtil.getResourceContent`.
           * @returns {Promise<Object>} `{ data, resourceURL }`, plus `charset`
           *   when text content was requested and the response passed the
           *   status/content-type checks. `resourceURL` is the URL reported by
           *   the response, or the requested URL if the download itself failed.
           */
          static async getContent(resourceURL, options) {
            let resourceContent, startTime;
            if (DEBUG) {
              startTime = Date.now();
              log(" // STARTED download url =", resourceURL, "asDataURI =", options.asDataURI);
            }
            try {
              resourceContent = await domUtil.getResourceContent(resourceURL, options);
            } catch (error) {
              // Best effort: an unreachable resource yields an empty payload.
              return { data: options.asDataURI ? EMPTY_DATA_URI : "", resourceURL };
            }
            const contentURL = resourceContent.getUrl();
            let contentType = resourceContent.getContentType();
            let charset;
            if (contentType) {
              const parsedContentType = parseContentType(contentType);
              contentType = parsedContentType.contentType;
              if (parsedContentType.charsetValue) {
                charset = modules.docHelper.removeQuotes(parsedContentType.charsetValue);
              }
            }
            // The charset declared by the response wins over the caller's hint.
            if (!charset && options.charset) {
              charset = options.charset;
            }

            if (options.asDataURI) {
              try {
                if (DEBUG) {
                  log(" // ENDED download url =", contentURL, "delay =", Date.now() - startTime);
                }
                if (isOversized(resourceContent, options)) {
                  return { data: EMPTY_DATA_URI, resourceURL: contentURL };
                }
                const dataUri = await resourceContent.getDataUri(contentType);
                return { data: dataUri, resourceURL: contentURL };
              } catch (error) {
                return { data: EMPTY_DATA_URI, resourceURL: contentURL };
              }
            }

            // Text content: reject error responses and, when validation is
            // requested, responses whose media type does not start with "text/".
            if (resourceContent.getStatusCode() >= 400 || (options.validateTextContentType && contentType && !contentType.startsWith(PREFIX_CONTENT_TYPE_TEXT))) {
              return { data: "", resourceURL: contentURL };
            }
            if (!charset) {
              charset = DEFAULT_CHARSET;
            }
            if (DEBUG) {
              log(" // ENDED download url =", contentURL, "delay =", Date.now() - startTime);
            }
            if (isOversized(resourceContent, options)) {
              return { data: "", resourceURL: contentURL, charset };
            }
            try {
              return { data: resourceContent.getText(charset), resourceURL: contentURL, charset };
            } catch (error) {
              // Decoding with the declared charset failed: retry once with utf-8.
              try {
                charset = DEFAULT_CHARSET;
                return { data: resourceContent.getText(charset), resourceURL: contentURL, charset };
              } catch (fallbackError) {
                return { data: "", resourceURL: contentURL, charset };
              }
            }
          }

          /** Delegates to `domUtil.parseURL(resourceURL, baseURI)`. */
          static parseURL(resourceURL, baseURI) {
            return domUtil.parseURL(resourceURL, baseURI);
          }

          /** Returns the `href` of the object produced by `parseURL(resourceURL, baseURI)`. */
          static resolveURL(resourceURL, baseURI) {
            return this.parseURL(resourceURL, baseURI).href;
          }

          /** Delegates to `domUtil.parseDocContent(content, baseURI)`. */
          static parseDocContent(content, baseURI) {
            return domUtil.parseDocContent(content, baseURI);
          }

          /** Delegates to `domUtil.parseSVGContent(content)`. */
          static parseSVGContent(content) {
            return domUtil.parseSVGContent(content);
          }

          /** Delegates to `domUtil.digestText(algo, text)`; always returns a promise. */
          static async digest(algo, text) {
            return domUtil.digestText(algo, text);
          }

          /** Delegates to `domUtil.getContentSize(content)`. */
          static getContentSize(content) {
            return domUtil.getContentSize(content);
          }

          /** Delegates to `domUtil.isValidFontUrl(urlFunction)`; always returns a promise. */
          static async validFont(urlFunction) {
            return domUtil.isValidFontUrl(urlFunction);
          }

          /** Delegates to `modules.htmlMinifier.process(doc, options)`. */
          static minifyHTML(doc, options) {
            return modules.htmlMinifier.process(doc, options);
          }

          /** Delegates to `modules.htmlMinifier.postProcess(doc)`. */
          static postMinifyHTML(doc) {
            return modules.htmlMinifier.postProcess(doc);
          }

          /** Delegates to `modules.cssRulesMinifier.process(stylesheets, styles, mediaAllInfo)`. */
          static minifyCSSRules(stylesheets, styles, mediaAllInfo) {
            return modules.cssRulesMinifier.process(stylesheets, styles, mediaAllInfo);
          }

          /** Delegates to `modules.fontsMinifier.process(doc, stylesheets, styles, options)`. */
          static removeUnusedFonts(doc, stylesheets, styles, options) {
            return modules.fontsMinifier.process(doc, stylesheets, styles, options);
          }

          /** Delegates to `modules.fontsAltMinifier.process(doc, stylesheets)`. */
          static removeAlternativeFonts(doc, stylesheets) {
            return modules.fontsAltMinifier.process(doc, stylesheets);
          }

          /** Delegates to `modules.matchedRules.getMediaAllInfo(doc, stylesheets, styles)`. */
          static getMediaAllInfo(doc, stylesheets, styles) {
            return modules.matchedRules.getMediaAllInfo(doc, stylesheets, styles);
          }

          /** Delegates to `modules.cssMinifier.processString(content, options)`. */
          static compressCSS(content, options) {
            return modules.cssMinifier.processString(content, options);
          }

          /** Delegates to `modules.mediasMinifier.process(stylesheets)`. */
          static minifyMedias(stylesheets) {
            return modules.mediasMinifier.process(stylesheets);
          }

          /** Delegates to `modules.imagesAltMinifier.process(doc, options)`. */
          static removeAlternativeImages(doc, options) {
            return modules.imagesAltMinifier.process(doc, options);
          }

          /** Delegates to `modules.srcsetParser.process(srcset)`. */
          static parseSrcset(srcset) {
            return modules.srcsetParser.process(srcset);
          }

          /** Delegates to `modules.docHelper.preProcessDoc(doc, win, options)`. */
          static preProcessDoc(doc, win, options) {
            return modules.docHelper.preProcessDoc(doc, win, options);
          }

          /** Calls `modules.docHelper.postProcessDoc(doc, options)`; returns nothing. */
          static postProcessDoc(doc, options) {
            modules.docHelper.postProcessDoc(doc, options);
          }

          /**
           * Delegates to `modules.serializer.process(doc, compressHTML)`.
           * The default serializer installed by getClass() ignores `compressHTML`.
           */
          static serialize(doc, compressHTML) {
            return modules.serializer.process(doc, compressHTML);
          }

          /** Delegates to `modules.docHelper.removeQuotes(string)`. */
          static removeQuotes(string) {
            return modules.docHelper.removeQuotes(string);
          }

          /** Delegates to `modules.docHelper.windowIdAttributeName(sessionId)`. */
          static windowIdAttributeName(sessionId) {
            return modules.docHelper.windowIdAttributeName(sessionId);
          }

          /** Delegates to `modules.docHelper.preservedSpaceAttributeName(sessionId)`. */
          static preservedSpaceAttributeName(sessionId) {
            return modules.docHelper.preservedSpaceAttributeName(sessionId);
          }

          /** Delegates to `modules.docHelper.removedContentAttributeName(sessionId)`. */
          static removedContentAttributeName(sessionId) {
            return modules.docHelper.removedContentAttributeName(sessionId);
          }

          /** Delegates to `modules.docHelper.imagesAttributeName(sessionId)`. */
          static imagesAttributeName(sessionId) {
            return modules.docHelper.imagesAttributeName(sessionId);
          }

          /** Delegates to `modules.docHelper.inputValueAttributeName(sessionId)`. */
          static inputValueAttributeName(sessionId) {
            return modules.docHelper.inputValueAttributeName(sessionId);
          }

          /** Delegates to `modules.docHelper.shadowRootAttributeName(sessionId)`. */
          static shadowRootAttributeName(sessionId) {
            return modules.docHelper.shadowRootAttributeName(sessionId);
          }
        };
      }
    };

    // The function declarations below are hoisted, so they are usable by the
    // code above even though they follow the return statement.

    /**
     * Builds the "<!DOCTYPE ...> " prefix for a document type node.
     *
     * @param {Object|null} docType - value of `document.doctype`.
     * @returns {string} the declaration followed by one space, or "" when
     *   `docType` is falsy.
     */
    function serializeDocType(docType) {
      if (!docType) {
        return "";
      }
      let docTypeString = "<!DOCTYPE " + docType.nodeName;
      if (docType.publicId) {
        docTypeString += " PUBLIC \"" + docType.publicId + "\"";
        if (docType.systemId) {
          docTypeString += " \"" + docType.systemId + "\"";
        }
      } else if (docType.systemId) {
        docTypeString += " SYSTEM \"" + docType.systemId + "\"";
      }
      if (docType.internalSubset) {
        docTypeString += " [" + docType.internalSubset + "]";
      }
      return docTypeString + "> ";
    }

    /**
     * Splits a Content-Type header value into its media type and charset.
     *
     * Every ";"-separated parameter is inspected, so the charset is found
     * even when it is not the first parameter
     * (e.g. "text/css; boundary=x; charset=iso-8859-1").
     *
     * @param {string} headerValue - non-empty Content-Type header value.
     * @returns {{contentType: (string|null), charsetValue: (string|undefined)}}
     *   `contentType` is the lower-cased, trimmed media type, or null when it
     *   contains no "/"; `charsetValue` is the lower-cased, trimmed raw value
     *   of the first non-empty `charset=` parameter (quotes not removed).
     */
    function parseContentType(headerValue) {
      const [mediaType, ...parameters] = headerValue.toLowerCase().split(";");
      let contentType = mediaType.trim();
      if (!contentType.includes("/")) {
        contentType = null;
      }
      let charsetValue;
      for (const parameter of parameters) {
        const matchCharset = parameter.trim().match(CHARSET_PARAMETER_REGEXP);
        if (matchCharset && matchCharset[1]) {
          charsetValue = matchCharset[1].trim();
          break;
        }
      }
      return { contentType, charsetValue };
    }

    /**
     * Tells whether the size limit is enabled and the resource exceeds it.
     * `resourceContent.getSize()` is only called when the limit is enabled.
     */
    function isOversized(resourceContent, options) {
      return Boolean(options.maxResourceSizeEnabled && resourceContent.getSize() > options.maxResourceSize * ONE_MB);
    }

    /** Writes the arguments to the console, prefixed with "S-File <browser>". */
    function log(...args) {
      console.log("S-File <browser>", ...args); // eslint-disable-line no-console
    }

  })();