single-file-node.js 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181
  1. /*
  2. * Copyright 2010-2019 Gildas Lormeau
  3. * contact : gildas.lormeau <at> gmail.com
  4. *
  5. * This file is part of SingleFile.
  6. *
  7. * The code in this file is free software: you can redistribute it and/or
  8. * modify it under the terms of the GNU Affero General Public License
  9. * (GNU AGPL) as published by the Free Software Foundation, either version 3
  10. * of the License, or (at your option) any later version.
  11. *
  12. * The code in this file is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero
  15. * General Public License for more details.
  16. *
  17. * As additional permission under GNU AGPL version 3 section 7, you may
  18. * distribute UNMODIFIED VERSIONS OF THIS file without the copy of the GNU
  19. * AGPL normally required by section 4, provided you include this license
  20. * notice and a URL through which recipients can access the Corresponding
  21. * Source.
  22. */
  23. /* global require, exports, Buffer */
  24. const fs = require("fs");
  25. const crypto = require("crypto");
  26. const jsdom = require("jsdom");
  27. const dataUri = require("strong-data-uri");
  28. const iconv = require("iconv-lite");
  29. const request = require("request-promise-native");
  30. const { JSDOM } = jsdom;
  // Number of bytes in one megabyte (2^20); multiplied with the size limit expressed in MB.
  const ONE_MB = 2 ** 20;
  // Content types starting with this prefix are considered to be text.
  const PREFIX_CONTENT_TYPE_TEXT = "text/";
  // Paths of the SingleFile scripts loaded below, in this exact order.
  // NOTE: relative paths given to fs.readFileSync are resolved against
  // process.cwd(), so loading depends on the directory the process is started from.
  33. const SCRIPTS = [
  34. "./lib/single-file/util/doc-util-core.js",
  35. "./lib/single-file/util/doc-helper.js",
  36. "./lib/single-file/vendor/css-tree.js",
  37. "./lib/single-file/vendor/html-srcset-parser.js",
  38. "./lib/single-file/vendor/css-minifier.js",
  39. "./lib/single-file/vendor/css-font-property-parser.js",
  40. "./lib/single-file/vendor/css-media-query-parser.js",
  41. "./lib/single-file/single-file-core.js",
  42. "./lib/single-file/modules/html-minifier.js",
  43. "./lib/single-file/modules/css-fonts-minifier.js",
  44. "./lib/single-file/modules/css-fonts-alt-minifier.js",
  45. "./lib/single-file/modules/css-matched-rules.js",
  46. "./lib/single-file/modules/css-medias-alt-minifier.js",
  47. "./lib/single-file/modules/css-rules-minifier.js",
  48. "./lib/single-file/modules/html-images-alt-minifier.js",
  49. "./lib/single-file/modules/html-serializer.js",
  50. ];
  // Each script is read synchronously and executed with a direct eval() inside an
  // arrow function, so the evaluated code sees the same `this` as the top level of
  // this module. The statements that follow read properties such as this.docHelper,
  // so the scripts presumably publish their APIs on `this` — verify in the scripts.
  // NOTE(review): eval() runs the file contents with full privileges; these paths
  // must only ever point to trusted files shipped with the project.
  51. SCRIPTS.forEach(scriptPath => eval(fs.readFileSync(scriptPath).toString()));
  // Helper object read from `this`; it is handed to the fonts minifier below and
  // is also used by getContent() to strip quotes from a charset value.
  const { docHelper } = this;
  // Collaborators passed to this.DocUtilCore.getClass() by exports.getClass.
  // The first five entries are taken from `this` as they are; the others are
  // obtained by calling getInstance() on the corresponding object found on `this`.
  const modules = {
    docHelper,
    srcsetParser: this.srcsetParser,
    cssMinifier: this.cssMinifier,
    htmlMinifier: this.htmlMinifier,
    serializer: this.serializer,
    fontsMinifier: this.fontsMinifier.getInstance(
      this.cssTree,
      this.fontPropertyParser,
      docHelper
    ),
    fontsAltMinifier: this.fontsAltMinifier.getInstance(this.cssTree),
    cssRulesMinifier: this.cssRulesMinifier.getInstance(this.cssTree),
    matchedRules: this.matchedRules.getInstance(this.cssTree),
    mediasMinifier: this.mediasMinifier.getInstance(
      this.cssTree,
      this.mediaQueryParser
    ),
    imagesAltMinifier: this.imagesAltMinifier.getInstance(this.srcsetParser)
  };
  // Functions defined in this file that are passed, together with `modules`,
  // to this.DocUtilCore.getClass() by exports.getClass.
  const domUtil = {
    getContent: getContent,
    parseDocContent: parseDocContent,
    parseSVGContent: parseSVGContent,
    isValidFontUrl: isValidFontUrl,
    getContentSize: getContentSize,
    digestText: digestText
  };
  74. exports.getClass = () => {
  75. const DocUtil = this.DocUtilCore.getClass(modules, domUtil);
  76. return this.SingleFileCore.getClass(DocUtil, this.cssTree);
  77. };
  /**
   * Parses HTML text with jsdom and makes sure the document has a usable <base>.
   *
   * When the first <base> element found has a non-empty href, the document is
   * returned untouched. Otherwise that element (if any) is removed and a new
   * <base href=baseURI> is inserted as the first child of <head>.
   *
   * @param {string} content - HTML source to parse.
   * @param {string} baseURI - value given to the href of the inserted <base>.
   * @returns {Document} the parsed document.
   */
  function parseDocContent(content, baseURI) {
    const { document } = new JSDOM(content, { contentType: "text/html" }).window;
    const existingBase = document.querySelector("base");
    if (existingBase && existingBase.getAttribute("href")) {
      return document;
    }
    if (existingBase) {
      // A <base> without href is useless: drop it before adding the new one.
      existingBase.remove();
    }
    const injectedBase = document.createElement("base");
    injectedBase.setAttribute("href", baseURI);
    document.head.insertBefore(injectedBase, document.head.firstChild);
    return document;
  }
  /**
   * Parses SVG source with jsdom, using the "image/svg+xml" content type.
   *
   * @param {string} content - SVG source to parse.
   * @returns {Document} the document of the created jsdom window.
   */
  function parseSVGContent(content) {
    const dom = new JSDOM(content, { contentType: "image/svg+xml" });
    return dom.window.document;
  }
  /**
   * Computes the hexadecimal digest of a text encoded as UTF-8.
   *
   * @param {string} algo - algorithm name such as "SHA-256"; its first "-" is
   *   removed and the name is lower-cased before being given to crypto.createHash().
   * @param {string} text - text to hash.
   * @returns {Promise<string>} promise resolved with the digest as a hex string.
   */
  async function digestText(algo, text) {
    const nodeAlgorithm = algo.replace("-", "").toLowerCase();
    return crypto
      .createHash(nodeAlgorithm)
      .update(text, "utf-8")
      .digest("hex");
  }
  /**
   * Returns the number of bytes needed to encode the content as UTF-8.
   *
   * @param {string} content - text to measure.
   * @returns {number} byte length of the UTF-8 encoded content.
   */
  function getContentSize(content) {
    const byteCount = Buffer.byteLength(content, "utf-8");
    return byteCount;
  }
  /**
   * Tells whether a font URL is valid.
   *
   * @returns {boolean} always true: the argument is ignored.
   */
  function isValidFontUrl(/* urlFunction */) {
    // TODO: no validation is implemented yet, every font URL is accepted.
    return true;
  }
  /**
   * Downloads a resource and returns it as a data URI or as decoded text.
   *
   * Every path returns an object with a `data` property, including the
   * request-failure path, so callers can always read `result.data`.
   *
   * @param {string} resourceURL - URL fetched with a GET request.
   * @param {Object} [options] - optional settings. Properties read here:
   *   userAgent (sent as the User-Agent header), charset (fallback when the
   *   response does not declare one), asDataURI, maxResourceSizeEnabled,
   *   maxResourceSize (in MB) and validateTextContentType.
   * @returns {Promise<Object>}
   *   - `{ data, resourceURL }` where data is a data URI when options.asDataURI
   *     is set ("data:base64," when the request fails, the resource is too big
   *     or the encoding fails);
   *   - `{ data: "", resourceURL }` when the request fails, the status code is
   *     >= 400 or the content type is rejected by validateTextContentType;
   *   - `{ data, charset }` with the decoded text otherwise.
   */
  async function getContent(resourceURL, options) {
    // A missing options object is treated as "no option set" instead of throwing.
    const settings = options || {};
    let response;
    try {
      response = await request({
        method: "GET",
        uri: resourceURL,
        resolveWithFullResponse: true,
        // No text decoding by the HTTP client: the body is decoded below.
        encoding: null,
        headers: {
          "User-Agent": settings.userAgent
        }
      });
    } catch (e) {
      // Best effort: a resource that cannot be fetched becomes empty content,
      // reported with the same object shape as the other paths.
      return { data: settings.asDataURI ? "data:base64," : "", resourceURL };
    }
    let contentType = response.headers["content-type"];
    let charset;
    if (contentType) {
      const headerParts = contentType.toLowerCase().split(";");
      contentType = headerParts[0].trim();
      if (!contentType.includes("/")) {
        contentType = null;
      }
      const charsetValue = headerParts[1] && headerParts[1].trim();
      if (charsetValue) {
        const matchCharset = charsetValue.match(/^charset=(.*)/);
        if (matchCharset && matchCharset[1]) {
          charset = docHelper.removeQuotes(matchCharset[1].trim());
        }
      }
    }
    if (!charset && settings.charset) {
      charset = settings.charset;
    }
    if (settings.asDataURI) {
      try {
        const buffer = response.body;
        const isTooBig = settings.maxResourceSizeEnabled && buffer.byteLength > settings.maxResourceSize * ONE_MB;
        if (isTooBig) {
          return { data: "data:base64,", resourceURL };
        }
        return { data: dataUri.encode(buffer, contentType), resourceURL };
      } catch (e) {
        return { data: "data:base64,", resourceURL };
      }
    }
    const isRejectedContentType = settings.validateTextContentType && contentType && !contentType.startsWith(PREFIX_CONTENT_TYPE_TEXT);
    if (response.statusCode >= 400 || isRejectedContentType) {
      return { data: "", resourceURL };
    }
    if (!charset) {
      charset = "utf-8";
    }
    try {
      return { data: iconv.decode(response.body, charset), charset };
    } catch (e) {
      // The charset could not be used for decoding: fall back to UTF-8.
      return { data: response.body.toString("utf8"), charset: "utf8" };
    }
  }