html-minifier.js 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262
  1. /*
  2. * Copyright 2010-2020 Gildas Lormeau
  3. * contact : gildas.lormeau <at> gmail.com
  4. *
  5. * This file is part of SingleFile.
  6. *
  7. * The code in this file is free software: you can redistribute it and/or
  8. * modify it under the terms of the GNU Affero General Public License
  9. * (GNU AGPL) as published by the Free Software Foundation, either version 3
  10. * of the License, or (at your option) any later version.
  11. *
  12. * The code in this file is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero
  15. * General Public License for more details.
  16. *
  17. * As additional permission under GNU AGPL version 3 section 7, you may
  18. * distribute UNMODIFIED VERSIONS OF THIS file without the copy of the GNU
  19. * AGPL normally required by section 4, provided you include this license
  20. * notice and a URL through which recipients can access the Corresponding
  21. * Source.
  22. */
  23. // Derived from the work of Kirill Maltsev - https://github.com/posthtml/htmlnano
  24. // Source: https://github.com/kangax/html-minifier/issues/63
  25. const booleanAttributes = [
  26. "allowfullscreen",
  27. "async",
  28. "autofocus",
  29. "autoplay",
  30. "checked",
  31. "compact",
  32. "controls",
  33. "declare",
  34. "default",
  35. "defaultchecked",
  36. "defaultmuted",
  37. "defaultselected",
  38. "defer",
  39. "disabled",
  40. "enabled",
  41. "formnovalidate",
  42. "hidden",
  43. "indeterminate",
  44. "inert",
  45. "ismap",
  46. "itemscope",
  47. "loop",
  48. "multiple",
  49. "muted",
  50. "nohref",
  51. "noresize",
  52. "noshade",
  53. "novalidate",
  54. "nowrap",
  55. "open",
  56. "pauseonexit",
  57. "readonly",
  58. "required",
  59. "reversed",
  60. "scoped",
  61. "seamless",
  62. "selected",
  63. "sortable",
  64. "truespeed",
  65. "typemustmatch",
  66. "visible"
  67. ];
  68. const noWhitespaceCollapseElements = ["script", "style", "pre", "textarea"];
  69. // Source: https://www.w3.org/TR/html4/sgml/dtd.html#events (Generic Attributes)
  70. const safeToRemoveAttrs = [
  71. "id",
  72. "class",
  73. "style",
  74. "lang",
  75. "dir",
  76. "onclick",
  77. "ondblclick",
  78. "onmousedown",
  79. "onmouseup",
  80. "onmouseover",
  81. "onmousemove",
  82. "onmouseout",
  83. "onkeypress",
  84. "onkeydown",
  85. "onkeyup"
  86. ];
  87. const redundantAttributes = {
  88. "form": {
  89. "method": "get"
  90. },
  91. "script": {
  92. "language": "javascript",
  93. "type": "text/javascript",
  94. // Remove attribute if the function returns false
  95. "charset": node => {
  96. // The charset attribute only really makes sense on “external” SCRIPT elements:
  97. // http://perfectionkills.com/optimizing-html/#8_script_charset
  98. return !node.getAttribute("src");
  99. }
  100. },
  101. "style": {
  102. "media": "all",
  103. "type": "text/css"
  104. },
  105. "link": {
  106. "media": "all"
  107. }
  108. };
  109. const REGEXP_WHITESPACE = /[ \t\f\r]+/g;
  110. const REGEXP_NEWLINE = /[\n]+/g;
  111. const REGEXP_ENDS_WHITESPACE = /^\s+$/;
  112. const NodeFilter_SHOW_ALL = 4294967295;
  113. const Node_ELEMENT_NODE = 1;
  114. const Node_TEXT_NODE = 3;
  115. const Node_COMMENT_NODE = 8;
  116. const modules = [
  117. collapseBooleanAttributes,
  118. mergeTextNodes,
  119. collapseWhitespace,
  120. removeComments,
  121. removeEmptyAttributes,
  122. removeRedundantAttributes,
  123. compressJSONLD,
  124. node => mergeElements(node, "style", (node, previousSibling) => node.parentElement && node.parentElement.tagName == "HEAD" && node.media == previousSibling.media && node.title == previousSibling.title)
  125. ];
  126. export {
  127. process
  128. };
  129. function process(doc, options) {
  130. removeEmptyInlineElements(doc);
  131. const nodesWalker = doc.createTreeWalker(doc.documentElement, NodeFilter_SHOW_ALL, null, false);
  132. let node = nodesWalker.nextNode();
  133. while (node) {
  134. const deletedNode = modules.find(module => module(node, options));
  135. const previousNode = node;
  136. node = nodesWalker.nextNode();
  137. if (deletedNode) {
  138. previousNode.remove();
  139. }
  140. }
  141. }
  142. function collapseBooleanAttributes(node) {
  143. if (node.nodeType == Node_ELEMENT_NODE) {
  144. Array.from(node.attributes).forEach(attribute => {
  145. if (booleanAttributes.includes(attribute.name)) {
  146. node.setAttribute(attribute.name, "");
  147. }
  148. });
  149. }
  150. }
  151. function mergeTextNodes(node) {
  152. if (node.nodeType == Node_TEXT_NODE) {
  153. if (node.previousSibling && node.previousSibling.nodeType == Node_TEXT_NODE) {
  154. node.textContent = node.previousSibling.textContent + node.textContent;
  155. node.previousSibling.remove();
  156. }
  157. }
  158. }
  159. function mergeElements(node, tagName, acceptMerge) {
  160. if (node.nodeType == Node_ELEMENT_NODE && node.tagName.toLowerCase() == tagName.toLowerCase()) {
  161. let previousSibling = node.previousSibling;
  162. const previousSiblings = [];
  163. while (previousSibling && previousSibling.nodeType == Node_TEXT_NODE && !previousSibling.textContent.trim()) {
  164. previousSiblings.push(previousSibling);
  165. previousSibling = previousSibling.previousSibling;
  166. }
  167. if (previousSibling && previousSibling.nodeType == Node_ELEMENT_NODE && previousSibling.tagName == node.tagName && acceptMerge(node, previousSibling)) {
  168. node.textContent = previousSibling.textContent + node.textContent;
  169. previousSiblings.forEach(node => node.remove());
  170. previousSibling.remove();
  171. }
  172. }
  173. }
  174. function collapseWhitespace(node, options) {
  175. if (node.nodeType == Node_TEXT_NODE) {
  176. let element = node.parentElement;
  177. const spacePreserved = element.getAttribute(options.PRESERVED_SPACE_ELEMENT_ATTRIBUTE_NAME) == "";
  178. if (!spacePreserved) {
  179. const textContent = node.textContent;
  180. let noWhitespace = noWhitespaceCollapse(element);
  181. while (noWhitespace) {
  182. element = element.parentElement;
  183. noWhitespace = element && noWhitespaceCollapse(element);
  184. }
  185. if ((!element || noWhitespace) && textContent.length > 1) {
  186. node.textContent = textContent.replace(REGEXP_WHITESPACE, getWhiteSpace(node)).replace(REGEXP_NEWLINE, "\n");
  187. }
  188. }
  189. }
  190. }
  191. function getWhiteSpace(node) {
  192. return node.parentElement && node.parentElement.tagName == "HEAD" ? "\n" : " ";
  193. }
  194. function noWhitespaceCollapse(element) {
  195. return element && !noWhitespaceCollapseElements.includes(element.tagName.toLowerCase());
  196. }
  197. function removeComments(node) {
  198. if (node.nodeType == Node_COMMENT_NODE && node.parentElement.tagName != "HTML") {
  199. return !node.textContent.toLowerCase().trim().startsWith("[if");
  200. }
  201. }
  202. function removeEmptyAttributes(node) {
  203. if (node.nodeType == Node_ELEMENT_NODE) {
  204. Array.from(node.attributes).forEach(attribute => {
  205. if (safeToRemoveAttrs.includes(attribute.name.toLowerCase())) {
  206. const attributeValue = node.getAttribute(attribute.name);
  207. if (attributeValue == "" || (attributeValue || "").match(REGEXP_ENDS_WHITESPACE)) {
  208. node.removeAttribute(attribute.name);
  209. }
  210. }
  211. });
  212. }
  213. }
  214. function removeRedundantAttributes(node) {
  215. if (node.nodeType == Node_ELEMENT_NODE) {
  216. const tagRedundantAttributes = redundantAttributes[node.tagName.toLowerCase()];
  217. if (tagRedundantAttributes) {
  218. Object.keys(tagRedundantAttributes).forEach(redundantAttributeName => {
  219. const tagRedundantAttributeValue = tagRedundantAttributes[redundantAttributeName];
  220. if (typeof tagRedundantAttributeValue == "function" ? tagRedundantAttributeValue(node) : node.getAttribute(redundantAttributeName) == tagRedundantAttributeValue) {
  221. node.removeAttribute(redundantAttributeName);
  222. }
  223. });
  224. }
  225. }
  226. }
  227. function compressJSONLD(node) {
  228. if (node.nodeType == Node_ELEMENT_NODE && node.tagName == "SCRIPT" && node.type == "application/ld+json" && node.textContent.trim()) {
  229. try {
  230. node.textContent = JSON.stringify(JSON.parse(node.textContent));
  231. } catch (error) {
  232. // ignored
  233. }
  234. }
  235. }
  236. function removeEmptyInlineElements(doc) {
  237. doc.querySelectorAll("style, script:not([src])").forEach(element => {
  238. if (!element.textContent.trim()) {
  239. element.remove();
  240. }
  241. });
  242. }