html-minifier.js 7.7 KB


  1. /*
  2. * Copyright 2010-2020 Gildas Lormeau
  3. * contact : gildas.lormeau <at> gmail.com
  4. *
  5. * This file is part of SingleFile.
  6. *
  7. * The code in this file is free software: you can redistribute it and/or
  8. * modify it under the terms of the GNU Affero General Public License
  9. * (GNU AGPL) as published by the Free Software Foundation, either version 3
  10. * of the License, or (at your option) any later version.
  11. *
  12. * The code in this file is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero
  15. * General Public License for more details.
  16. *
  17. * As additional permission under GNU AGPL version 3 section 7, you may
  18. * distribute UNMODIFIED VERSIONS OF THIS file without the copy of the GNU
  19. * AGPL normally required by section 4, provided you include this license
  20. * notice and a URL through which recipients can access the Corresponding
  21. * Source.
  22. */
  23. // Derived from the work of Kirill Maltsev - https://github.com/posthtml/htmlnano
  24. this.singlefile.lib.modules.htmlMinifier = this.singlefile.lib.modules.htmlMinifier || (() => {
  // Attribute names treated as boolean: collapseBooleanAttributes() rewrites
  // their value to the empty string.
  // Source: https://github.com/kangax/html-minifier/issues/63
  const booleanAttributes = [
    "allowfullscreen", "async", "autofocus", "autoplay", "checked", "compact", "controls",
    "declare", "default", "defaultchecked", "defaultmuted", "defaultselected", "defer", "disabled",
    "enabled", "formnovalidate", "hidden", "indeterminate", "inert", "ismap", "itemscope",
    "loop", "multiple", "muted", "nohref", "noresize", "noshade", "novalidate",
    "nowrap", "open", "pauseonexit", "readonly", "required", "reversed", "scoped",
    "seamless", "selected", "sortable", "truespeed", "typemustmatch", "visible"
  ];

  // Lower-case tag names; noWhitespaceCollapse() reports false for these elements.
  const noWhitespaceCollapseElements = ["script", "style", "pre", "textarea"];

  // Attributes that removeEmptyAttributes() drops when their value is empty or
  // whitespace-only.
  // Source: https://www.w3.org/TR/html4/sgml/dtd.html#events (Generic Attributes)
  const safeToRemoveAttrs = [
    "id", "class", "style", "lang", "dir",
    "onclick", "ondblclick",
    "onmousedown", "onmouseup", "onmouseover", "onmousemove", "onmouseout",
    "onkeypress", "onkeydown", "onkeyup"
  ];

  // Per-tag table of attributes that removeRedundantAttributes() may drop.
  // A string entry is removed when the attribute value equals that string.
  // A function entry is called with the element and the attribute is removed
  // when the function returns a truthy value.
  const redundantAttributes = {
    form: { method: "get" },
    script: {
      language: "javascript",
      type: "text/javascript",
      // The charset attribute only really makes sense on “external” SCRIPT elements:
      // http://perfectionkills.com/optimizing-html/#8_script_charset
      // Truthy (attribute removed) when the script has no "src" attribute.
      charset: node => !node.getAttribute("src")
    },
    style: { media: "all", type: "text/css" },
    link: { media: "all" }
  };

  // Runs of spaces, tabs, form feeds and carriage returns (newlines excluded).
  const REGEXP_WHITESPACE = /[ \t\f\r]+/g;
  // Runs of consecutive newlines.
  const REGEXP_NEWLINE = /[\n]+/g;
  // Matches a non-empty string made only of whitespace.
  const REGEXP_ENDS_WHITESPACE = /^\s+$/;

  // Numeric values used in place of the NodeFilter/Node DOM constants.
  const NodeFilter_SHOW_ALL = 0xFFFFFFFF;
  const Node_ELEMENT_NODE = 1;
  const Node_TEXT_NODE = 3;
  const Node_COMMENT_NODE = 8;
  // Passes applied, in this order, to every node visited by process().
  // process() stops at the first pass that returns a truthy value and then
  // removes the node from the document.
  const modules = [
    collapseBooleanAttributes,
    mergeTextNodes,
    collapseWhitespace,
    removeComments,
    removeEmptyAttributes,
    removeRedundantAttributes,
    compressJSONLD,
    // Merge a <style> into the preceding <style> sibling, but only inside
    // <head> and only when both share the same "media" and "title".
    node => mergeElements(node, "style", (styleElement, previousStyleElement) => {
      const parentElement = styleElement.parentElement;
      return parentElement &&
        parentElement.tagName == "HEAD" &&
        styleElement.media == previousStyleElement.media &&
        styleElement.title == previousStyleElement.title;
    })
  ];
  127. return {
  128. process
  129. };
  /**
   * Minifies a document in place.
   *
   * Empty inline <style>/<script> elements are removed first, then every node
   * under doc.documentElement is visited with a TreeWalker and handed to each
   * entry of `modules` until one of them returns a truthy value; in that case
   * the node is removed from the document.
   *
   * @param {Document} doc - document to minify (mutated)
   * @param {Object} options - forwarded unchanged to every module
   */
  function process(doc, options) {
    removeEmptyInlineElements(doc);
    const walker = doc.createTreeWalker(doc.documentElement, NodeFilter_SHOW_ALL, null, false);
    let pendingNode = walker.nextNode();
    while (pendingNode) {
      const currentNode = pendingNode;
      const mustRemove = modules.some(module => module(currentNode, options));
      // Advance the walker before detaching the node it is currently on.
      pendingNode = walker.nextNode();
      if (mustRemove) {
        currentNode.remove();
      }
    }
  }
  /**
   * Rewrites the value of every attribute listed in `booleanAttributes` to the
   * empty string. The name comparison is case-sensitive. Non-element nodes
   * are ignored.
   *
   * @param {Node} node - node being visited (mutated when it is an element)
   */
  function collapseBooleanAttributes(node) {
    if (node.nodeType != Node_ELEMENT_NODE) {
      return;
    }
    // Iterate over a snapshot of the attribute list.
    for (const attribute of Array.from(node.attributes)) {
      const isBoolean = booleanAttributes.includes(attribute.name);
      if (isBoolean) {
        node.setAttribute(attribute.name, "");
      }
    }
  }
  /**
   * When a text node directly follows another text node, prepends the
   * previous node's text to it and removes the previous node.
   *
   * @param {Node} node - node being visited
   */
  function mergeTextNodes(node) {
    if (node.nodeType != Node_TEXT_NODE) {
      return;
    }
    const precedingNode = node.previousSibling;
    if (!precedingNode || precedingNode.nodeType != Node_TEXT_NODE) {
      return;
    }
    node.textContent = precedingNode.textContent + node.textContent;
    precedingNode.remove();
  }
  /**
   * Merges an element into the nearest preceding sibling element of the same
   * tag name, skipping over blank text nodes in between.
   *
   * When the merge is accepted, the node's text becomes the previous
   * element's text followed by its own, and the previous element plus the
   * blank text nodes separating them are removed.
   *
   * @param {Node} node - node being visited
   * @param {string} tagName - tag name to match (compared case-insensitively)
   * @param {function(Element, Element): *} acceptMerge - called with
   *   (node, previousElement); the merge happens only if it returns a truthy value
   */
  function mergeElements(node, tagName, acceptMerge) {
    if (node.nodeType != Node_ELEMENT_NODE || node.tagName.toLowerCase() != tagName.toLowerCase()) {
      return;
    }
    // Collect the whitespace-only text nodes that sit right before the element.
    const blankTextNodes = [];
    let candidate = node.previousSibling;
    while (candidate && candidate.nodeType == Node_TEXT_NODE && !candidate.textContent.trim()) {
      blankTextNodes.push(candidate);
      candidate = candidate.previousSibling;
    }
    const mergeable = candidate &&
      candidate.nodeType == Node_ELEMENT_NODE &&
      candidate.tagName == node.tagName &&
      acceptMerge(node, candidate);
    if (!mergeable) {
      return;
    }
    node.textContent = candidate.textContent + node.textContent;
    for (const blankTextNode of blankTextNodes) {
      blankTextNode.remove();
    }
    candidate.remove();
  }
  /**
   * Collapses whitespace inside a text node.
   *
   * Runs of spaces, tabs, form feeds and carriage returns are replaced by the
   * separator returned by getWhiteSpace(node), and runs of newlines by a
   * single newline. Nothing is changed when:
   *  - the node is not a text node,
   *  - its parent element carries the attribute named by
   *    options.PRESERVED_SPACE_ELEMENT_ATTRIBUTE_NAME with an empty value,
   *  - the parent or any ancestor element is one for which
   *    noWhitespaceCollapse() returns false,
   *  - the text is at most one character long.
   *
   * A text node without a parent element is collapsed like any other text
   * (the original implementation threw a TypeError in that case).
   *
   * @param {Node} node - node being visited (mutated when it is a text node)
   * @param {Object} options - must expose PRESERVED_SPACE_ELEMENT_ATTRIBUTE_NAME
   */
  function collapseWhitespace(node, options) {
    if (node.nodeType != Node_TEXT_NODE) {
      return;
    }
    const parentElement = node.parentElement;
    const spacePreserved = Boolean(parentElement) &&
      parentElement.getAttribute(options.PRESERVED_SPACE_ELEMENT_ATTRIBUTE_NAME) == "";
    if (spacePreserved) {
      return;
    }
    // Climb the ancestors while they allow collapsing. The walk ends either
    // past the top-most element (`element` becomes null/undefined) or on the
    // first ancestor that forbids collapsing.
    let element = parentElement;
    while (noWhitespaceCollapse(element)) {
      element = element.parentElement;
    }
    const collapseAllowed = !element;
    const textContent = node.textContent;
    if (collapseAllowed && textContent.length > 1) {
      node.textContent = textContent.replace(REGEXP_WHITESPACE, getWhiteSpace(node)).replace(REGEXP_NEWLINE, "\n");
    }
  }
  /**
   * Returns the separator used to replace collapsed whitespace: a newline for
   * nodes whose parent element is <head>, a single space otherwise.
   *
   * @param {Node} node - text node being collapsed
   * @returns {string} "\n" or " "
   */
  function getWhiteSpace(node) {
    const parentElement = node.parentElement;
    if (parentElement && parentElement.tagName == "HEAD") {
      return "\n";
    }
    return " ";
  }
  /**
   * Tells whether the element's tag is absent from
   * `noWhitespaceCollapseElements` (comparison on the lower-cased tag name).
   *
   * @param {Element|null|undefined} element - element to test
   * @returns {boolean|null|undefined} true when the tag is not in the list,
   *   false when it is; a falsy `element` is returned unchanged
   */
  function noWhitespaceCollapse(element) {
    if (!element) {
      return element;
    }
    const tagName = element.tagName.toLowerCase();
    return !noWhitespaceCollapseElements.includes(tagName);
  }
  /**
   * Decides whether a comment node must be removed.
   *
   * Comments whose parent element is <html> and non-comment nodes are left
   * alone (undefined is returned). For any other comment, returns true
   * unless its trimmed, lower-cased text starts with "[if".
   *
   * @param {Node} node - node being visited
   * @returns {boolean|undefined} true when the caller should remove the node
   */
  function removeComments(node) {
    if (node.nodeType != Node_COMMENT_NODE || node.parentElement.tagName == "HTML") {
      return;
    }
    const commentText = node.textContent.toLowerCase().trim();
    return !commentText.startsWith("[if");
  }
  /**
   * Removes, from an element, every attribute listed in `safeToRemoveAttrs`
   * (name compared in lower case) whose value is empty or whitespace-only.
   * Non-element nodes are ignored.
   *
   * @param {Node} node - node being visited (mutated when it is an element)
   */
  function removeEmptyAttributes(node) {
    if (node.nodeType != Node_ELEMENT_NODE) {
      return;
    }
    // Iterate over a snapshot since attributes are removed along the way.
    for (const attribute of Array.from(node.attributes)) {
      if (!safeToRemoveAttrs.includes(attribute.name.toLowerCase())) {
        continue;
      }
      const value = node.getAttribute(attribute.name);
      const isBlank = value == "" || REGEXP_ENDS_WHITESPACE.test(value || "");
      if (isBlank) {
        node.removeAttribute(attribute.name);
      }
    }
  }
  /**
   * Removes attributes that `redundantAttributes` declares redundant for the
   * element's tag (looked up by lower-cased tag name).
   *
   * For a string rule the attribute is removed when its value equals the
   * string; for a function rule it is removed when the function, called with
   * the element, returns a truthy value. Non-element nodes are ignored.
   *
   * @param {Node} node - node being visited (mutated when it is an element)
   */
  function removeRedundantAttributes(node) {
    if (node.nodeType != Node_ELEMENT_NODE) {
      return;
    }
    const rules = redundantAttributes[node.tagName.toLowerCase()];
    if (!rules) {
      return;
    }
    for (const attributeName of Object.keys(rules)) {
      const rule = rules[attributeName];
      const isRedundant = typeof rule == "function" ?
        rule(node) :
        node.getAttribute(attributeName) == rule;
      if (isRedundant) {
        node.removeAttribute(attributeName);
      }
    }
  }
  /**
   * Re-serializes the JSON held by a non-blank
   * <script type="application/ld+json"> element without insignificant
   * whitespace. Content that fails to parse is left untouched.
   *
   * @param {Node} node - node being visited
   */
  function compressJSONLD(node) {
    const isJSONLDScript = node.nodeType == Node_ELEMENT_NODE &&
      node.tagName == "SCRIPT" &&
      node.type == "application/ld+json";
    if (!isJSONLDScript || !node.textContent.trim()) {
      return;
    }
    try {
      const data = JSON.parse(node.textContent);
      node.textContent = JSON.stringify(data);
    } catch (error) {
      // ignored: invalid JSON-LD is kept as is
    }
  }
  /**
   * Removes every <style> element and every <script> element without a
   * "src" attribute whose text content is empty or whitespace-only.
   *
   * @param {Document} doc - document to clean (mutated)
   */
  function removeEmptyInlineElements(doc) {
    const isBlank = element => !element.textContent.trim();
    const inlineElements = doc.querySelectorAll("style, script:not([src])");
    inlineElements.forEach(element => {
      if (isBlank(element)) {
        element.remove();
      }
    });
  }
  244. })();