/*
 * Copyright 2018 Gildas Lormeau
 * contact : gildas.lormeau <at> gmail.com
 *
 * This file is part of SingleFile.
 *
 * SingleFile is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * SingleFile is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with SingleFile. If not, see <http://www.gnu.org/licenses/>.
 */
/* global Node */
  21. this.serializer = this.serializer || (() => {
  // Lower-case names of the elements that are serialized without an end tag.
  // "basefont", "bgsound" and "frame" are included because the HTML serialization
  // algorithm (used by outerHTML) also skips the end tag for them; "command" is a
  // legacy entry kept from the original list.
  const SELF_CLOSED_TAG_NAMES = [
    "area", "base", "basefont", "bgsound", "br", "col", "command", "embed", "frame", "hr",
    "img", "input", "keygen", "link", "meta", "param", "source", "track", "wbr"
  ];
  // see https://www.w3.org/TR/html5/syntax.html#optional-tags
  // Start tags that may be left out of the output. Each entry pairs a lower-case tag
  // name with an `accept(element)` predicate telling whether the start tag of that
  // particular element can be dropped.
  const OMITTED_START_TAGS = [
    {
      tagName: "head",
      // Droppable when the head is empty or when its first child is an element.
      accept(element) {
        if (!element.childNodes.length) {
          return true;
        }
        return element.childNodes[0].nodeType == Node.ELEMENT_NODE;
      }
    },
    {
      tagName: "body",
      // Droppable only when the body has no child node at all.
      accept(element) {
        return !element.childNodes.length;
      }
    }
  ];
  // End tags that may be left out of the output. Each entry pairs a lower-case tag name
  // with an `accept(next)` predicate; `next` is the next sibling of the element (falsy
  // when there is none) and the predicate returns true when the end tag can be dropped.

  // Accepts when there is no next sibling or when its tagName is one of `siblingTagNames`.
  const endTagOmittedBefore = (...siblingTagNames) => next => !next || siblingTagNames.includes(next.tagName);
  // Accepts unless the next sibling is a comment.
  const endTagOmittedUnlessComment = next => !next || next.nodeType != Node.COMMENT_NODE;
  // Accepts unless the next sibling is a comment or a text node starting with a space character.
  const endTagOmittedUnlessCommentOrSpace = next => {
    if (!next) {
      return true;
    }
    if (next.nodeType == Node.COMMENT_NODE) {
      return false;
    }
    return next.nodeType != Node.TEXT_NODE || !spaceFirstCharacter(next.textContent);
  };
  const OMITTED_END_TAGS = [
    { tagName: "html", accept: endTagOmittedUnlessComment },
    { tagName: "head", accept: endTagOmittedUnlessCommentOrSpace },
    { tagName: "body", accept: endTagOmittedUnlessComment },
    { tagName: "li", accept: endTagOmittedBefore("LI") },
    { tagName: "dt", accept: endTagOmittedBefore("DT", "DD") },
    { tagName: "dd", accept: endTagOmittedBefore("DT", "DD") },
    { tagName: "rt", accept: endTagOmittedBefore("RT", "RP") },
    { tagName: "rp", accept: endTagOmittedBefore("RT", "RP") },
    { tagName: "optgroup", accept: endTagOmittedBefore("OPTGROUP") },
    { tagName: "option", accept: endTagOmittedBefore("OPTION", "OPTGROUP") },
    { tagName: "colgroup", accept: endTagOmittedUnlessCommentOrSpace },
    { tagName: "caption", accept: endTagOmittedUnlessCommentOrSpace },
    { tagName: "thead", accept: endTagOmittedBefore("TBODY", "TFOOT") },
    { tagName: "tbody", accept: endTagOmittedBefore("TBODY", "TFOOT") },
    { tagName: "tfoot", accept: endTagOmittedBefore() },
    { tagName: "tr", accept: endTagOmittedBefore("TR") },
    { tagName: "td", accept: endTagOmittedBefore("TD", "TH") },
    { tagName: "th", accept: endTagOmittedBefore("TD", "TH") }
  ];
  48. return {
  49. process(doc, compressHTML) {
  50. return getDoctype(doc) + (compressHTML ? serialize(doc.documentElement) : doc.documentElement.outerHTML);
  51. }
  52. };
  /**
   * Builds the doctype declaration of a document.
   *
   * @param {Document} doc - document whose `doctype` property is read
   * @returns {string} the declaration followed by a line feed, or an empty string when
   * the document has no doctype
   */
  function getDoctype(doc) {
    const { doctype } = doc;
    if (!doctype) {
      return "";
    }
    const { nodeName, publicId, systemId, internalSubset } = doctype;
    let declaration = `<!DOCTYPE ${nodeName}`;
    if (publicId) {
      declaration += ` PUBLIC "${publicId}"`;
      // the system identifier directly follows the public one, without a keyword
      if (systemId) {
        declaration += ` "${systemId}"`;
      }
    } else if (systemId) {
      declaration += ` SYSTEM "${systemId}"`;
    }
    if (internalSubset) {
      declaration += ` [${internalSubset}]`;
    }
    return `${declaration}>\n`;
  }
  /**
   * Serializes a node by dispatching on its type.
   *
   * @param {Node} node - text, comment or element node
   * @returns {string} the markup of the node; an empty string for any other node type,
   * so that callers concatenating the result never append the text "undefined"
   */
  function serialize(node) {
    switch (node.nodeType) {
      case Node.TEXT_NODE:
        return serializeTextNode(node);
      case Node.COMMENT_NODE:
        return serializeCommentNode(node);
      case Node.ELEMENT_NODE:
        return serializeElement(node);
      default:
        return "";
    }
  }
  /**
   * Serializes a text node.
   *
   * The characters "&", "<", ">" and U+00A0 are escaped so that the text is read back
   * as text when the output is parsed. Text whose parent is an HTML raw text element
   * (script, style, ...) is returned unchanged because character references are not
   * decoded there.
   *
   * @param {Text} textNode - node to serialize; its `parentNode` is read when present
   * @returns {string} the markup representing the text
   */
  function serializeTextNode(textNode) {
    const HTML_NAMESPACE_URI = "http://www.w3.org/1999/xhtml";
    const RAW_TEXT_TAG_NAMES = ["style", "script", "xmp", "iframe", "noembed", "noframes", "plaintext", "noscript"];
    const parentNode = textNode.parentNode;
    if (parentNode && parentNode.tagName) {
      // Same-named SVG/MathML elements (e.g. an SVG <script>) are parsed as regular
      // content, so only elements without a foreign namespace are left unescaped.
      const foreignParent = Boolean(parentNode.namespaceURI) && parentNode.namespaceURI != HTML_NAMESPACE_URI;
      if (!foreignParent && RAW_TEXT_TAG_NAMES.includes(parentNode.tagName.toLowerCase())) {
        return textNode.textContent;
      }
    }
    return textNode.textContent
      .replace(/&/g, "&amp;")
      .replace(/\u00a0/g, "&nbsp;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;");
  }
  /**
   * Serializes a comment node.
   *
   * @param {Comment} commentNode - node to serialize
   * @returns {string} the text content of the node wrapped in comment delimiters
   */
  function serializeCommentNode(commentNode) {
    const { textContent } = commentNode;
    return `<!--${textContent}-->`;
  }
  /**
   * Serializes an element, its attributes and its descendants to compact HTML.
   *
   * The start tag is left out when an OMITTED_START_TAGS entry accepts the element and
   * the element has no attribute. The end tag is left out for the tag names listed in
   * SELF_CLOSED_TAG_NAMES and when an OMITTED_END_TAGS entry accepts the next sibling.
   *
   * @param {Element} element - element to serialize
   * @returns {string} the markup of the element
   */
  function serializeElement(element) {
    const tagName = element.tagName.toLowerCase();
    const omittedStartTag = OMITTED_START_TAGS.find(omittedStartTag => tagName == omittedStartTag.tagName && omittedStartTag.accept(element));
    let content = "";
    if (!omittedStartTag || element.attributes.length) {
      content = "<" + tagName;
      Array.from(element.attributes).forEach(attribute => content += serializeAttribute(attribute, element));
      content += ">";
    }
    // The children of a <template> are held by its `content` fragment, not by the
    // element itself; reading `childNodes` alone would silently drop them.
    const childNodes = tagName == "template" && element.content ? element.content.childNodes : element.childNodes;
    Array.from(childNodes).forEach(childNode => content += serialize(childNode));
    const nextSibling = element.nextSibling;
    const omittedEndTag = OMITTED_END_TAGS.find(omittedEndTag => tagName == omittedEndTag.tagName && omittedEndTag.accept(nextSibling));
    if (!omittedEndTag && !SELF_CLOSED_TAG_NAMES.includes(tagName)) {
      content += "</" + tagName + ">";
    }
    return content;
  }

  /**
   * Serializes one attribute, leading space included.
   *
   * `attribute.name` is the qualified name, so a namespace prefix such as "xlink:" or
   * "xml:" is already part of it and needs no special handling.
   *
   * @param {Attr} attribute - attribute to serialize
   * @param {Element} element - owner element, used to normalize the `class` value
   * @returns {string} the attribute markup, or an empty string when nothing remains of
   * the name once the characters that are invalid in a name have been removed
   */
  function serializeAttribute(attribute, element) {
    const name = attribute.name.replace(/["'>/=]/g, "");
    if (!name) {
      // emitting ` =value` would produce a different attribute when parsed again
      return "";
    }
    let value = attribute.value;
    if (name == "class") {
      value = Array.from(element.classList).map(className => className.trim()).join(" ");
    }
    value = value.replace(/&/g, "&amp;").replace(/\u00a0/g, "&nbsp;").replace(/"/g, "&quot;");
    let content = " " + name;
    if (value != "") {
      // quotes are only needed when the value holds a character that ends or breaks an unquoted value
      const quote = /^[^ \t\n\f\r"'`=<>]+$/.test(value) ? "" : "\"";
      content += "=" + quote + value + quote;
    }
    return content;
  }
  /**
   * Tells whether a text starts with a space character (space, tab, line feed,
   * form feed or carriage return).
   *
   * @param {string} textContent - text to test
   * @returns {boolean} true when the first character is a space character, false
   * otherwise (including for an empty text)
   */
  function spaceFirstCharacter(textContent) {
    const firstCharacter = textContent.charAt(0);
    return /[\u0020\u0009\u000A\u000C\u000D]+/.test(firstCharacter); // eslint-disable-line no-control-regex
  }
  140. })();