serializer.js 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130
  1. /*
  2. * Copyright 2018 Gildas Lormeau
  3. * contact : gildas.lormeau <at> gmail.com
  4. *
  5. * This file is part of SingleFile.
  6. *
  7. * SingleFile is free software: you can redistribute it and/or modify
  8. * it under the terms of the GNU Lesser General Public License as published by
  9. * the Free Software Foundation, either version 3 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * SingleFile is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public License
  18. * along with SingleFile. If not, see <http://www.gnu.org/licenses/>.
  19. */
  20. /* global Node */
  21. this.serializer = this.serializer || (() => {
  // Lowercase names of elements that are serialized without an end tag.
  // "basefont", "bgsound" and "frame" are included because the HTML fragment
  // serialization algorithm also treats them as void; "command" and "keygen"
  // are kept for backward compatibility.
  const SELF_CLOSED_TAG_NAMES = [
    "area", "base", "basefont", "bgsound", "br", "col", "command", "embed", "frame", "hr", "img",
    "input", "keygen", "link", "meta", "param", "source", "track", "wbr"
  ];
  // Elements whose end tag may be left out of the compressed output.
  // `tagName` is the lowercase name of the element; `followings` (optional) lists
  // the lowercase names of next-sibling elements that allow the omission. An entry
  // without `followings` only matches when the element has no next sibling.
  const OMITTED_END_TAGS = [
    { tagName: "li", followings: ["li"] },
    { tagName: "dt", followings: ["dt", "dd"] },
    { tagName: "dd", followings: ["dt", "dd"] },
    { tagName: "rt", followings: ["rt", "rp"] },
    { tagName: "rp", followings: ["rt", "rp"] },
    { tagName: "optgroup", followings: ["optgroup"] },
    { tagName: "option", followings: ["option", "optgroup"] },
    { tagName: "thead", followings: ["tbody", "tfoot"] },
    { tagName: "tbody", followings: ["tbody", "tfoot"] },
    { tagName: "tfoot" },
    { tagName: "tr", followings: ["tr"] },
    { tagName: "td", followings: ["td", "th"] },
    { tagName: "th", followings: ["td", "th"] },
  ];
  38. return {
  39. process(doc, compressHTML) {
  40. return getDoctype(doc) + (compressHTML ? serialize(doc.documentElement) : doc.documentElement.outerHTML);
  41. }
  42. };
  /**
   * Builds the `<!DOCTYPE ...>` declaration of a document.
   *
   * @param {Document} doc - document whose `doctype` property is read
   * @returns {string} the declaration followed by a line feed, or an empty string
   *   when the document has no doctype
   */
  function getDoctype(doc) {
    const docType = doc.doctype;
    if (!docType) {
      return "";
    }
    const parts = ["<!DOCTYPE " + docType.nodeName];
    const publicId = docType.publicId;
    const systemId = docType.systemId;
    const internalSubset = docType.internalSubset;
    if (publicId) {
      parts.push(" PUBLIC \"" + publicId + "\"");
      // After a public identifier the system identifier is written without the SYSTEM keyword.
      if (systemId) {
        parts.push(" \"" + systemId + "\"");
      }
    } else if (systemId) {
      parts.push(" SYSTEM \"" + systemId + "\"");
    }
    if (internalSubset) {
      parts.push(" [" + internalSubset + "]");
    }
    parts.push(">\n");
    return parts.join("");
  }
  /**
   * Serializes a DOM node by dispatching on its node type.
   *
   * Text, comment and element nodes are delegated to their dedicated serializers.
   * Any other node type yields an empty string, so that callers concatenating the
   * result never append the literal text "undefined" to the output.
   *
   * @param {Node} node - node to serialize
   * @returns {string} the markup of the node, or "" for unsupported node types
   */
  function serialize(node) {
    switch (node.nodeType) {
      case Node.TEXT_NODE:
        return serializeTextNode(node);
      case Node.COMMENT_NODE:
        return serializeCommentNode(node);
      case Node.ELEMENT_NODE:
        return serializeElement(node);
      default:
        return "";
    }
  }
  /**
   * Serializes a text node.
   *
   * The text is returned verbatim when the parent is an element whose content is
   * raw text (for example `script` or `style`). Otherwise "&", U+00A0, "<" and ">"
   * are replaced with character references so that the text cannot be re-parsed
   * as markup.
   *
   * @param {Text} textNode - text node to serialize
   * @returns {string} the text, escaped unless its parent holds raw text
   */
  function serializeTextNode(textNode) {
    // Declared inside the function: the enclosing scope returns its API object before
    // this declaration would be evaluated, so a sibling `const` would never be initialized.
    const RAW_TEXT_PARENT_TAG_NAMES = ["style", "script", "xmp", "iframe", "noembed", "noframes", "plaintext", "noscript"];
    const parentNode = textNode.parentNode;
    const hasElementParent = Boolean(parentNode) && parentNode.nodeType === Node.ELEMENT_NODE;
    if (hasElementParent && RAW_TEXT_PARENT_TAG_NAMES.includes(parentNode.tagName.toLowerCase())) {
      return textNode.textContent;
    }
    // "&" must be replaced first so the entities added afterwards are not escaped twice.
    return textNode.textContent
      .replace(/&/g, "&amp;")
      .replace(/\u00a0/g, "&nbsp;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;");
  }
  /**
   * Serializes a comment node by wrapping its text content in comment delimiters.
   *
   * @param {Comment} commentNode - comment node to serialize
   * @returns {string} the comment markup
   */
  function serializeCommentNode(commentNode) {
    const commentText = commentNode.textContent;
    return `<!--${commentText}-->`;
  }
  /**
   * Serializes an element: start tag with its attributes, then its child nodes,
   * then the end tag unless it can be left out.
   *
   * Attribute values are escaped ("&", U+00A0 and the double quote) and are only
   * wrapped in double quotes when they are not valid unquoted values. The end tag
   * is skipped for elements listed in `SELF_CLOSED_TAG_NAMES` and for elements
   * matching an `OMITTED_END_TAGS` entry.
   *
   * @param {Element} element - element to serialize
   * @returns {string} the markup of the element and its descendants
   */
  function serializeElement(element) {
    const tagName = element.tagName.toLowerCase();
    let content = "<" + tagName;
    for (const attribute of Array.from(element.attributes)) {
      let value = attribute.value;
      if (attribute.name === "class") {
        value = element.classList.toString().trim();
      }
      value = value.replace(/&/g, "&amp;").replace(/\u00a0/g, "&nbsp;").replace(/"/g, "&quot;");
      // An empty value does not match and is therefore written as a quoted empty string.
      const isValidUnquotedValue = /^[^ \t\n\f\r"'`=<>]+$/.test(value);
      const quote = isValidUnquotedValue ? "" : "\"";
      content += " " + serializeAttributeName(attribute) + "=" + quote + value + quote;
    }
    content += ">";
    for (const childNode of Array.from(element.childNodes)) {
      content += serialize(childNode);
    }
    const nextSibling = element.nextSibling;
    // `tagName` can be uppercase while the lookup tables hold lowercase names,
    // so the sibling name is normalized before it is compared.
    const nextSiblingTagName = nextSibling && nextSibling.nodeType === Node.ELEMENT_NODE ? nextSibling.tagName.toLowerCase() : undefined;
    const isEndTagOmitted = OMITTED_END_TAGS.some(omittedEndTag =>
      tagName === omittedEndTag.tagName &&
      (!nextSibling || (nextSiblingTagName !== undefined && Boolean(omittedEndTag.followings) && omittedEndTag.followings.includes(nextSiblingTagName))));
    if (!isEndTagOmitted && !SELF_CLOSED_TAG_NAMES.includes(tagName)) {
      content += "</" + tagName + ">";
    }
    return content;
  }

  /**
   * Returns the name under which an attribute is serialized.
   *
   * Attributes in the XML, XMLNS and XLink namespaces get their conventional prefix
   * in front of their local name; every other attribute keeps its `name`.
   *
   * @param {Attr} attribute - attribute whose serialized name is needed
   * @returns {string} the attribute name to write in the start tag
   */
  function serializeAttributeName(attribute) {
    switch (attribute.namespaceURI) {
      case "http://www.w3.org/XML/1998/namespace":
        return "xml:" + attribute.localName;
      case "http://www.w3.org/2000/xmlns/":
        return attribute.localName === "xmlns" ? "xmlns" : "xmlns:" + attribute.localName;
      case "http://www.w3.org/1999/xlink":
        return "xlink:" + attribute.localName;
      default:
        return attribute.name;
    }
  }
  120. })();