html-srcset-parser.js 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317
  1. /**
  2. * Srcset Parser
  3. *
  4. * By Alex Bell | MIT License
  5. *
  6. * JS Parser for the string value that appears in markup <img srcset="here">
  7. *
  8. * @returns Array [{url: _, d: _, w: _, h:_}, ...]
  9. *
  10. * Based super duper closely on the reference algorithm at:
  11. * https://html.spec.whatwg.org/multipage/embedded-content.html#parse-a-srcset-attribute
  12. *
  13. * Most comments are copied in directly from the spec
  14. * (except for comments in parens).
  15. */
  16. this.srcsetParser = this.srcsetParser || (() => {
  17. return {
  18. process
  19. };
  20. // 1. Let input be the value passed to this algorithm.
  /**
   * Parses the string value of an `<img srcset="...">` attribute.
   *
   * Follows the WHATWG "parse a srcset attribute" algorithm step by step
   * (numbered comments below are the spec's steps; remarks in parentheses are
   * implementation notes). Candidates whose descriptors are invalid are
   * skipped and reported through `console.log`; they never throw.
   *
   * @param {string} input - Raw value of the srcset attribute.
   * @returns {Array<{url: string, w?: number, d?: number, h?: number}>}
   *   One entry per valid image candidate, in source order. `w`, `d` and `h`
   *   are present only when the matching descriptor was given.
   */
  function process(input) {
    // (Don't use \s, to avoid matching non-breaking space)
    /* eslint-disable no-control-regex */
    const regexLeadingSpaces = /^[ \t\n\r\u000c]+/;
    const regexLeadingCommasOrSpaces = /^[, \t\n\r\u000c]+/;
    const regexLeadingNotSpaces = /^[^ \t\n\r\u000c]+/;
    const regexTrailingCommas = /[,]+$/;
    const regexNonNegativeInteger = /^\d+$/;
    /* eslint-enable no-control-regex */

    // (Positive or negative or unsigned integers or decimals, with or without
    // exponents. Must include at least one digit.
    // According to spec tests any decimal point must be followed by a digit.
    // No leading plus sign is allowed.)
    // https://html.spec.whatwg.org/multipage/infrastructure.html#valid-floating-point-number
    const regexFloatingPoint = /^-?(?:[0-9]+|[0-9]*\.[0-9]+)(?:[eE][+-]?[0-9]+)?$/;

    const inputLength = input.length;

    // 2. Let position be a pointer into input, initially pointing at the start
    //    of the string.
    let pos = 0;

    // 3. Let candidates be an initially empty source set.
    const candidates = [];

    // (State of the candidate currently being parsed; shared with the helpers.)
    let url;
    let descriptors;

    // UTILITY FUNCTIONS

    /**
     * Tells whether `c` is one of the five HTML space characters.
     * (Manual comparison is faster than a RegEx here.)
     * http://bjorn.tipling.com/state-and-regular-expressions-in-javascript
     */
    function isSpace(c) {
      return (
        c === "\u0020" || // space
        c === "\u0009" || // horizontal tab
        c === "\u000A" || // new line
        c === "\u000C" || // form feed
        c === "\u000D" // carriage return
      );
    }

    /**
     * Matches `regEx` against the unread remainder of input and, on a match,
     * advances `pos` past the matched text.
     * @returns {string|undefined} The matched text, or undefined on no match.
     */
    function collectCharacters(regEx) {
      const match = regEx.exec(input.substring(pos));
      if (!match) {
        return undefined;
      }
      const chars = match[0];
      pos += chars.length;
      return chars;
    }

    /**
     * Tokenizes the descriptors that follow the current url into
     * `descriptors`, then hands over to parseDescriptors().
     * Returns undefined.
     */
    function tokenize() {
      // 8.1. Descriptor tokeniser: Skip whitespace
      collectCharacters(regexLeadingSpaces);

      // 8.2. Let current descriptor be the empty string.
      let currentDescriptor = "";

      // 8.3. Let state be in descriptor.
      let state = "in descriptor";

      while (true) { // eslint-disable-line no-constant-condition
        // 8.4. Let c be the character at position.
        // (charAt yields "" past the end of input; that stands for the spec's
        // special "EOF" character.)
        const c = input.charAt(pos);

        if (state === "in descriptor") {
          if (isSpace(c)) {
            // Space character: If current descriptor is not empty, append it
            // to descriptors and let current descriptor be the empty string.
            // Set state to after descriptor.
            if (currentDescriptor) {
              descriptors.push(currentDescriptor);
              currentDescriptor = "";
            }
            // (Set unconditionally, as the spec words it. The descriptor is
            // never empty here because leading whitespace was skipped above.)
            state = "after descriptor";
          } else if (c === ",") {
            // U+002C COMMA: Advance position. If current descriptor is not
            // empty, append it to descriptors. Jump to descriptor parser.
            pos += 1;
            if (currentDescriptor) {
              descriptors.push(currentDescriptor);
            }
            parseDescriptors();
            return;
          } else if (c === "\u0028") {
            // U+0028 LEFT PARENTHESIS: Append c. Set state to in parens.
            currentDescriptor += c;
            state = "in parens";
          } else if (c === "") {
            // EOF: If current descriptor is not empty, append it to
            // descriptors. Jump to descriptor parser.
            if (currentDescriptor) {
              descriptors.push(currentDescriptor);
            }
            parseDescriptors();
            return;
          } else {
            // Anything else: Append c to current descriptor.
            currentDescriptor += c;
          }
        } else if (state === "in parens") {
          if (c === ")") {
            // U+0029 RIGHT PARENTHESIS: Append c. Set state to in descriptor.
            currentDescriptor += c;
            state = "in descriptor";
          } else if (c === "") {
            // EOF: Append current descriptor to descriptors. Jump to
            // descriptor parser.
            descriptors.push(currentDescriptor);
            parseDescriptors();
            return;
          } else {
            // Anything else: Append c to current descriptor.
            currentDescriptor += c;
          }
        } else if (state === "after descriptor") {
          if (isSpace(c)) {
            // Space character: Stay in this state.
          } else if (c === "") {
            // EOF: Jump to descriptor parser.
            parseDescriptors();
            return;
          } else {
            // Anything else: Set state to in descriptor. Set position to the
            // previous character (undone by the advance below, so the same
            // character is re-read in the new state).
            state = "in descriptor";
            pos -= 1;
          }
        }

        // Advance position to the next character in input.
        pos += 1;
        // Repeat this step.
      }
    }

    /**
     * Validates the collected `descriptors` and, when they are all valid,
     * appends a candidate for the current `url` to `candidates`.
     * @return undefined
     */
    function parseDescriptors() {
      // 9. Descriptor parser: Let error be no.
      let hasError = false;
      // (First descriptor that triggered an error, for the log message.)
      let offendingDescriptor;
      // 10. Let width be absent.
      // 11. Let density be absent.
      // 12. Let future-compat-h be absent. (We're implementing it now as h)
      // ("absent" is modelled as undefined, so a density of 0 still counts
      // as present.)
      let w;
      let d;
      let h;

      const flagError = (descriptor) => {
        if (!hasError) {
          offendingDescriptor = descriptor;
        }
        hasError = true;
      };

      // 13. For each descriptor in descriptors, run the appropriate set of
      //     steps from the following list:
      for (const desc of descriptors) {
        const lastChar = desc[desc.length - 1];
        const value = desc.substring(0, desc.length - 1);

        if (lastChar === "w" && regexNonNegativeInteger.test(value)) {
          // A valid non-negative integer followed by "w".
          // If width and density are not both absent, then let error be yes.
          if (w !== undefined || d !== undefined) {
            flagError(desc);
          }
          // If the result is zero, let error be yes. Otherwise, let width be
          // the result.
          const intVal = parseInt(value, 10);
          if (intVal === 0) {
            flagError(desc);
          } else {
            w = intVal;
          }
        } else if (lastChar === "x" && regexFloatingPoint.test(value)) {
          // A valid floating-point number followed by "x".
          // If width, density and future-compat-h are not all absent, then
          // let error be yes.
          if (w !== undefined || d !== undefined || h !== undefined) {
            flagError(desc);
          }
          // If the result is less than zero, let error be yes. Otherwise,
          // let density be the result.
          const floatVal = parseFloat(value);
          if (floatVal < 0) {
            flagError(desc);
          } else {
            d = floatVal;
          }
        } else if (lastChar === "h" && regexNonNegativeInteger.test(value)) {
          // A valid non-negative integer followed by "h".
          // If height and density are not both absent, then let error be yes.
          if (h !== undefined || d !== undefined) {
            flagError(desc);
          }
          // If the result is zero, let error be yes. Otherwise, let
          // future-compat-h be the result.
          const intVal = parseInt(value, 10);
          if (intVal === 0) {
            flagError(desc);
          } else {
            h = intVal;
          }
        } else {
          // Anything else: Let error be yes.
          flagError(desc);
        }
      }

      // 15. If error is still no, then append a new image source to candidates
      //     whose URL is url, associated with a width width if not absent and
      //     a pixel density density if not absent. Otherwise, there is a
      //     parse error.
      if (!hasError) {
        const candidate = { url };
        if (w !== undefined) { candidate.w = w; }
        if (d !== undefined) { candidate.d = d; }
        if (h !== undefined) { candidate.h = h; }
        candidates.push(candidate);
      } else if (typeof console !== "undefined" && console.log) { // eslint-disable-line no-console
        console.log(`Invalid srcset descriptor found in "${input}" at "${offendingDescriptor}".`); // eslint-disable-line no-console
      }
    }

    // 4. Splitting loop: Collect a sequence of characters that are space
    //    characters or U+002C COMMA characters. If any U+002C COMMA characters
    //    were collected, that is a parse error.
    while (true) { // eslint-disable-line no-constant-condition
      collectCharacters(regexLeadingCommasOrSpaces);

      // 5. If position is past the end of input, return candidates and abort
      //    these steps.
      if (pos >= inputLength) {
        return candidates; // (we're done, this is the sole return path)
      }

      // 6. Collect a sequence of characters that are not space characters,
      //    and let that be url.
      url = collectCharacters(regexLeadingNotSpaces);

      // 7. Let descriptors be a new empty list.
      descriptors = [];

      // 8. If url ends with a U+002C COMMA character (,), follow these substeps:
      //    (1). Remove all trailing U+002C COMMA characters from url. If this
      //    removed more than one character, that is a parse error.
      if (url.slice(-1) === ",") {
        url = url.replace(regexTrailingCommas, "");
        // (Jump ahead to step 9 to skip tokenization and just push the candidate).
        parseDescriptors();
      } else {
        // Otherwise, tokenize the descriptors (which ends in step 9 as well).
        tokenize();
      }
      // 16. Return to the step labeled splitting loop.
    }
  }
  258. })();