html-srcset-parser.js 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319
  1. /**
  2. * Srcset Parser
  3. *
  4. * By Alex Bell | MIT License
  5. *
  6. * JS Parser for the string value that appears in markup <img srcset="here">
  7. *
  8. * @returns Array [{url: _, d: _, w: _, h:_}, ...]
  9. *
  10. * Based super duper closely on the reference algorithm at:
  11. * https://html.spec.whatwg.org/multipage/embedded-content.html#parse-a-srcset-attribute
  12. *
  13. * Most comments are copied in directly from the spec
  14. * (except for comments in parens).
  15. */
  16. // derived from https://github.com/albell/parse-srcset
  17. this.srcsetParser = this.srcsetParser || (() => {
  18. return {
  19. process
  20. };
  21. // 1. Let input be the value passed to this algorithm.
  /**
   * Parses the string value of an `<img srcset="...">` attribute into a list of
   * image candidates, following the WHATWG "parse a srcset attribute" algorithm.
   * (Step numbers in the comments below refer to that algorithm.)
   *
   * Candidates whose descriptors are invalid are dropped and reported through
   * `console.log` (when a console is available); they never throw.
   *
   * @param {string} input - Raw srcset attribute value. `null`/`undefined` is
   *   treated as "no candidates".
   * @returns {Array<{url: string, w?: number, d?: number, h?: number}>}
   *   One entry per valid candidate, in source order. `w`, `d` and `h` are only
   *   present when the corresponding descriptor was given.
   */
  function process(input) {
    // Robustness: a missing attribute value yields no candidates instead of a TypeError.
    if (input === null || input === undefined) {
      return [];
    }

    const inputLength = input.length;

    // (Don't use \s, to avoid matching non-breaking space)
    /* eslint-disable no-control-regex */
    const regexLeadingSpaces = /^[ \t\n\r\u000c]+/;
    const regexLeadingCommasOrSpaces = /^[, \t\n\r\u000c]+/;
    const regexLeadingNotSpaces = /^[^ \t\n\r\u000c]+/;
    const regexTrailingCommas = /[,]+$/;
    const regexNonNegativeInteger = /^\d+$/;
    /* eslint-enable no-control-regex */

    // (Positive or negative or unsigned integers or decimals, with or without exponents.
    // Must include at least one digit.
    // According to spec tests any decimal point must be followed by a digit.
    // No leading plus sign is allowed.)
    // https://html.spec.whatwg.org/multipage/infrastructure.html#valid-floating-point-number
    const regexFloatingPoint = /^-?(?:[0-9]+|[0-9]*\.[0-9]+)(?:[eE][+-]?[0-9]+)?$/;

    // 2. Let position be a pointer into input, initially pointing at the start
    //    of the string.
    let pos = 0;

    // 3. Let candidates be an initially empty source set.
    const candidates = [];

    /**
     * Tells whether `c` is one of the five HTML space characters.
     * Explicit comparisons are used on purpose: the empty string (used as the
     * EOF marker by the tokenizer) must NOT count as a space.
     */
    function isSpace(c) {
      return (
        c === "\u0020" || // space
        c === "\u0009" || // horizontal tab
        c === "\u000A" || // new line
        c === "\u000C" || // form feed
        c === "\u000D" // carriage return
      );
    }

    /**
     * Matches `regEx` (which must be anchored with ^) against the input starting
     * at `pos`. On a match, advances `pos` past it and returns the matched text;
     * otherwise leaves `pos` alone and returns undefined.
     */
    function collectCharacters(regEx) {
      const match = regEx.exec(input.substring(pos));
      if (!match) {
        return undefined;
      }
      pos += match[0].length;
      return match[0];
    }

    /**
     * Steps 8.1 - 8.4: splits the descriptor part of the current candidate into
     * raw descriptor strings, advancing `pos` up to the end of the candidate
     * (just past its terminating comma, or to the end of input).
     *
     * @returns {string[]} The descriptors collected for the current candidate.
     */
    function tokenizeDescriptors() {
      const descriptors = [];

      // 8.1. Descriptor tokeniser: Skip whitespace
      collectCharacters(regexLeadingSpaces);

      // 8.2. Let current descriptor be the empty string.
      let currentDescriptor = "";

      // 8.3. Let state be in descriptor.
      let state = "in descriptor";

      while (true) { // eslint-disable-line no-constant-condition
        // 8.4. Let c be the character at position.
        // ("EOF" is represented by the empty string that charAt returns when
        // position is past the end of input.)
        const c = input.charAt(pos);

        if (state === "in descriptor") {
          if (isSpace(c)) {
            // Space character: If current descriptor is not empty, append it to
            // descriptors, reset it and set state to after descriptor.
            if (currentDescriptor) {
              descriptors.push(currentDescriptor);
              currentDescriptor = "";
              state = "after descriptor";
            }
          } else if (c === ",") {
            // U+002C COMMA: Advance position, flush the current descriptor and
            // jump to the descriptor parser.
            pos += 1;
            if (currentDescriptor) {
              descriptors.push(currentDescriptor);
            }
            return descriptors;
          } else if (c === "\u0028") {
            // U+0028 LEFT PARENTHESIS: Append c and set state to in parens.
            currentDescriptor += c;
            state = "in parens";
          } else if (c === "") {
            // EOF: flush the current descriptor and jump to the descriptor parser.
            if (currentDescriptor) {
              descriptors.push(currentDescriptor);
            }
            return descriptors;
          } else {
            // Anything else: Append c to current descriptor.
            currentDescriptor += c;
          }
        } else if (state === "in parens") {
          if (c === ")") {
            // U+0029 RIGHT PARENTHESIS: Append c and go back to in descriptor.
            currentDescriptor += c;
            state = "in descriptor";
          } else if (c === "") {
            // EOF: Append current descriptor (unconditionally) and jump to the
            // descriptor parser.
            descriptors.push(currentDescriptor);
            return descriptors;
          } else {
            // Anything else: Append c to current descriptor.
            currentDescriptor += c;
          }
        } else if (state === "after descriptor") {
          if (isSpace(c)) {
            // Space character: Stay in this state.
          } else if (c === "") {
            // EOF: Jump to the descriptor parser.
            return descriptors;
          } else {
            // Anything else: Set state to in descriptor and step back one
            // character so that the advance below re-reads this same character.
            state = "in descriptor";
            pos -= 1;
          }
        }

        // Advance position to the next character in input, then repeat.
        pos += 1;
      }
    }

    /**
     * Steps 9 - 15: interprets the raw descriptors of one candidate and, when
     * they are all valid, appends the candidate to `candidates`.
     *
     * "Absent" is modelled as `undefined` (not as falsiness) so that a density
     * of 0, which the spec accepts, still counts as present.
     *
     * NOTE(review): spec step 14 (future-compat-h without a width is an error)
     * is not applied; h is exposed as a real value instead. Presumably
     * deliberate; confirm.
     *
     * @param {string} url - Candidate URL.
     * @param {string[]} descriptors - Raw descriptors for that URL.
     */
    function parseDescriptors(url, descriptors) {
      // 9. Descriptor parser: Let error be no.
      // (Tracked as the first descriptor that triggered an error, so the log
      // message can point at the actual culprit.)
      let invalidDescriptor;
      // 10. Let width be absent.
      let width;
      // 11. Let density be absent.
      let density;
      // 12. Let future-compat-h be absent. (We're implementing it now as h)
      let height;

      const flagError = (descriptor) => {
        if (invalidDescriptor === undefined) {
          invalidDescriptor = descriptor;
        }
      };

      // 13. For each descriptor in descriptors, run the appropriate set of steps
      //     from the following list:
      for (const descriptor of descriptors) {
        const unit = descriptor[descriptor.length - 1];
        const value = descriptor.substring(0, descriptor.length - 1);

        if (unit === "w" && regexNonNegativeInteger.test(value)) {
          // A valid non-negative integer followed by "w".
          // If width and density are not both absent, then let error be yes.
          if (width !== undefined || density !== undefined) {
            flagError(descriptor);
          }
          // If the result is zero, let error be yes. Otherwise, let width be the result.
          const intVal = parseInt(value, 10);
          if (intVal === 0) {
            flagError(descriptor);
          } else {
            width = intVal;
          }
        } else if (unit === "x" && regexFloatingPoint.test(value)) {
          // A valid floating-point number followed by "x".
          // If width, density and future-compat-h are not all absent, then let
          // error be yes.
          if (width !== undefined || density !== undefined || height !== undefined) {
            flagError(descriptor);
          }
          // If the result is less than zero, let error be yes. Otherwise, let
          // density be the result.
          const floatVal = parseFloat(value);
          if (floatVal < 0) {
            flagError(descriptor);
          } else {
            density = floatVal;
          }
        } else if (unit === "h" && regexNonNegativeInteger.test(value)) {
          // A valid non-negative integer followed by "h".
          // If height and density are not both absent, then let error be yes.
          if (height !== undefined || density !== undefined) {
            flagError(descriptor);
          }
          // If the result is zero, let error be yes. Otherwise, let
          // future-compat-h be the result.
          const intVal = parseInt(value, 10);
          if (intVal === 0) {
            flagError(descriptor);
          } else {
            height = intVal;
          }
        } else {
          // Anything else, Let error be yes.
          flagError(descriptor);
        }
      }

      // 15. If error is still no, then append a new image source to candidates whose
      //     URL is url, associated with a width width if not absent and a pixel
      //     density density if not absent. Otherwise, there is a parse error.
      if (invalidDescriptor === undefined) {
        const candidate = { url };
        if (width !== undefined) {
          candidate.w = width;
        }
        if (density !== undefined) {
          candidate.d = density;
        }
        if (height !== undefined) {
          candidate.h = height;
        }
        candidates.push(candidate);
      } else if (typeof console !== "undefined" && console.log) { // eslint-disable-line no-console
        // `typeof` guard: referencing an undeclared `console` would throw.
        console.log(`Invalid srcset descriptor found in "${input}" at "${invalidDescriptor}".`); // eslint-disable-line no-console
      }
    }

    // 4. Splitting loop: Collect a sequence of characters that are space
    //    characters or U+002C COMMA characters. If any U+002C COMMA characters
    //    were collected, that is a parse error.
    while (true) { // eslint-disable-line no-constant-condition
      collectCharacters(regexLeadingCommasOrSpaces);

      // 5. If position is past the end of input, return candidates and abort these steps.
      if (pos >= inputLength) {
        return candidates; // (we're done, this is the sole regular return path)
      }

      // 6. Collect a sequence of characters that are not space characters,
      //    and let that be url. (Always matches: the character at pos is
      //    neither a space nor past the end of input.)
      let url = collectCharacters(regexLeadingNotSpaces);

      // 7. Let descriptors be a new empty list.
      let descriptors = [];

      // 8. If url ends with a U+002C COMMA character (,), follow these substeps:
      if (url.endsWith(",")) {
        // (1). Remove all trailing U+002C COMMA characters from url. If this removed
        //      more than one character, that is a parse error.
        // (Tokenization is skipped: the candidate has no descriptors.)
        url = url.replace(regexTrailingCommas, "");
      } else {
        // Otherwise, tokenize the descriptors that follow the url.
        descriptors = tokenizeDescriptors();
      }

      // 9 - 15. Validate the descriptors and push the candidate.
      parseDescriptors(url, descriptors);

      // 16. Return to the step labeled splitting loop.
    }
  }
  259. })();