7 роки тому · 2563a00f0e
--- a/lib/single-file/parse-srcset.js
+++ b/lib/single-file/parse-srcset.js
@@ -0,0 +1,330 @@
 
				+/**
			
 
				+ * Srcset Parser
			
 
				+ *
			
 
				+ * By Alex Bell |  MIT License
			
 
				+ *
			
 
				+ * JS Parser for the string value that appears in markup <img srcset="here">
			
 
				+ *
			
 
				+ * @returns Array [{url: _, d: _, w: _, h:_}, ...]
			
 
				+ *
			
 
				+ * Based super duper closely on the reference algorithm at:
			
 
				+ * https://html.spec.whatwg.org/multipage/embedded-content.html#parse-a-srcset-attribute
			
 
				+ *
			
 
				+ * Most comments are copied in directly from the spec
			
 
				+ * (except for comments in parens).
			
 
				+ */
			
 
				+
			
 
				+/* global window */
			
 
				+
			
 
				+(function (root, factory) {
			
 
				+	if (typeof module === "object" && module.exports) {
			
 
				+		// Node. Does not work with strict CommonJS, but
			
 
				+		// only CommonJS-like environments that support module.exports,
			
 
				+		// like Node.
			
 
				+		module.exports = factory();
			
 
				+	} else {
			
 
				+		// Browser globals (root is window)
			
 
				+		root.parseSrcset = factory();
			
 
				+	}
			
 
				+}(window, function () {
			
 
				+
			
 
				+	// 1. Let input be the value passed to this algorithm.
			
 
				+	return function (input) {
			
 
				+
			
 
				+		// UTILITY FUNCTIONS
			
 
				+
			
 
				+		// Manual is faster than RegEx
			
 
				+		// http://bjorn.tipling.com/state-and-regular-expressions-in-javascript
			
 
				+		// http://jsperf.com/whitespace-character/5
			
 
				+		function isSpace(c) {
			
 
				+			return (c === "\u0020" || // space
			
 
				+				c === "\u0009" || // horizontal tab
			
 
				+				c === "\u000A" || // new line
			
 
				+				c === "\u000C" || // form feed
			
 
				+				c === "\u000D");  // carriage return
			
 
				+		}
			
 
				+
			
 
				+		function collectCharacters(regEx) {
			
 
				+			var chars,
			
 
				+				match = regEx.exec(input.substring(pos));
			
 
				+			if (match) {
			
 
				+				chars = match[0];
			
 
				+				pos += chars.length;
			
 
				+				return chars;
			
 
				+			}
			
 
				+		}
			
 
				+
			
 
				+		var inputLength = input.length,
			
 
				+
			
 
				+			// (Don"t use \s, to avoid matching non-breaking space)
			
 
				+			/* eslint-disable no-control-regex */
			
 
				+			regexLeadingSpaces = /^[ \t\n\r\u000c]+/, // 
			
 
				+			regexLeadingCommasOrSpaces = /^[, \t\n\r\u000c]+/,
			
 
				+			regexLeadingNotSpaces = /^[^ \t\n\r\u000c]+/,
			
 
				+			regexTrailingCommas = /[,]+$/,
			
 
				+			regexNonNegativeInteger = /^\d+$/,
			
 
				+			/* eslint-enable no-control-regex */
			
 
				+
			
 
				+			// ( Positive or negative or unsigned integers or decimals, without or without exponents.
			
 
				+			// Must include at least one digit.
			
 
				+			// According to spec tests any decimal point must be followed by a digit.
			
 
				+			// No leading plus sign is allowed.)
			
 
				+			// https://html.spec.whatwg.org/multipage/infrastructure.html#valid-floating-point-number
			
 
				+			regexFloatingPoint = /^-?(?:[0-9]+|[0-9]*\.[0-9]+)(?:[eE][+-]?[0-9]+)?$/,
			
 
				+
			
 
				+			url,
			
 
				+			descriptors,
			
 
				+			currentDescriptor,
			
 
				+			state,
			
 
				+			c,
			
 
				+
			
 
				+			// 2. Let position be a pointer into input, initially pointing at the start
			
 
				+			//    of the string.
			
 
				+			pos = 0,
			
 
				+
			
 
				+			// 3. Let candidates be an initially empty source set.
			
 
				+			candidates = [];
			
 
				+
			
 
				+		// 4. Splitting loop: Collect a sequence of characters that are space
			
 
				+		//    characters or U+002C COMMA characters. If any U+002C COMMA characters
			
 
				+		//    were collected, that is a parse error.		
			
 
				+		while (true) { // eslint-disable-line no-constant-condition
			
 
				+			collectCharacters(regexLeadingCommasOrSpaces);
			
 
				+
			
 
				+			// 5. If position is past the end of input, return candidates and abort these steps.
			
 
				+			if (pos >= inputLength) {
			
 
				+				return candidates; // (we"re done, this is the sole return path)
			
 
				+			}
			
 
				+
			
 
				+			// 6. Collect a sequence of characters that are not space characters,
			
 
				+			//    and let that be url.
			
 
				+			url = collectCharacters(regexLeadingNotSpaces);
			
 
				+
			
 
				+			// 7. Let descriptors be a new empty list.
			
 
				+			descriptors = [];
			
 
				+
			
 
				+			// 8. If url ends with a U+002C COMMA character (,), follow these substeps:
			
 
				+			//		(1). Remove all trailing U+002C COMMA characters from url. If this removed
			
 
				+			//         more than one character, that is a parse error.
			
 
				+			if (url.slice(-1) === ",") {
			
 
				+				url = url.replace(regexTrailingCommas, "");
			
 
				+				// (Jump ahead to step 9 to skip tokenization and just push the candidate).
			
 
				+				parseDescriptors();
			
 
				+
			
 
				+				//	Otherwise, follow these substeps:
			
 
				+			} else {
			
 
				+				tokenize();
			
 
				+			} // (close else of step 8)
			
 
				+
			
 
				+			// 16. Return to the step labeled splitting loop.
			
 
				+		} // (Close of big while loop.)
			
 
				+
			
 
				+		/**
			
 
				+		 * Tokenizes descriptor properties prior to parsing
			
 
				+		 * Returns undefined.
			
 
				+		 */
			
 
				+		function tokenize() {
			
 
				+
			
 
				+			// 8.1. Descriptor tokeniser: Skip whitespace
			
 
				+			collectCharacters(regexLeadingSpaces);
			
 
				+
			
 
				+			// 8.2. Let current descriptor be the empty string.
			
 
				+			currentDescriptor = "";
			
 
				+
			
 
				+			// 8.3. Let state be in descriptor.
			
 
				+			state = "in descriptor";
			
 
				+
			
 
				+			while (true) { // eslint-disable-line no-constant-condition
			
 
				+
			
 
				+				// 8.4. Let c be the character at position.
			
 
				+				c = input.charAt(pos);
			
 
				+
			
 
				+				//  Do the following depending on the value of state.
			
 
				+				//  For the purpose of this step, "EOF" is a special character representing
			
 
				+				//  that position is past the end of input.
			
 
				+
			
 
				+				// In descriptor
			
 
				+				if (state === "in descriptor") {
			
 
				+					// Do the following, depending on the value of c:
			
 
				+
			
 
				+					// Space character
			
 
				+					// If current descriptor is not empty, append current descriptor to
			
 
				+					// descriptors and let current descriptor be the empty string.
			
 
				+					// Set state to after descriptor.
			
 
				+					if (isSpace(c)) {
			
 
				+						if (currentDescriptor) {
			
 
				+							descriptors.push(currentDescriptor);
			
 
				+							currentDescriptor = "";
			
 
				+							state = "after descriptor";
			
 
				+						}
			
 
				+
			
 
				+						// U+002C COMMA (,)
			
 
				+						// Advance position to the next character in input. If current descriptor
			
 
				+						// is not empty, append current descriptor to descriptors. Jump to the step
			
 
				+						// labeled descriptor parser.
			
 
				+					} else if (c === ",") {
			
 
				+						pos += 1;
			
 
				+						if (currentDescriptor) {
			
 
				+							descriptors.push(currentDescriptor);
			
 
				+						}
			
 
				+						parseDescriptors();
			
 
				+						return;
			
 
				+
			
 
				+						// U+0028 LEFT PARENTHESIS (()
			
 
				+						// Append c to current descriptor. Set state to in parens.
			
 
				+					} else if (c === "\u0028") {
			
 
				+						currentDescriptor = currentDescriptor + c;
			
 
				+						state = "in parens";
			
 
				+
			
 
				+						// EOF
			
 
				+						// If current descriptor is not empty, append current descriptor to
			
 
				+						// descriptors. Jump to the step labeled descriptor parser.
			
 
				+					} else if (c === "") {
			
 
				+						if (currentDescriptor) {
			
 
				+							descriptors.push(currentDescriptor);
			
 
				+						}
			
 
				+						parseDescriptors();
			
 
				+						return;
			
 
				+
			
 
				+						// Anything else
			
 
				+						// Append c to current descriptor.
			
 
				+					} else {
			
 
				+						currentDescriptor = currentDescriptor + c;
			
 
				+					}
			
 
				+					// (end "in descriptor"
			
 
				+
			
 
				+					// In parens
			
 
				+				} else if (state === "in parens") {
			
 
				+
			
 
				+					// U+0029 RIGHT PARENTHESIS ())
			
 
				+					// Append c to current descriptor. Set state to in descriptor.
			
 
				+					if (c === ")") {
			
 
				+						currentDescriptor = currentDescriptor + c;
			
 
				+						state = "in descriptor";
			
 
				+
			
 
				+						// EOF
			
 
				+						// Append current descriptor to descriptors. Jump to the step labeled
			
 
				+						// descriptor parser.
			
 
				+					} else if (c === "") {
			
 
				+						descriptors.push(currentDescriptor);
			
 
				+						parseDescriptors();
			
 
				+						return;
			
 
				+
			
 
				+						// Anything else
			
 
				+						// Append c to current descriptor.
			
 
				+					} else {
			
 
				+						currentDescriptor = currentDescriptor + c;
			
 
				+					}
			
 
				+
			
 
				+					// After descriptor
			
 
				+				} else if (state === "after descriptor") {
			
 
				+
			
 
				+					// Do the following, depending on the value of c:
			
 
				+					// Space character: Stay in this state.
			
 
				+					if (isSpace(c)) {
			
 
				+
			
 
				+						// EOF: Jump to the step labeled descriptor parser.
			
 
				+					} else if (c === "") {
			
 
				+						parseDescriptors();
			
 
				+						return;
			
 
				+
			
 
				+						// Anything else
			
 
				+						// Set state to in descriptor. Set position to the previous character in input.
			
 
				+					} else {
			
 
				+						state = "in descriptor";
			
 
				+						pos -= 1;
			
 
				+
			
 
				+					}
			
 
				+				}
			
 
				+
			
 
				+				// Advance position to the next character in input.
			
 
				+				pos += 1;
			
 
				+
			
 
				+				// Repeat this step.
			
 
				+			} // (close while true loop)
			
 
				+		}
			
 
				+
			
 
				+		/**
			
 
				+		 * Adds descriptor properties to a candidate, pushes to the candidates array
			
 
				+		 * @return undefined
			
 
				+		 */
			
 
				+		// Declared outside of the while loop so that it"s only created once.
			
 
				+		function parseDescriptors() {
			
 
				+
			
 
				+			// 9. Descriptor parser: Let error be no.
			
 
				+			var pError = false,
			
 
				+
			
 
				+				// 10. Let width be absent.
			
 
				+				// 11. Let density be absent.
			
 
				+				// 12. Let future-compat-h be absent. (We"re implementing it now as h)
			
 
				+				w, d, h, i,
			
 
				+				candidate = {},
			
 
				+				desc, lastChar, value, intVal, floatVal;
			
 
				+
			
 
				+			// 13. For each descriptor in descriptors, run the appropriate set of steps
			
 
				+			// from the following list:
			
 
				+			for (i = 0; i < descriptors.length; i++) {
			
 
				+				desc = descriptors[i];
			
 
				+
			
 
				+				lastChar = desc[desc.length - 1];
			
 
				+				value = desc.substring(0, desc.length - 1);
			
 
				+				intVal = parseInt(value, 10);
			
 
				+				floatVal = parseFloat(value);
			
 
				+
			
 
				+				// If the descriptor consists of a valid non-negative integer followed by
			
 
				+				// a U+0077 LATIN SMALL LETTER W character
			
 
				+				if (regexNonNegativeInteger.test(value) && (lastChar === "w")) {
			
 
				+
			
 
				+					// If width and density are not both absent, then let error be yes.
			
 
				+					if (w || d) { pError = true; }
			
 
				+
			
 
				+					// Apply the rules for parsing non-negative integers to the descriptor.
			
 
				+					// If the result is zero, let error be yes.
			
 
				+					// Otherwise, let width be the result.
			
 
				+					if (intVal === 0) { pError = true; } else { w = intVal; }
			
 
				+
			
 
				+					// If the descriptor consists of a valid floating-point number followed by
			
 
				+					// a U+0078 LATIN SMALL LETTER X character
			
 
				+				} else if (regexFloatingPoint.test(value) && (lastChar === "x")) {
			
 
				+
			
 
				+					// If width, density and future-compat-h are not all absent, then let error
			
 
				+					// be yes.
			
 
				+					if (w || d || h) { pError = true; }
			
 
				+
			
 
				+					// Apply the rules for parsing floating-point number values to the descriptor.
			
 
				+					// If the result is less than zero, let error be yes. Otherwise, let density
			
 
				+					// be the result.
			
 
				+					if (floatVal < 0) { pError = true; } else { d = floatVal; }
			
 
				+
			
 
				+					// If the descriptor consists of a valid non-negative integer followed by
			
 
				+					// a U+0068 LATIN SMALL LETTER H character
			
 
				+				} else if (regexNonNegativeInteger.test(value) && (lastChar === "h")) {
			
 
				+
			
 
				+					// If height and density are not both absent, then let error be yes.
			
 
				+					if (h || d) { pError = true; }
			
 
				+
			
 
				+					// Apply the rules for parsing non-negative integers to the descriptor.
			
 
				+					// If the result is zero, let error be yes. Otherwise, let future-compat-h
			
 
				+					// be the result.
			
 
				+					if (intVal === 0) { pError = true; } else { h = intVal; }
			
 
				+
			
 
				+					// Anything else, Let error be yes.
			
 
				+				} else { pError = true; }
			
 
				+			} // (close step 13 for loop)
			
 
				+
			
 
				+			// 15. If error is still no, then append a new image source to candidates whose
			
 
				+			// URL is url, associated with a width width if not absent and a pixel
			
 
				+			// density density if not absent. Otherwise, there is a parse error.
			
 
				+			if (!pError) {
			
 
				+				candidate.url = url;
			
 
				+				if (w) { candidate.w = w; }
			
 
				+				if (d) { candidate.d = d; }
			
 
				+				if (h) { candidate.h = h; }
			
 
				+				candidates.push(candidate);
			
 
				+			} else if (console && console.log) {  // eslint-disable-line no-console
			
 
				+				console.log("Invalid srcset descriptor found in \"" + input + "\" at \"" + desc + "\"."); // eslint-disable-line no-console
			
 
				+			}
			
 
				+		} // (close parseDescriptors fn)
			
 
				+
			
 
				+	};
			
 
				+}));
			
--- a/lib/single-file/single-file-core.js
+++ b/lib/single-file/single-file-core.js
@@ -353,7 +353,7 @@ const SingleFileCore = (() => {
 
				 				DomProcessorHelper.processAttribute(this.doc.querySelectorAll("video[poster]"), "poster", this.baseURI),
			
 
				 				DomProcessorHelper.processAttribute(this.doc.querySelectorAll("*[background]"), "background", this.baseURI),
			
 
				 				DomProcessorHelper.processAttribute(this.doc.querySelectorAll("image, use"), "xlink:href", this.baseURI),
			
 
				-				DomProcessorHelper.processSrcSet(this.doc.querySelectorAll("[srcset]"), this.baseURI)
			
 
				+				DomProcessorHelper.processSrcSet(this.doc.querySelectorAll("[srcset]"), this.baseURI, this.dom)
			
 
				 			]);
			
 
				 		}
			
 
				 
			
@@ -478,22 +478,21 @@ const SingleFileCore = (() => {
 
				 			}));
			
 
				 		}
			
 
				 
			
 
				-		static async processSrcSet(resourceElements, baseURI) {
			
 
				+		static async processSrcSet(resourceElements, baseURI, dom) {
			
 
				 			await Promise.all(Array.from(resourceElements).map(async resourceElement => {
			
 
				-				const attributeValue = resourceElement.getAttribute("srcset");
			
 
				-				const srcSet = await Promise.all(attributeValue.split(",").map(async src => {
			
 
				-					let [resourceURL, descriptor] = src.trim().split(/\s+/);
			
 
				-					resourceURL = DomUtil.normalizeURL(resourceURL);
			
 
				+				const srcSet = dom.parseSrcSet(resourceElement.getAttribute("srcset"));
			
 
				+				const srcSetValues = await Promise.all(srcSet.map(async srcSetValue => {
			
 
				+					const resourceURL = DomUtil.normalizeURL(srcSetValue.url);
			
 
				 					if (resourceURL && resourceURL != baseURI && DomUtil.testValidPath(resourceURL)) {
			
 
				 						try {
			
 
				 							const dataURI = await batchRequest.addURL(new URL(resourceURL, baseURI).href);
			
 
				-							return dataURI + (descriptor ? " " + descriptor : "");
			
 
				+							return dataURI + (srcSetValue.w ? " " + srcSetValue.w : "");
			
 
				 						} catch (e) {
			
 
				 							// ignored
			
 
				 						}
			
 
				 					}
			
 
				 				}));
			
 
				-				resourceElement.setAttribute("srcset", srcSet.join(","));
			
 
				+				resourceElement.setAttribute("srcset", srcSetValues.join(","));
			
 
				 			}));
			
 
				 		}
			
 
				 	}