docprocessor.js 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443
  1. (function() {
  // Captures the address referenced by either a `url(...)` token (group 2) or an
  // `@import` rule (group 4); groups 1 and 3 hold the whole matched alternative.
  var IMPORT_URL_VALUE_EXP = /(url\s*\(\s*(?:'|")?\s*([^('"\))]*)\s*(?:'|")?\s*\))|(@import\s*\(?\s*(?:'|")?\s*([^('"\))]*)\s*(?:'|")?\s*(?:\)|;))/i;
  // Captures the address inside a single `url(...)` token, without its quotes (group 1).
  var URL_VALUE_EXP = /url\s*\(\s*(?:'|")?\s*([^('"\))]*)\s*(?:'|")?\s*\)/i;
  // Captures the address of an `@import` rule written without `url(...)` (group 1).
  var IMPORT_VALUE_ALT_EXP = /@import\s*\(?\s*(?:'|")?\s*([^('"\))]*)\s*(?:'|")?\s*(?:\)|;)/i;
  // Finds every `url(...)` token (global, case-insensitive).
  var URL_EXP = /url\s*\(([^\)]*)\)/gi;
  // Finds every `@import` rule, in both the `url(...)` form and the plain form.
  var IMPORT_EXP = /(@import\s*url\s*\([^\)]*\)\s*;?)|(@import\s*('|")?\s*[^\(;'"]*\s*('|")?\s*;)/gi;
  // Finds `@import` rules written without `url(...)`.
  var IMPORT_ALT_EXP = /@import\s*('|")?\s*[^\(;'"]*\s*('|")?\s*;/gi;
  // Fallback address used when a resource could not be embedded: a 1x1 transparent GIF.
  // An empty string here would leave `src=""` / `url()` behind, which browsers treat as
  // a broken (or self-referencing) resource.
  var EMPTY_PIXEL_DATA = "data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7";
  /**
   * Decodes the payload of a `data:` URI into a string.
   *
   * The payload is percent-decoded first; when the metadata declares `base64` it is
   * then base64-decoded, and when it additionally declares a UTF-8 charset the
   * resulting byte string is decoded as UTF-8.
   *
   * @param {string} dataURI - a URI starting with `data:`
   * @returns {string} the decoded payload, or "" when the URI has no "," separator
   */
  function decodeDataURI(dataURI) {
    var comma = dataURI.indexOf(","), meta, data;
    if (comma == -1)
      return "";
    // Keep the comma in `meta`: the parameter tests below expect each parameter to be
    // followed by ";" or ",". Nothing of the payload may leak into the metadata.
    meta = dataURI.substring(5, comma + 1).toLowerCase();
    data = dataURI.substring(comma + 1);
    try {
      data = decodeURIComponent(data);
    } catch (e) {
      // malformed percent-escape: keep the payload as written rather than failing
    }
    if (/;\s*base64\s*[;,]/.test(meta)) {
      data = atob(data);
      // Only base64 payloads are raw bytes. A percent-encoded payload has already been
      // decoded as UTF-8 by decodeURIComponent, and decoding it a second time throws
      // on any non-ASCII character.
      if (/;\s*charset=[uU][tT][fF]-?8\s*[;,]/.test(meta)) {
        try {
          data = decodeURIComponent(escape(data));
        } catch (e) {
          // bytes are not valid UTF-8: keep the byte string
        }
      }
    }
    return data;
  }
  /**
   * Resolves `link` against the absolute address `host`.
   *
   * - An empty/missing link yields "".
   * - A link that already starts with a scheme (`http:`, `data:`, `mailto:`, ...) is
   *   returned trimmed and otherwise untouched.
   * - Anything else (relative path, `/root` path, `//host/path`, `?query`, `#fragment`)
   *   is resolved with the standard URL parser, which also collapses `.` and `..`.
   *
   * @param {string} link - the address to resolve
   * @param {string} host - the absolute address of the referencing document
   * @returns {string} the absolute address; the trimmed link when `host` cannot be
   *          used as a base
   */
  function formatURL(link, host) {
    var target;
    if (!link)
      return "";
    target = link.trim();
    // Anchored on purpose: a scheme only counts at the very start of the link, so
    // "redirect?u=http://x" stays relative while "mailto:me@example.com" is absolute.
    if (/^[a-z][a-z0-9+.\-]*:/i.test(target))
      return target;
    try {
      return new URL(target, host).href;
    } catch (e) {
      // `host` is missing or not an absolute URL: nothing to resolve against
      return target;
    }
  }
  59. function resolveURLs(content, host) {
  60. var ret = content.replace(URL_EXP, function(value) {
  61. var result = value.match(URL_VALUE_EXP);
  62. if (result)
  63. if (result[1].indexOf("data:") != 0)
  64. return value.replace(result[1], formatURL(result[1], host));
  65. return value;
  66. });
  67. return ret.replace(IMPORT_ALT_EXP, function(value) {
  68. var result = value.match(IMPORT_VALUE_ALT_EXP);
  69. if (result)
  70. if (result[1].indexOf("data:") != 0)
  71. return "@import \"" + formatURL(result[1], host) + "\";";
  72. return value;
  73. });
  74. }
  /**
   * Builds the address to embed for a fetched resource.
   *
   * @param {Object} data - response with `content`, `mediaType` and `mediaTypeParam`
   * @param {string} defaultURL - address used when the response has no content
   * @param {boolean} woURL - when truthy return the bare address, otherwise wrap it
   *        in `url(...)`
   * @returns {string} a `data:` URI for the content, or `defaultURL`
   */
  function getDataURI(data, defaultURL, woURL) {
    var address;
    if (data.content)
      // Leave out whichever part is missing instead of writing "null"/"undefined"
      // into the URI ("data:;base64,..." and "data:image/png,..." are both valid).
      address = "data:" + (data.mediaType || "") + (data.mediaTypeParam ? ";" + data.mediaTypeParam : "") + "," + data.content;
    else
      address = defaultURL;
    return woURL ? address : "url(" + address + ")";
  }
  // Returns `content` with every terminated block comment removed.
  //
  // The text is scanned once from left to right. A comment opener is only closed by a
  // terminator that starts after the opener's two characters, so the three characters
  // slash-star-slash open a comment rather than forming a complete one. An opener
  // without a terminator is left in place together with everything that follows it.
  function removeComments(content) {
    var kept = [], cursor = 0, start, end;
    while ((start = content.indexOf("/*", cursor)) != -1) {
      end = content.indexOf("*/", start + 2);
      if (end == -1)
        break;
      kept.push(content.substring(cursor, start));
      cursor = end + 2;
    }
    kept.push(content.substring(cursor));
    return kept.join("");
  }
  /**
   * Embeds the resources referenced by the `url(...)` tokens of a piece of CSS.
   *
   * Every distinct address found outside comments (and not already a `data:` URI) is
   * requested once through `requestManager`; when its response arrives, each
   * `url(...)` token holding exactly that address gets the address replaced by the
   * value returned by getDataURI. `callback` receives the rewritten CSS once all
   * responses have been handled; it is never called when nothing was requested.
   *
   * @param {string} content - CSS text
   * @param {string} host - absolute address used to resolve relative addresses
   * @param {Object} requestManager - object exposing
   *        `send(url, handler, characterSet, mediaTypeParam)`
   * @param {function(string)} callback - receives the rewritten CSS
   */
  function replaceURLs(content, host, requestManager, callback) {
    var i, url, result, values = removeComments(content).match(URL_EXP), requested = [], requestMax = 0, requestIndex = 0;

    // Replaces `origUrl` inside the tokens whose address is exactly `origUrl`. Working
    // token by token keeps an address that merely contains `origUrl` (e.g. "bg.png"
    // inside "img/bg.png") intact, and splicing by position keeps "$" sequences of
    // the embedded data from being read as replacement patterns.
    function embed(origUrl, address) {
      content = content.replace(URL_EXP, function(value) {
        var match = value.match(URL_VALUE_EXP), at;
        if (!match || match[1] != origUrl)
          return value;
        at = value.indexOf(origUrl, value.indexOf("(", match.index) + 1);
        return value.substring(0, at) + address + value.substring(at + origUrl.length);
      });
    }

    function sendRequest(origUrl, absoluteUrl) {
      requestMax++;
      requestManager.send(absoluteUrl, function(data) {
        requestIndex++;
        // drop media type parameters such as "; charset=..."
        data.mediaType = data.mediaType ? data.mediaType.split(";")[0] : null;
        embed(origUrl, getDataURI(data, EMPTY_PIXEL_DATA, true));
        if (requestIndex == requestMax)
          callback(content);
      }, null, "base64");
    }

    if (values)
      for (i = 0; i < values.length; i++) {
        result = values[i].match(URL_VALUE_EXP);
        // one request per distinct address is enough: its response updates every token
        if (result && result[1] && requested.indexOf(result[1]) == -1) {
          url = formatURL(result[1], host);
          if (url.indexOf("data:") != 0) {
            requested.push(result[1]);
            sendRequest(result[1], url);
          }
        }
      }
  }
  116. // ----------------------------------------------------------------------------------------------
  /**
   * Replaces every `<link rel="stylesheet" href>` element by a `<style>` element
   * holding the stylesheet text.
   *
   * The new element receives the non-empty attributes of the link (except `href`),
   * a `data-href` attribute with the resolved stylesheet address, and the stylesheet
   * text with its addresses resolved against that address. A disabled link is replaced
   * by a comment containing the escaped markup of the style element instead. A link
   * whose request answers with a status of 400 or above is removed.
   *
   * @param {Document} doc - document used to create the new nodes
   * @param {Element} docElement - root element to search
   * @param {string} baseURI - absolute address of the document
   * @param {Object} requestManager - object exposing `send(url, handler)`
   */
  function processStylesheets(doc, docElement, baseURI, requestManager) {
    Array.prototype.forEach.call(docElement.querySelectorAll('link[href][rel*="stylesheet"]'), function(node) {
      var href = node.getAttribute("href"), url = formatURL(href, baseURI);

      function createStyleNode(content) {
        var i, newNode, commentNode;
        newNode = doc.createElement("style");
        for (i = 0; i < node.attributes.length; i++)
          if (node.attributes[i].value)
            newNode.setAttribute(node.attributes[i].name, node.attributes[i].value);
        newNode.dataset.href = url;
        newNode.removeAttribute("href");
        newNode.textContent = resolveURLs(content, url);
        if (node.disabled) {
          // createComment requires its data argument, so pass the text directly
          // rather than creating an empty comment and filling it afterwards.
          commentNode = doc.createComment(newNode.outerHTML.replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/--/g, "&minus;&minus;"));
          node.parentElement.replaceChild(commentNode, node);
        } else
          node.parentElement.replaceChild(newNode, node);
      }

      if (href.indexOf("data:") != 0)
        requestManager.send(url, function(data) {
          if (data.status >= 400)
            node.parentElement.removeChild(node);
          else
            createStyleNode(data.content || "");
        });
      else
        createStyleNode(decodeDataURI(href));
    });
  }
  /**
   * Inlines the `@import` rules found in the `<style>` elements.
   *
   * Each rule found outside comments is replaced by the text of the imported
   * stylesheet, with the addresses of that text resolved against the stylesheet's
   * own address. Relative imports are resolved against the `data-href` of the style
   * element when present, otherwise against `baseURI`. A failed request (status of
   * 400 or above, or no content) replaces the rule by an empty string.
   *
   * @param {Element} docElement - root element to search
   * @param {string} baseURI - absolute address of the document
   * @param {string} characterSet - passed to `requestManager.send` as its fourth
   *        argument (this mirrors the existing call; the argument order is not
   *        changed here)
   * @param {Object} requestManager - object exposing `send`
   * @returns {boolean} true when no import with an address was found
   */
  function processImports(docElement, baseURI, characterSet, requestManager) {
    var ret = true;
    Array.prototype.forEach.call(docElement.querySelectorAll("style"), function(styleSheet) {
      var imports = removeComments(styleSheet.textContent).match(IMPORT_EXP);
      if (imports)
        imports.forEach(function(imp) {
          var url, href, result = imp.match(IMPORT_URL_VALUE_EXP);

          function insertStylesheet(content) {
            var resolved = resolveURLs(content, url);
            // A function replacer inserts the stylesheet verbatim. Passed as a plain
            // string, sequences such as the "$'" of `a[href$='.pdf']` would be
            // expanded as replacement patterns and corrupt the stylesheet.
            styleSheet.textContent = styleSheet.textContent.replace(imp, function() {
              return resolved;
            });
          }

          if (result && (result[2] || result[4])) {
            href = result[2] || result[4];
            url = formatURL(href, styleSheet.dataset.href || baseURI);
            if (href.indexOf("data:") != 0) {
              requestManager.send(url, function(data) {
                insertStylesheet(data.status < 400 && data.content ? data.content : "");
              }, null, characterSet);
            } else
              insertStylesheet(decodeDataURI(href));
            ret = false;
          }
        });
    });
    return ret;
  }
  /**
   * Embeds the resources referenced from inline `style` attributes.
   *
   * The `style` value of every element carrying one is handed to replaceURLs, and the
   * rewritten CSS it reports is written back to the attribute.
   *
   * @param {Element} docElement - root element to search
   * @param {string} baseURI - absolute address of the document
   * @param {Object} requestManager - forwarded to replaceURLs
   */
  function processStyleAttributes(docElement, baseURI, requestManager) {
    var styled = docElement.querySelectorAll("*[style]"), position;

    // one closure per element so each asynchronous result reaches its own element
    function embed(element) {
      function store(css) {
        element.setAttribute("style", css);
      }
      replaceURLs(element.getAttribute("style"), baseURI, requestManager, store);
    }

    for (position = 0; position < styled.length; position++)
      embed(styled[position]);
  }
  /**
   * Embeds the images referenced by `background` attributes.
   *
   * Only values containing a "." are considered; values that resolve to a `data:`
   * address are left alone. The attribute receives the value returned by getDataURI
   * once the response arrives.
   *
   * @param {Element} docElement - root element to search
   * @param {string} baseURI - absolute address of the document
   * @param {Object} requestManager - object exposing `send`
   */
  function processBgAttributes(docElement, baseURI, requestManager) {
    var candidates = docElement.querySelectorAll("*[background]"), position;

    function embed(element) {
      var address, raw = element.getAttribute("background");
      if (raw.indexOf(".") == -1)
        return;
      address = formatURL(raw, baseURI);
      if (address.indexOf("data:") == 0)
        return;
      requestManager.send(address, function(response) {
        element.setAttribute("background", getDataURI(response, EMPTY_PIXEL_DATA, true));
      }, null, "base64");
    }

    for (position = 0; position < candidates.length; position++)
      embed(candidates[position]);
  }
  /**
   * Adds a `<link rel="shortcut icon">` pointing at `/favicon.ico` to the document
   * head when the document declares no icon link of its own.
   *
   * Nothing happens when an icon link already exists or when there is no
   * `html > head` element.
   *
   * @param {Document} doc - document used to create the link
   * @param {Element} docElement - root element to search
   * @param {string} baseURI - absolute address of the document
   */
  function insertDefaultFavico(doc, docElement, baseURI) {
    var head = docElement.querySelector("html > head");
    var declaredIcon = docElement.querySelector('link[href][rel="shortcut icon"], link[href][rel="apple-touch-icon"], link[href][rel="icon"]');
    var iconLink;

    if (declaredIcon || !head)
      return;

    iconLink = doc.createElement("link");
    iconLink.setAttribute("type", "image/x-icon");
    iconLink.setAttribute("rel", "shortcut icon");
    iconLink.setAttribute("href", formatURL("/favicon.ico", baseURI));
    head.appendChild(iconLink);
  }
  /**
   * Embeds icons, images, image inputs and video posters.
   *
   * For each matching element the address held by the relevant attribute is resolved
   * and, unless it is a `data:` address, requested; the attribute then receives the
   * value returned by getDataURI.
   *
   * @param {Element} docElement - root element to search
   * @param {string} baseURI - absolute address of the document
   * @param {Object} requestManager - object exposing `send`
   */
  function processImages(docElement, baseURI, requestManager) {
    function embed(element, attributeName) {
      var address = formatURL(element.getAttribute(attributeName), baseURI);
      if (address.indexOf("data:") == 0)
        return;
      requestManager.send(address, function(response) {
        element.setAttribute(attributeName, getDataURI(response, EMPTY_PIXEL_DATA, true));
      }, null, "base64");
    }

    function embedAll(selector, attributeName) {
      var matches = docElement.querySelectorAll(selector), position;
      for (position = 0; position < matches.length; position++)
        embed(matches[position], attributeName);
    }

    embedAll('link[href][rel="shortcut icon"], link[href][rel="apple-touch-icon"], link[href][rel="icon"]', "href");
    embedAll('img[src], input[src][type="image"]', "src");
    embedAll('video[poster]', "poster");
  }
  /**
   * Embeds the SVG documents referenced by `<object>` and `<embed>` elements.
   *
   * The address is read from the `data` attribute when present, otherwise from `src`,
   * and the embedded result is written back to the attribute it was read from. When
   * the response has no content an empty SVG document is used instead.
   *
   * @param {Element} docElement - root element to search
   * @param {string} baseURI - absolute address of the document
   * @param {Object} requestManager - object exposing `send`
   */
  function processSVGs(docElement, baseURI, requestManager) {
    var images = docElement.querySelectorAll('object[type="image/svg+xml"], object[type="image/svg-xml"], embed[src*=".svg"]');
    Array.prototype.forEach.call(images, function(node) {
      var dataAttribute = node.getAttribute("data"), src = node.getAttribute("src");
      // Decide the target attribute here, from the attribute values. The response
      // handler gets its own parameter name so it cannot shadow `dataAttribute`;
      // otherwise the (always truthy) response object would pick "data" every time.
      var attributeName = dataAttribute ? "data" : "src", url = formatURL(dataAttribute || src, baseURI);
      if (url.indexOf("data:") != 0)
        requestManager.send(url, function(response) {
          node.setAttribute(attributeName, getDataURI(response, "data:text/xml,<svg></svg>", true));
        }, null, null);
    });
  }
  /**
   * Embeds the resources referenced from the `<style>` elements.
   *
   * The text of each style element is handed to replaceURLs, using the element's
   * `data-href` as base address when it has one and `baseURI` otherwise; the rewritten
   * CSS it reports becomes the new text of the element.
   *
   * @param {Element} docElement - root element to search
   * @param {string} baseURI - absolute address of the document
   * @param {Object} requestManager - forwarded to replaceURLs
   */
  function processStyles(docElement, baseURI, requestManager) {
    var sheets = docElement.querySelectorAll("style"), position;

    // one closure per element so each asynchronous result reaches its own element
    function embed(sheet) {
      var origin = sheet.dataset.href || baseURI;
      replaceURLs(sheet.textContent, origin, requestManager, function(css) {
        sheet.textContent = css;
      });
    }

    for (position = 0; position < sheets.length; position++)
      embed(sheets[position]);
  }
  /**
   * Inlines external scripts.
   *
   * Every `<script src>` whose address is not a `data:` URI is requested. On a status
   * below 400 the script text becomes the content of the element, with closing script
   * tags found inside quoted strings split into two concatenated strings so they
   * cannot terminate the inlined element. The `src` attribute is removed in all cases.
   *
   * @param {Element} docElement - root element to search
   * @param {string} baseURI - absolute address of the document
   * @param {string} characterSet - passed as third argument of `requestManager.send`
   * @param {Object} requestManager - object exposing `send`
   */
  function processScripts(docElement, baseURI, characterSet, requestManager) {
    Array.prototype.forEach.call(docElement.querySelectorAll("script[src]"), function(node) {
      var src = node.getAttribute("src");
      if (src.indexOf("data:") != 0)
        requestManager.send(formatURL(src, baseURI), function(data) {
          var content;
          if (data.status < 400) {
            // A successful response may carry no content (the stylesheet handling
            // guards against this too); treat it as an empty script instead of
            // throwing. Work on a local copy so the response object stays untouched.
            content = data.content || "";
            content = content.replace(/"([^"]*)<\/\s*script\s*>([^"]*)"/gi, '"$1<"+"/script>$2"');
            content = content.replace(/'([^']*)<\/\s*script\s*>([^']*)'/gi, "'$1<'+'/script>$2'");
            node.textContent = "\n" + content + "\n";
          }
          node.removeAttribute("src");
        }, characterSet);
    });
  }
  /**
   * Replaces `<canvas>` elements by `<img>` elements showing their captured content.
   *
   * `canvasData[n]` is used as image address for the n-th canvas of the document; a
   * canvas without data is left untouched. The image receives the non-empty
   * attributes of the canvas. When the image reports no width or height of its own,
   * the rendered size of the canvas is applied through the inline style.
   *
   * @param {Document} doc - document used to create the images
   * @param {Element} docElement - root element to search
   * @param {Array} canvasData - image addresses, in document order of the canvases
   */
  function processCanvas(doc, docElement, canvasData) {
    Array.prototype.forEach.call(docElement.querySelectorAll("canvas"), function(node, index) {
      var i, data = canvasData[index], newNode;
      if (!data)
        return;
      newNode = doc.createElement("img");
      newNode.setAttribute("src", data);
      for (i = 0; i < node.attributes.length; i++)
        if (node.attributes[i].value)
          newNode.setAttribute(node.attributes[i].name, node.attributes[i].value);
      // `style.pixelWidth` / `style.pixelHeight` are non-standard and ignored by
      // current browsers; use the standard properties with an explicit unit. A size
      // of 0 (canvas not rendered) is not applied, as it would hide the image.
      if (!newNode.width && node.clientWidth)
        newNode.style.width = node.clientWidth + "px";
      if (!newNode.height && node.clientHeight)
        newNode.style.height = node.clientHeight + "px";
      node.parentElement.replaceChild(newNode, node);
    });
  }
  /**
   * Removes every `<script>` element and every `onload` attribute.
   *
   * @param {Element} docElement - root element to search
   */
  function removeScripts(docElement) {
    var scripts = docElement.querySelectorAll("script"), position, loaders;

    for (position = 0; position < scripts.length; position++)
      scripts[position].parentElement.removeChild(scripts[position]);

    loaders = docElement.querySelectorAll("*[onload]");
    for (position = 0; position < loaders.length; position++)
      loaders[position].removeAttribute("onload");
  }
  /**
   * Removes applets, non-SVG `<object>` / `<embed>` elements, and the `src` attribute
   * of `<audio>` and `<video>` elements.
   *
   * @param {Element} docElement - root element to search
   */
  function removeObjects(docElement) {
    var plugins = docElement.querySelectorAll('applet, object:not([type="image/svg+xml"]):not([type="image/svg-xml"]), embed:not([src*=".svg"])');
    var position, media;

    for (position = 0; position < plugins.length; position++)
      plugins[position].parentElement.removeChild(plugins[position]);

    media = docElement.querySelectorAll('audio[src], video[src]');
    for (position = 0; position < media.length; position++)
      media[position].removeAttribute("src");
  }
  /**
   * Removes the `cite` attribute of every `<blockquote>` carrying one.
   *
   * @param {Element} docElement - root element to search
   */
  function removeBlockquotesCite(docElement) {
    var quotes = docElement.querySelectorAll("blockquote[cite]"), position;
    for (position = 0; position < quotes.length; position++)
      quotes[position].removeAttribute("cite");
  }
  /**
   * Removes every `<iframe>` and `<frame>` element.
   *
   * @param {Element} docElement - root element to search
   */
  function removeFrames(docElement) {
    var frames = docElement.querySelectorAll("iframe, frame"), position;
    for (position = 0; position < frames.length; position++)
      frames[position].parentElement.removeChild(frames[position]);
  }
  /**
   * Removes every `<meta http-equiv=refresh>` element.
   *
   * @param {Element} docElement - root element to search
   */
  function removeMetaRefresh(docElement) {
    var refreshers = docElement.querySelectorAll("meta[http-equiv=refresh]"), position;
    for (position = 0; position < refreshers.length; position++)
      refreshers[position].parentElement.removeChild(refreshers[position]);
  }
  /**
   * Points every `<iframe>` / `<frame>` at `about:blank`, except those whose resolved
   * `src` is a `data:` address.
   *
   * @param {Element} docElement - root element to search
   * @param {string} baseURI - absolute address of the document
   */
  function resetFrames(docElement, baseURI) {
    var frames = docElement.querySelectorAll("iframe, frame"), position, address;
    for (position = 0; position < frames.length; position++) {
      address = formatURL(frames[position].getAttribute("src"), baseURI);
      if (address.indexOf("data:") == 0)
        continue;
      frames[position].setAttribute("src", "about:blank");
    }
  }
  /**
   * Makes the `href` of anchors absolute.
   *
   * Anchors whose `href` starts with "#" are not selected. An anchor is left as it is
   * when it resolves to nothing, or when its resolved address both starts with the
   * document address (fragment excluded) and contains a "#".
   *
   * @param {Element} docElement - root element to search
   * @param {string} baseURI - absolute address of the document
   */
  function setAbsoluteLinks(docElement, baseURI) {
    var anchors = docElement.querySelectorAll("a:not([href^='#'])"), position, anchor, absolute, pointsIntoPage;
    for (position = 0; position < anchors.length; position++) {
      anchor = anchors[position];
      absolute = formatURL(anchor.getAttribute("href"), baseURI);
      if (!absolute)
        continue;
      pointsIntoPage = absolute.indexOf(baseURI.split("#")[0]) == 0 && absolute.indexOf("#") != -1;
      if (!pointsIntoPage)
        anchor.setAttribute("href", absolute);
    }
  }
  317. // ----------------------------------------------------------------------------------------------
  /**
   * Prepares a document for saving and returns the function that starts embedding
   * its resources.
   *
   * Two internal request queues are used, both forwarding to `requestManager`:
   *  - the first one loads external stylesheets, then `@import` rules, repeating the
   *    import pass for as long as a pass issued requests;
   *  - the second one collects the requests issued while processing style attributes,
   *    backgrounds, images, SVGs, style elements and scripts. These requests are only
   *    sent when the returned function is called.
   *
   * Once no import request is pending the document is cleaned up according to
   * `config` (`removeScripts`, `removeObjects`, `removeFrames` / `getRawDoc`), and
   * `onInit`, when given, is called asynchronously with the number of queued requests.
   *
   * @param {Document} doc - document used to create nodes
   * @param {Element} docElement - root element to process
   * @param {boolean} addDefaultFavico - whether to add a default icon link
   * @param {string} baseURI - absolute address of the document
   * @param {string} characterSet - forwarded to the import and script processing
   * @param {Object} config - options, see above
   * @param {Array} canvasData - forwarded to processCanvas
   * @param {Object} requestManager - object exposing
   *        `send(url, handler, characterSet, mediaTypeParam)`
   * @param {function(number)} onInit - optional
   * @param {function(number, number)} onProgress - optional, receives the number of
   *        handled responses and the number of requests of the second queue
   * @param {function()} onEnd - optional, called when the second queue is done
   * @returns {function()} starts the second queue
   */
  singlefile.initProcess = function(doc, docElement, addDefaultFavico, baseURI, characterSet, config, canvasData, requestManager, onInit, onProgress, onEnd) {
    var initManager = new RequestManager(), manager = new RequestManager(onProgress);

    // Queue of requests that are only forwarded to `requestManager` by doSend().
    function RequestManager(progressHandler) {
      var self = this, handled = 0, pending = [];

      this.requestCount = 0;

      this.send = function(url, responseHandler, characterSet, mediaTypeParam) {
        this.requestCount++;
        pending.push({
          url : url,
          responseHandler : responseHandler,
          characterSet : characterSet,
          mediaTypeParam : mediaTypeParam
        });
      };

      this.doSend = function() {
        pending.forEach(function(entry) {
          requestManager.send(entry.url, function(response) {
            entry.responseHandler(response);
            handled++;
            if (progressHandler) {
              progressHandler(handled, self.requestCount);
            }
            if (handled != self.requestCount) {
              return;
            }
            // last response of the batch: reset the counters, then notify
            self.requestCount = 0;
            handled = 0;
            if (self.onEnd) {
              self.onEnd();
            }
          }, entry.characterSet, entry.mediaTypeParam);
        });
        pending = [];
      };
    }

    // Cleans the document up and queues the resource requests on `manager`.
    function prepareDocument() {
      if (config.removeScripts) {
        removeScripts(docElement);
      }
      if (config.removeObjects) {
        removeObjects(docElement);
      }
      if (config.removeFrames || config.getRawDoc) {
        removeFrames(docElement);
      }
      resetFrames(docElement, baseURI);
      removeBlockquotesCite(docElement);
      removeMetaRefresh(docElement);
      setAbsoluteLinks(docElement, baseURI);
      if (addDefaultFavico) {
        insertDefaultFavico(doc, docElement, baseURI);
      }
      processStyleAttributes(docElement, baseURI, manager);
      processBgAttributes(docElement, baseURI, manager);
      processImages(docElement, baseURI, manager);
      processSVGs(docElement, baseURI, manager);
      processStyles(docElement, baseURI, manager);
      processScripts(docElement, baseURI, characterSet, manager);
      processCanvas(doc, docElement, canvasData);
      if (onInit) {
        setTimeout(function() {
          onInit(manager.requestCount);
        }, 1);
      }
    }

    // Runs one import pass; a pass that issued requests schedules another pass.
    function inlineImports() {
      initManager.onEnd = function(noRequests) {
        if (noRequests) {
          prepareDocument();
        } else {
          inlineImports();
        }
      };
      processImports(docElement, baseURI, characterSet, initManager);
      initManager.doSend();
      if (initManager.requestCount == 0) {
        prepareDocument();
      }
    }

    manager.onEnd = onEnd;
    processStylesheets(doc, docElement, baseURI, initManager);
    initManager.onEnd = inlineImports;
    initManager.doSend();
    if (initManager.requestCount == 0) {
      initManager.onEnd();
    }

    return function() {
      manager.doSend();
      if (manager.onEnd && manager.requestCount == 0) {
        manager.onEnd();
      }
    };
  };
  399. })();