docprocessor.js 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437
  1. /*
  2. * Copyright 2011 Gildas Lormeau
  3. * contact : gildas.lormeau <at> gmail.com
  4. *
  5. * This file is part of SingleFile Core.
  6. *
  7. * SingleFile Core is free software: you can redistribute it and/or modify
  8. * it under the terms of the GNU Lesser General Public License as published by
  9. * the Free Software Foundation, either version 3 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * SingleFile Core is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public License
  18. * along with SingleFile Core. If not, see <http://www.gnu.org/licenses/>.
  19. */
  20. (function() {
  // Patterns used to find and parse url(...) values and @import rules in CSS text,
  // plus the placeholder image substituted for resources that have no content.
  var
    // Target of either a url(...) value (capture group 2) or an @import rule (capture group 4).
    IMPORT_URL_VALUE_EXP = /(url\s*\(\s*(?:'|")?\s*([^('|"|\))]*)\s*(?:'|")?\s*\))|(@import\s*\(?\s*(?:'|")?\s*([^('|"|\))]*)\s*(?:'|")?\s*(?:\)|;))/i,
    // Target of a single url(...) value, without the optional quotes (capture group 1).
    URL_VALUE_EXP = /url\s*\(\s*(?:'|")?\s*([^('|"|\))]*)\s*(?:'|")?\s*\)/i,
    // Target of a single @import rule written without url(...) (capture group 1).
    IMPORT_VALUE_ALT_EXP = /@import\s*\(?\s*(?:'|")?\s*([^('|"|\))]*)\s*(?:'|")?\s*(?:\)|;)/i,
    // Every url(...) occurrence in a piece of CSS text.
    URL_EXP = /url\s*\(([^\)]*)\)/gi,
    // Every @import rule, in either the url(...) form or the plain string form.
    IMPORT_EXP = /(@import\s*url\s*\([^\)]*\)\s*;?)|(@import\s*('|")?\s*[^\(|;|'|"]*\s*('|")?\s*;)/gi,
    // Every @import rule in the plain string form only.
    IMPORT_ALT_EXP = /@import\s*('|")?\s*[^\(|;|'|"]*\s*('|")?\s*;/gi,
    // GIF data URI used as the fallback value when an image could not be embedded.
    EMPTY_PIXEL_DATA = "data:image/gif;base64,R0lGODlhAQABAIAAAP///////yH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==";
  /**
   * Resolves a link found in a document against the URL of that document.
   *
   * - An empty link yields "".
   * - A link that starts with a scheme (http:, data:, mailto:, ...) is returned trimmed and otherwise untouched.
   * - A link made only of a query and/or fragment is attached to the base document itself.
   * - "//authority/path" keeps the scheme of the base, "/path" keeps its scheme and authority.
   * - Any other link is resolved against the directory of the base; "." and ".." segments are
   *   collapsed and ".." never climbs above the authority.
   *
   * The query string and fragment of the link are carried over verbatim and are never split on "/".
   *
   * @param {string} link reference to resolve
   * @param {string} host URL of the document containing the link (its own query and fragment are ignored)
   * @returns {string} the resolved URL, or "" when the link is empty
   */
  function formatURL(link, host) {
    var i, cut, base, segment, hparts, lparts, resolved = [], suffix = "", keepSlash = false;
    if (!link)
      return "";
    link = link.trim();
    // The scheme test is anchored so that a scheme appearing later in the link (e.g. inside a query) is ignored.
    if (/^[a-z][a-z0-9+.\-]*:/i.test(link))
      return link;
    // Set the query/fragment aside: it may legitimately contain "/" or ".." and must not be normalized.
    cut = link.search(/[?#]/);
    if (cut != -1) {
      suffix = link.substring(cut);
      link = link.substring(0, cut);
    }
    if (link == "") {
      // Same-document reference: keep the document name; a new query replaces the query of the base.
      base = host.split("#")[0];
      if (suffix.charAt(0) == "?")
        base = base.split("?")[0];
      return (base + suffix).trim();
    }
    hparts = host.split("#")[0].split("?")[0].split("/");
    // Drop the document name so that only "scheme://authority/dir..." remains.
    if (hparts.length > 3)
      hparts.pop();
    lparts = link.split("/");
    if (lparts[0] == "") {
      if (lparts.length > 2 && lparts[1] == "") {
        // "//authority/path": reuse the scheme of the base only.
        hparts = [ hparts[0], "", lparts[2] ];
        lparts = lparts.slice(3);
      } else {
        // "/path": reuse the scheme and authority of the base.
        hparts = hparts.slice(0, 3);
        lparts = lparts.slice(1);
      }
    }
    for (i = 0; i < lparts.length; i++) {
      segment = lparts[i];
      // A path ending in "/", "." or ".." designates a directory: remember to keep the trailing slash.
      keepSlash = segment == "" || segment == "." || segment == "..";
      if (segment == "..") {
        if (resolved.length)
          resolved.pop();
        else if (hparts.length > 3)
          hparts.pop();
      } else if (segment != "" && segment != ".")
        resolved.push(segment);
    }
    return (hparts.join("/") + "/" + resolved.join("/") + (keepSlash && resolved.length ? "/" : "") + suffix).trim();
  }
  /**
   * Rewrites every url(...) value and every plain @import rule of a piece of CSS text so that
   * it points to an absolute URL. Targets that are already data: URIs are left untouched.
   *
   * @param {string} content CSS text
   * @param {string} host URL the relative references are resolved against
   * @returns {string} the CSS text with resolved references
   */
  function resolveURLs(content, host) {
    function isDataURI(url) {
      return url.indexOf("data:") == 0;
    }

    var resolved = content.replace(URL_EXP, function(value) {
      var start, result = value.match(URL_VALUE_EXP);
      if (!result || isDataURI(result[1]))
        return value;
      // Splice the resolved URL in by position, searching after the opening parenthesis: a textual
      // replace could hit the letters of "url" itself and would interpret "$" sequences of the new URL.
      start = value.indexOf(result[1], value.indexOf("(") + 1);
      return value.substring(0, start) + formatURL(result[1], host) + value.substring(start + result[1].length);
    });

    return resolved.replace(IMPORT_ALT_EXP, function(value) {
      var result = value.match(IMPORT_VALUE_ALT_EXP);
      if (!result || isDataURI(result[1]))
        return value;
      return "@import \"" + formatURL(result[1], host) + "\";";
    });
  }
  /**
   * Builds the value that replaces a resource reference once the resource has been fetched.
   *
   * @param {Object} data response; its content, mediaType and mediaTypeParam properties are read
   * @param {string} defaultURL value used when the response has no content
   * @param {boolean} woURL when truthy the result is a bare URI, otherwise it is wrapped in "url(" and ")"
   * @returns {string} a data: URI built from the response, or the default URL
   */
  function getDataURI(data, defaultURL, woURL) {
    var opening = woURL ? "" : "url(", closing = woURL ? "" : ")";
    if (!data.content)
      return woURL ? defaultURL : [ "url(", defaultURL, ")" ].join("");
    // join() renders null or undefined parts as empty strings
    return [ opening, "data:", data.mediaType, ";", data.mediaTypeParam, ",", data.content, closing ].join("");
  }
  // Strips every terminated block comment from a piece of CSS text in a single left-to-right pass.
  // An unterminated comment is left in place, together with everything that follows it.
  //
  // content: CSS text
  // returns: the text without its block comments
  function removeComments(content) {
    var pieces = [], position = 0, start, end;
    while ((start = content.indexOf("/*", position)) != -1) {
      // Look for the terminator after the opening delimiter, so that "/*/" is not taken for a complete comment.
      end = content.indexOf("*/", start + 2);
      if (end == -1)
        break;
      pieces.push(content.substring(position, start));
      position = end + 2;
    }
    pieces.push(content.substring(position));
    return pieces.join("");
  }
  /**
   * Queues one request per url(...) value found in a piece of CSS text (comments excluded) and,
   * as responses arrive, replaces every occurrence of the original reference by a data: URI.
   * The callback receives the rewritten text once all queued requests have been answered; it is
   * not invoked when no request was queued.
   *
   * @param {string} content CSS text
   * @param {string} host URL the references are resolved against
   * @param {Object} requestManager object whose send(url, handler, characterSet, mediaTypeParam) queues a request
   * @param {Function} callback called with the rewritten CSS text
   */
  function replaceURLs(content, host, requestManager, callback) {
    var matches = removeComments(content).match(URL_EXP), queuedCount = 0, answeredCount = 0;

    // Escapes a reference so that it can be used as a literal pattern.
    function toPattern(text) {
      return new RegExp(text.replace(/([{}\(\)\^$&.\*\?\/\+\|\[\\\\]|\]|\-)/g, "\\$1"), "gi");
    }

    function embed(origUrl, absoluteUrl) {
      queuedCount++;
      requestManager.send(absoluteUrl, function(data) {
        answeredCount++;
        if (content.indexOf(origUrl) != -1) {
          // keep the bare media type, without its parameters
          data.mediaType = data.mediaType ? data.mediaType.split(";")[0] : null;
          content = content.replace(toPattern(origUrl), getDataURI(data, EMPTY_PIXEL_DATA, true));
        }
        if (answeredCount == queuedCount)
          callback(content);
      }, "x-user-defined", "base64");
    }

    (matches || []).forEach(function(match) {
      var absoluteUrl, parsed = match.match(URL_VALUE_EXP);
      if (!parsed || !parsed[1])
        return;
      absoluteUrl = formatURL(parsed[1], host);
      if (absoluteUrl.indexOf("data:") != 0)
        embed(parsed[1], absoluteUrl);
    });
  }
  125. // ----------------------------------------------------------------------------------------------
  /**
   * Queues a request for every linked stylesheet whose href is not a data: URI. When the response
   * arrives the link element is replaced by a style element holding the fetched CSS (with its
   * references resolved against the response URL), or by a comment holding the escaped markup of
   * that style element when the link is disabled. A link whose response status is 400 or above is removed.
   *
   * @param {Document} doc document used to create the new nodes
   * @param {Element} docElement root of the tree to process
   * @param {string} baseURI URL the href attributes are resolved against
   * @param {Object} requestManager object whose send(url, handler) queues a request
   */
  function processStylesheets(doc, docElement, baseURI, requestManager) {
    Array.prototype.forEach.call(docElement.querySelectorAll('link[href][rel*="stylesheet"]'), function(node) {
      var fullHref, href = node.getAttribute("href");
      if (href.indexOf("data:") == 0)
        return;
      fullHref = formatURL(href, baseURI);
      requestManager.send(fullHref, function(data) {
        var i, attribute, newNode, commentNode;
        if (data.status >= 400) {
          node.parentElement.removeChild(node);
          return;
        }
        newNode = doc.createElement("style");
        for (i = 0; i < node.attributes.length; i++) {
          attribute = node.attributes[i];
          if (attribute.value)
            newNode.setAttribute(attribute.name, attribute.value);
        }
        // remembered so that references inside this sheet can later be resolved against its own URL
        newNode._baseURI = fullHref;
        newNode.removeAttribute("href");
        newNode.textContent = resolveURLs(data.content || "", data.url);
        if (node.disabled) {
          // createComment() requires its data argument: pass the escaped markup directly
          commentNode = doc.createComment(newNode.outerHTML.replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/--/g, "&minus;&minus;"));
          node.parentElement.replaceChild(commentNode, node);
        } else
          node.parentElement.replaceChild(newNode, node);
      });
    });
  }
  /**
   * Queues a request for every @import rule found in the style elements (comments excluded).
   * When a response arrives the rule is replaced in the sheet by the imported CSS, with its
   * references resolved against the response URL, or removed when the status is 400 or above
   * or the response has no content.
   *
   * @param {Element} docElement root of the tree to process
   * @param {string} baseURI URL used for sheets that carry no _baseURI of their own
   * @param {string} characterSet character set passed along with each request
   * @param {Object} requestManager object whose send(url, handler, characterSet, mediaTypeParam) queues a request
   * @returns {boolean} true when no request was queued
   */
  function processImports(docElement, baseURI, characterSet, requestManager) {
    var ret = true;
    Array.prototype.forEach.call(docElement.querySelectorAll("style"), function(styleSheet) {
      var i, url, result, imports = removeComments(styleSheet.textContent).match(IMPORT_EXP);

      function sendRequest(imp, importURL) {
        requestManager.send(importURL, function(data) {
          var replacement = data.status < 400 && data.content ? resolveURLs(data.content, data.url) : "";
          // A replacer function inserts the CSS verbatim: given as a string, sequences such as "$'"
          // (e.g. in a[href$='.pdf']) would be expanded as replacement patterns.
          styleSheet.textContent = styleSheet.textContent.replace(imp, function() {
            return replacement;
          });
        }, null, characterSet);
        ret = false;
      }

      if (imports)
        for (i = 0; i < imports.length; i++) {
          result = imports[i].match(IMPORT_URL_VALUE_EXP);
          if (result && (result[2] || result[4])) {
            url = formatURL(result[2] || result[4], styleSheet._baseURI || baseURI);
            if (url.indexOf("data:") != 0)
              sendRequest(imports[i], url);
          }
        }
    });
    return ret;
  }
  /**
   * Embeds the resources referenced by the style attribute of every element carrying one.
   *
   * @param {Element} docElement root of the tree to process
   * @param {string} baseURI URL the references are resolved against
   * @param {Object} requestManager request queue handed over to replaceURLs
   */
  function processStyleAttributes(docElement, baseURI, requestManager) {
    var index, styledNodes = docElement.querySelectorAll("*[style]");

    function embedResources(node) {
      replaceURLs(node.getAttribute("style"), baseURI, requestManager, function(style) {
        node.setAttribute("style", style);
      });
    }

    for (index = 0; index < styledNodes.length; index++)
      embedResources(styledNodes[index]);
  }
  /**
   * Queues a request for the background attribute of every element carrying one and replaces
   * the attribute by a data: URI once the response arrives. Values without a "." and values
   * that resolve to a data: URI are skipped.
   *
   * @param {Element} docElement root of the tree to process
   * @param {string} baseURI URL the attribute values are resolved against
   * @param {Object} requestManager object whose send(url, handler, characterSet, mediaTypeParam) queues a request
   */
  function processBgAttributes(docElement, baseURI, requestManager) {
    var index, nodes = docElement.querySelectorAll("*[background]");

    function embedBackground(node) {
      var url, value = node.getAttribute("background");
      if (value.indexOf(".") == -1)
        return;
      url = formatURL(value, baseURI);
      if (url.indexOf("data:") == 0)
        return;
      requestManager.send(url, function(data) {
        node.setAttribute("background", getDataURI(data, EMPTY_PIXEL_DATA, true));
      }, "x-user-defined", "base64");
    }

    for (index = 0; index < nodes.length; index++)
      embedBackground(nodes[index]);
  }
  /**
   * Appends a link to "/favicon.ico" (resolved against the base URI) to the head of the
   * document when the document has a head and declares no icon of its own.
   *
   * @param {Document} doc document used to create the link element
   * @param {Element} docElement root of the tree to process
   * @param {string} baseURI URL the default icon path is resolved against
   */
  function insertDefaultFavico(doc, docElement, baseURI) {
    var iconLink;
    var docHead = docElement.querySelector("html > head");
    var declaredIcon = docElement.querySelector('link[href][rel="shortcut icon"], link[href][rel="apple-touch-icon"], link[href][rel="icon"]');
    if (declaredIcon || !docHead)
      return;
    iconLink = doc.createElement("link");
    iconLink.setAttribute("type", "image/x-icon");
    iconLink.setAttribute("rel", "shortcut icon");
    iconLink.setAttribute("href", formatURL("/favicon.ico", baseURI));
    docHead.appendChild(iconLink);
  }
  /**
   * Queues a request for every icon link (href), image and image input (src) and video poster,
   * and replaces the attribute by a data: URI once the response arrives. References that
   * resolve to a data: URI are skipped.
   *
   * @param {Element} docElement root of the tree to process
   * @param {string} baseURI URL the attribute values are resolved against
   * @param {Object} requestManager object whose send(url, handler, characterSet, mediaTypeParam) queues a request
   */
  function processImages(docElement, baseURI, requestManager) {
    function process(selector, attributeName) {
      Array.prototype.forEach.call(docElement.querySelectorAll(selector), function(node) {
        var url = formatURL(node.getAttribute(attributeName), baseURI);
        if (url.indexOf("data:") == 0)
          return;
        requestManager.send(url, function(data) {
          node.setAttribute(attributeName, getDataURI(data, EMPTY_PIXEL_DATA, true));
        }, "x-user-defined", "base64");
      });
    }

    process('link[href][rel="shortcut icon"], link[href][rel="apple-touch-icon"], link[href][rel="icon"]', "href");
    process('img[src], input[src][type="image"]', "src");
    process('video[poster]', "poster");
  }
  /**
   * Queues a request for every SVG object or embed element and replaces the attribute the
   * reference was read from ("data" when present, "src" otherwise) by a data: URI once the
   * response arrives. Elements without a reference, and references that resolve to a data:
   * URI, are skipped.
   *
   * @param {Element} docElement root of the tree to process
   * @param {string} baseURI URL the references are resolved against
   * @param {Object} requestManager object whose send(url, handler, characterSet, mediaTypeParam) queues a request
   */
  function processSVGs(docElement, baseURI, requestManager) {
    var images = docElement.querySelectorAll('object[type="image/svg+xml"], object[type="image/svg-xml"], embed[src*=".svg"]');
    Array.prototype.forEach.call(images, function(node) {
      // Decided once, outside the response handler, so that the response object cannot be
      // mistaken for the attribute value.
      var attributeName = node.getAttribute("data") ? "data" : "src";
      var url = formatURL(node.getAttribute(attributeName), baseURI);
      if (!url || url.indexOf("data:") == 0)
        return;
      requestManager.send(url, function(response) {
        node.setAttribute(attributeName, getDataURI(response, "data:text/xml,<svg></svg>", true));
      }, null, null);
    });
  }
  /**
   * Embeds the resources referenced by the CSS text of every style element. References are
   * resolved against the _baseURI recorded on the element when there is one, against the base
   * URI of the document otherwise.
   *
   * @param {Element} docElement root of the tree to process
   * @param {string} baseURI fallback URL the references are resolved against
   * @param {Object} requestManager request queue handed over to replaceURLs
   */
  function processStyles(docElement, baseURI, requestManager) {
    var index, sheets = docElement.querySelectorAll("style");

    function embedResources(sheet) {
      var sheetBase = sheet._baseURI || baseURI;
      replaceURLs(sheet.textContent, sheetBase, requestManager, function(textContent) {
        sheet.textContent = textContent;
      });
    }

    for (index = 0; index < sheets.length; index++)
      embedResources(sheets[index]);
  }
  /**
   * Queues a request for every external script whose src is not a data: URI. When the response
   * arrives the src attribute is removed and, if the status is below 400, the fetched code is
   * inlined in the element. Closing script tags found inside quoted strings of the code are split
   * in two concatenated strings so that they cannot terminate the inlined element.
   *
   * @param {Element} docElement root of the tree to process
   * @param {string} baseURI URL the src attributes are resolved against
   * @param {string} characterSet character set passed along with each request
   * @param {Object} requestManager object whose send(url, handler, characterSet) queues a request
   */
  function processScripts(docElement, baseURI, characterSet, requestManager) {
    Array.prototype.forEach.call(docElement.querySelectorAll("script[src]"), function(node) {
      var src = node.getAttribute("src");
      if (src.indexOf("data:") == 0)
        return;
      requestManager.send(formatURL(src, baseURI), function(data) {
        var content;
        if (data.status < 400) {
          // a successful response may come without content: treat it as an empty script
          content = (data.content || "").replace(/"([^"]*)<\/\s*script\s*>([^"]*)"/gi, '"$1<"+"/script>$2"');
          content = content.replace(/'([^']*)<\/\s*script\s*>([^']*)'/gi, "'$1<'+'/script>$2'");
          node.textContent = [ "\n", content, "\n" ].join("");
        }
        node.removeAttribute("src");
      }, characterSet);
    });
  }
  /**
   * Replaces every canvas element by an img element showing the data captured for it. The n-th
   * canvas of the tree uses the n-th entry of canvasData; a canvas without an entry is left
   * untouched. The attributes of the canvas are copied to the image and, when the image ends up
   * without a width or a height, the rendered size of the canvas is applied through its style.
   *
   * @param {Document} doc document used to create the img elements
   * @param {Element} docElement root of the tree to process
   * @param {Array} canvasData image URIs, in the document order of the canvas elements (may be omitted)
   */
  function processCanvas(doc, docElement, canvasData) {
    var snapshots = canvasData || [];
    Array.prototype.forEach.call(docElement.querySelectorAll("canvas"), function(node, index) {
      var i, attribute, newNode, data = snapshots[index];
      if (!data)
        return;
      newNode = doc.createElement("img");
      newNode.setAttribute("src", data);
      for (i = 0; i < node.attributes.length; i++) {
        attribute = node.attributes[i];
        if (attribute.value)
          newNode.setAttribute(attribute.name, attribute.value);
      }
      // standard CSS lengths are used: the pixelWidth/pixelHeight style properties are not standard
      if (!newNode.width)
        newNode.style.width = node.clientWidth + "px";
      if (!newNode.height)
        newNode.style.height = node.clientHeight + "px";
      node.parentElement.replaceChild(newNode, node);
    });
  }
  /**
   * Removes every script element of the tree, as well as the onload attribute of the body
   * when it has a non-empty one.
   *
   * @param {Element} docElement root of the tree to process
   */
  function removeScripts(docElement) {
    var index, script;
    var body = docElement.querySelector("html > body");
    var scripts = docElement.querySelectorAll("script");
    for (index = 0; index < scripts.length; index++) {
      script = scripts[index];
      script.parentElement.removeChild(script);
    }
    if (!body)
      return;
    if (body.getAttribute("onload"))
      body.removeAttribute("onload");
  }
  /**
   * Removes the applet, object and embed elements of the tree, except those selected as SVG
   * images, then removes the src attribute of the audio and video elements.
   *
   * @param {Element} docElement root of the tree to process
   */
  function removeObjects(docElement) {
    var index, plugins, medias;

    plugins = docElement.querySelectorAll('applet, object:not([type="image/svg+xml"]):not([type="image/svg-xml"]), embed:not([src*=".svg"])');
    for (index = 0; index < plugins.length; index++)
      plugins[index].parentElement.removeChild(plugins[index]);

    medias = docElement.querySelectorAll('audio[src], video[src]');
    for (index = 0; index < medias.length; index++)
      medias[index].removeAttribute("src");
  }
  /**
   * Removes the cite attribute of every blockquote element carrying one.
   *
   * @param {Element} docElement root of the tree to process
   */
  function removeBlockquotesCite(docElement) {
    var index, quotes = docElement.querySelectorAll("blockquote[cite]");
    for (index = 0; index < quotes.length; index++)
      quotes[index].removeAttribute("cite");
  }
  /**
   * Removes every iframe and frame element of the tree.
   *
   * @param {Element} docElement root of the tree to process
   */
  function removeFrames(docElement) {
    var index, frames = docElement.querySelectorAll("iframe, frame");
    for (index = 0; index < frames.length; index++)
      frames[index].parentElement.removeChild(frames[index]);
  }
  /**
   * Points every iframe and frame element to "about:blank", unless its src resolves to a data: URI.
   *
   * @param {Element} docElement root of the tree to process
   * @param {string} baseURI URL the src attributes are resolved against
   */
  function resetFrames(docElement, baseURI) {
    var index, frame, frames = docElement.querySelectorAll("iframe, frame");
    for (index = 0; index < frames.length; index++) {
      frame = frames[index];
      if (formatURL(frame.getAttribute("src"), baseURI).indexOf("data:") == 0)
        continue;
      frame.setAttribute("src", "about:blank");
    }
  }
  /**
   * Replaces the href of every anchor by its absolute form. An anchor is left as is when its
   * href resolves to nothing, or when the resolved URL both starts with the base URI (fragment
   * excluded) and contains a "#".
   *
   * @param {Element} docElement root of the tree to process
   * @param {string} baseURI URL the href attributes are resolved against
   */
  function setAbsoluteLinks(docElement, baseURI) {
    var index, anchors = docElement.querySelectorAll("a[href]");

    function makeAbsolute(link) {
      var startsWithBase, hasFragment, fullHref = formatURL(link.getAttribute("href"), baseURI);
      if (!fullHref)
        return;
      startsWithBase = fullHref.indexOf(baseURI.split("#")[0]) == 0;
      hasFragment = fullHref.indexOf("#") != -1;
      if (!(startsWithBase && hasFragment))
        link.setAttribute("href", fullHref);
    }

    for (index = 0; index < anchors.length; index++)
      makeAbsolute(anchors[index]);
  }
  317. // ----------------------------------------------------------------------------------------------
  /**
   * Prepares a document for being saved as a single file.
   *
   * The work is done in two phases, each with its own request queue:
   * - the initialization phase starts immediately: linked stylesheets are fetched and inlined,
   *   then @import rules are fetched and inlined, pass after pass, until a pass queues no request.
   *   The document is then cleaned up according to config and the requests of the second phase
   *   are queued (style attributes, backgrounds, images, SVGs, style elements, scripts);
   * - the second phase starts when the function returned by initProcess is called.
   *
   * @param {Document} doc document used to create new nodes
   * @param {Element} docElement root of the tree to process
   * @param {boolean} addDefaultFavico whether a default icon link is inserted when none is declared
   * @param {string} baseURI URL the references of the document are resolved against
   * @param {string} characterSet character set passed along with stylesheet import and script requests
   * @param {Object} config options; removeScripts, removeObjects, removeFrames and getRawDoc are read
   * @param {Array} canvasData image URIs captured for the canvas elements
   * @param {Object} requestManager object whose send(url, handler, characterSet, mediaTypeParam) performs a request
   * @param {Function} onInit called asynchronously with the number of requests queued for the second phase
   * @param {Function} onProgress called with (answered, total) after each response of the second phase
   * @param {Function} onEnd called when the second phase is over
   * @returns {Function} function starting the second phase
   */
  singlefile.initProcess = function(doc, docElement, addDefaultFavico, baseURI, characterSet, config, canvasData, requestManager, onInit, onProgress, onEnd) {
    var initManager = new RequestManager(), manager = new RequestManager(onProgress);

    // Queue of requests: send() only records a request, doSend() hands the recorded requests over
    // to the external request manager and calls onEnd (when set) once they all have been answered.
    function RequestManager(onProgress) {
      var that = this, currentCount = 0, requests = [];
      this.requestCount = 0;
      this.send = function(url, responseHandler, characterSet, mediaTypeParam) {
        that.requestCount++;
        requests.push({
          url : url,
          responseHandler : responseHandler,
          characterSet : characterSet,
          mediaTypeParam : mediaTypeParam
        });
      };
      this.doSend = function() {
        // Detach the queue before dispatching, so that a request recorded while responses are
        // being handled lands in a fresh queue instead of being discarded.
        var pending = requests;
        requests = [];
        pending.forEach(function(request) {
          requestManager.send(request.url, function(response) {
            request.responseHandler(response);
            currentCount++;
            if (onProgress)
              onProgress(currentCount, that.requestCount);
            if (currentCount == that.requestCount) {
              that.requestCount = 0;
              currentCount = 0;
              if (that.onEnd)
                that.onEnd();
            }
          }, request.characterSet, request.mediaTypeParam);
        });
      };
    }

    // Runs once no stylesheet import is left: cleans the document up and queues the second phase.
    function cbImports() {
      if (config.removeScripts)
        removeScripts(docElement);
      if (config.removeObjects)
        removeObjects(docElement);
      if (config.removeFrames || config.getRawDoc)
        removeFrames(docElement);
      resetFrames(docElement, baseURI);
      removeBlockquotesCite(docElement);
      setAbsoluteLinks(docElement, baseURI);
      if (addDefaultFavico)
        insertDefaultFavico(doc, docElement, baseURI);
      processStyleAttributes(docElement, baseURI, manager);
      processBgAttributes(docElement, baseURI, manager);
      processImages(docElement, baseURI, manager);
      processSVGs(docElement, baseURI, manager);
      processStyles(docElement, baseURI, manager);
      processScripts(docElement, baseURI, characterSet, manager);
      processCanvas(doc, docElement, canvasData);
      if (onInit)
        setTimeout(function() {
          onInit(manager.requestCount);
        }, 1);
    }

    // Runs one pass of import inlining. It is the onEnd handler of the initialization queue, so it
    // runs again after each pass that queued requests; a pass that queues nothing ends the phase.
    function cbStylesheets() {
      processImports(docElement, baseURI, characterSet, initManager);
      initManager.doSend();
      if (initManager.requestCount == 0)
        cbImports();
    }

    manager.onEnd = onEnd;
    processStylesheets(doc, docElement, baseURI, initManager);
    initManager.onEnd = cbStylesheets;
    initManager.doSend();
    if (initManager.requestCount == 0)
      initManager.onEnd();
    return function() {
      manager.doSend();
      if (manager.requestCount == 0 && manager.onEnd)
        manager.onEnd();
    };
  };
  398. })();