filters.js 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434
  1. /*
  2. * Copyright 2010 Gildas Lormeau
  3. * contact : gildas.lormeau <at> gmail.com
  4. *
  5. * This file is part of SingleFile.
  6. *
  7. * SingleFile is free software: you can redistribute it and/or modify
  8. * it under the terms of the GNU Lesser General Public License as published by
  9. * the Free Software Foundation, either version 3 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * SingleFile is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public License
  18. * along with SingleFile. If not, see <http://www.gnu.org/licenses/>.
  19. */
  20. (function(holder) {
  // Shared patterns and state for the filters below.
  var
    // Matches a url(...) value (URL captured in group 2) or an @import target (URL captured in group 4).
    IMPORT_URL_VALUE_EXP = /(url\s*\(\s*(?:'|")?\s*([^('|"|\))]*)\s*(?:'|")?\s*\))|(@import\s*\(?\s*(?:'|")?\s*([^('|"|\))]*)\s*(?:'|")?\s*(?:\)|;))/i,
    // Captures the URL inside a single url(...) value (group 1), without quotes.
    URL_VALUE_EXP = /url\s*\(\s*(?:'|")?\s*([^('|"|\))]*)\s*(?:'|")?\s*\)/i,
    // Captures the target of a single @import rule (group 1), without quotes.
    IMPORT_VALUE_ALT_EXP = /@import\s*\(?\s*(?:'|")?\s*([^('|"|\))]*)\s*(?:'|")?\s*(?:\)|;)/i,
    // Finds every url(...) value in a piece of CSS.
    URL_EXP = /url\s*\(([^\)]*)\)/gi,
    // Finds every @import rule, written either with url(...) or with a bare/quoted target.
    IMPORT_EXP = /(@import\s*url\s*\([^\)]*\)\s*;?)|(@import\s*('|")?\s*[^\(|;|'|"]*\s*('|")?\s*;)/gi,
    // Finds @import rules written without url(...).
    IMPORT_ALT_EXP = /@import\s*('|")?\s*[^\(|;|'|"]*\s*('|")?\s*;/gi,
    // Data URI used as the fallback when a resource has no content.
    EMPTY_PIXEL_DATA = "data:image/gif;base64,R0lGODlhAQABAIAAAP///////yH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==",
    // Document the filters operate on; assigned by holder.filters.init().
    targetDoc;
  /**
   * Removes leading and trailing whitespace from a string.
   *
   * Uses the native String.prototype.trim, which strips the same characters
   * as the regular expression class \s but runs in linear time; a lazy
   * "^\s*([\S\s]*?)\s*$" pattern backtracks quadratically on strings that
   * contain long inner runs of whitespace.
   *
   * @param {string} s the string to trim
   * @returns {string} the trimmed string
   */
  function trim(s) {
    return s.trim();
  }
  /**
   * Resolves a (possibly relative) link against a base URL.
   *
   * - A link that starts with a URL scheme ("http:", "data:", "file:",
   *   "mailto:", ...) is returned trimmed and otherwise untouched.
   * - The query string and fragment of the base are ignored.
   * - The query string and fragment of the link are carried over verbatim and
   *   never take part in path normalisation.
   * - A link with an empty path ("", "?q", "#id") refers to the base itself.
   * - "//authority/path" keeps the scheme of the base, "/path" keeps scheme and
   *   authority, anything else is resolved against the directory of the base.
   * - "." and empty path segments are dropped; ".." removes the closest
   *   preceding segment and never climbs above the authority.
   * - A trailing slash on the link path is preserved.
   *
   * @param {string} link the URL reference to resolve
   * @param {string} host the base URL
   * @returns {string} the resolved URL
   */
  function formatURL(link, host) {
    var ABSOLUTE_URL_EXP = /^\s*[a-z][a-z0-9+.\-]*:/i;
    var base, baseParts, path, suffix, suffixIndex, segments, lastSegment, resolved, i;

    if (ABSOLUTE_URL_EXP.test(link))
      return trim(link);

    base = host.split("#")[0].split("?")[0];
    path = trim(link);

    // Set the query string / fragment aside so that "/", "." and ".." inside it are left alone.
    suffix = "";
    suffixIndex = path.search(/[?#]/);
    if (suffixIndex != -1) {
      suffix = path.substring(suffixIndex);
      path = path.substring(0, suffixIndex);
    }
    if (path == "")
      return trim(base + suffix);

    // For an absolute base, baseParts is [ "scheme:", "", "authority", ...directories, lastSegment ].
    baseParts = base.split("/");
    segments = path.split("/");
    if (segments[0] == "") {
      if (segments.length > 2 && segments[1] == "" && segments[2] != "") {
        // Protocol-relative link: keep only the scheme of the base.
        baseParts = [ baseParts[0], "", segments[2] ];
        segments = segments.slice(3);
      } else {
        // Root-relative link: keep scheme and authority of the base.
        baseParts = [ baseParts[0], "", baseParts[2] ];
        segments = segments.slice(1);
      }
    } else if (baseParts.length > 3)
      // Relative link: resolve against the directory of the base.
      baseParts.pop();

    resolved = [];
    for (i = 0; i < segments.length; i++) {
      if (segments[i] == "..") {
        if (resolved.length)
          resolved.pop();
        else if (baseParts.length > 3)
          baseParts.pop();
      } else if (segments[i] != "" && segments[i] != ".")
        resolved.push(segments[i]);
    }

    // A path ending in "/", "/." or "/.." designates a directory: keep the trailing slash.
    lastSegment = segments[segments.length - 1];
    if (resolved.length && (lastSegment == "" || lastSegment == "." || lastSegment == ".."))
      resolved.push("");

    return trim(baseParts.join("/") + "/" + resolved.join("/") + suffix);
  }
  /**
   * Rewrites the URLs found in a piece of CSS so that they are resolved
   * against the given base URL.
   *
   * url(...) values are rewritten in place, keeping their original quoting.
   * @import rules written without url(...) are re-emitted as
   * '@import "<resolved URL>";'. Empty url() values, data: URIs and
   * fragment-only references such as url(#id) are left untouched.
   *
   * @param {string} content the CSS text
   * @param {string} host the base URL the CSS was loaded from
   * @returns {string} the CSS text with resolved URLs
   */
  function resolveURLs(content, host) {
    var ret = content.replace(URL_EXP, function(value) {
      var target, start, result = value.match(URL_VALUE_EXP);
      if (!result || !result[1])
        return value;
      target = result[1];
      if (target.indexOf("data:") == 0 || target.charAt(0) == "#")
        return value;
      // Locate the URL after the opening parenthesis and splice the resolved
      // URL in: a plain string replace() would hit the first textual
      // occurrence (e.g. the "u" of "url") and would expand "$" patterns
      // contained in the resolved URL.
      start = value.indexOf(target, value.indexOf("(", result.index) + 1);
      return value.substring(0, start) + formatURL(target, host) + value.substring(start + target.length);
    });
    return ret.replace(IMPORT_ALT_EXP, function(value) {
      var result = value.match(IMPORT_VALUE_ALT_EXP);
      if (result)
        if (result[1].indexOf("data:") != 0)
          return "@import \"" + formatURL(result[1], host) + "\";";
      return value;
    });
  }
  /**
   * Builds the value that replaces a resource reference.
   *
   * When data.content is set the result is the data URI
   * "data:<mediaType>;<mediaTypeParam>,<content>", otherwise it is defaultURL.
   * Unless woURL is truthy the result is wrapped in "url(...)".
   *
   * @param {Object} data object providing content, mediaType and mediaTypeParam
   * @param {string} defaultURL value used when data has no content
   * @param {boolean} woURL when truthy, the result is not wrapped in "url(...)"
   * @returns {string} the data URI or default URL, optionally wrapped in "url(...)"
   */
  function getDataURI(data, defaultURL, woURL) {
    var uri;
    if (data.content)
      uri = "data:" + data.mediaType + ";" + data.mediaTypeParam + "," + data.content;
    else
      uri = defaultURL;
    if (woURL)
      return uri;
    return "url(" + uri + ")";
  }
  /**
   * Removes every complete comment ("/" + "*" ... "*" + "/") from a piece of CSS.
   *
   * The text is scanned once from left to right. The closing delimiter is
   * searched for after the two characters of the opening delimiter, so the
   * sequence slash-star-slash does not close the comment it opens. An
   * unterminated comment is left in place.
   *
   * @param {string} content the CSS text
   * @returns {string} the CSS text without comments
   */
  function removeCssComments(content) {
    var pieces = [], cursor = 0, start, end;
    while ((start = content.indexOf("/*", cursor)) != -1) {
      end = content.indexOf("*/", start + 2);
      if (end == -1)
        break;
      pieces.push(content.substring(cursor, start));
      cursor = end + 2;
    }
    pieces.push(content.substring(cursor));
    return pieces.join("");
  }
  /**
   * Requests every resource referenced through url(...) in a piece of CSS and
   * replaces each reference with the data URI built from the response.
   *
   * The CSS is read through getStyle() and written back through setStyle(),
   * so the same routine serves style attributes and style elements. For each
   * reference, callback(url, onData, true) is invoked with the resolved URL;
   * when onData(data) runs, every occurrence of the original reference text
   * still present in the style is replaced (case-insensitively) with
   * getDataURI(data, EMPTY_PIXEL_DATA, true).
   *
   * Empty references, references that resolve to a data: URI and
   * fragment-only references such as url(#id) are skipped: the latter point
   * at an element of the document, not at a resource to download.
   *
   * @param {function(): string} getStyle returns the current CSS text
   * @param {function(string)} setStyle stores the updated CSS text
   * @param {string} host base URL used to resolve the references
   * @param {function(string, function(Object), boolean)} callback issues the request
   */
  function replaceCssURLs(getStyle, setStyle, host, callback) {
    var values = removeCssComments(getStyle()).match(URL_EXP);
    if (!values)
      return;
    values.forEach(function(value) {
      var origUrl, url, result = value.match(URL_VALUE_EXP);
      if (!result || !result[1] || result[1].charAt(0) == "#")
        return;
      origUrl = result[1];
      url = formatURL(origUrl, host);
      if (url.indexOf("data:") == 0)
        return;
      callback(url, function(data) {
        var dataURI, origUrlExp;
        if (getStyle().indexOf(origUrl) != -1) {
          dataURI = getDataURI(data, EMPTY_PIXEL_DATA, true);
          origUrlExp = new RegExp(origUrl.replace(/([{}\(\)\^$&.\*\?\/\+\|\[\\\\]|\]|\-)/g, "\\$1"), "gi");
          // A replacer function inserts the data URI literally, whatever characters it contains.
          setStyle(getStyle().replace(origUrlExp, function() {
            return dataURI;
          }));
        }
      }, true);
    });
  }
  119. holder.filters = {
  120. init : function(doc) {
  121. targetDoc = doc;
  122. },
  123. document : {
  124. getStylesheets : function(doc, sendRequest) {
  125. holder.filters.link.get(doc, sendRequest);
  126. holder.filters.style.getImport(doc, sendRequest);
  127. },
  128. get : function(doc, sendRequest, topWindow) {
  129. holder.filters.styleAttr.get(doc, sendRequest);
  130. holder.filters.bgAttr.get(doc, sendRequest);
  131. holder.filters.image.get(doc, sendRequest);
  132. if (topWindow)
  133. holder.filters.image.getFavico(doc, sendRequest);
  134. holder.filters.svg.get(doc, sendRequest);
  135. holder.filters.script.get(doc, sendRequest);
  136. holder.filters.style.getURL(doc, sendRequest);
  137. },
  138. getDoctype : function() {
  139. var docType = targetDoc.doctype, docTypeStr;
  140. if (docType) {
  141. docTypeStr = "<!DOCTYPE " + docType.nodeName;
  142. if (docType.publicId) {
  143. docTypeStr += " PUBLIC \"" + docType.publicId + "\"";
  144. if (docType.systemId)
  145. docTypeStr += " \"" + docType.systemId + "\"";
  146. } else if (docType.systemId)
  147. docTypeStr += " SYSTEM \"" + docType.systemId + "\"";
  148. if (docType.internalSubset)
  149. docTypeStr += " [" + docType.internalSubset + "]";
  150. return docTypeStr + ">\n";
  151. }
  152. return "";
  153. }
  154. },
  155. element : {
  156. clean : function(doc) {
  157. Array.prototype.forEach.call(doc.querySelectorAll("blockquote[cite]"), function(element) {
  158. element.removeAttribute("cite");
  159. });
  160. },
  161. removeHidden : function() {
  162. if (targetDoc.body)
  163. Array.prototype.forEach.call(targetDoc.body.querySelectorAll("*:not(style):not(script):not(link):not(area)"), function(element) {
  164. var style = getComputedStyle(element);
  165. if ((style.visibility == "hidden" || style.display == "none" || style.opacity == 0) && (element.id != "__SingleFile_mask__"))
  166. element.parentElement.removeChild(element);
  167. });
  168. }
  169. },
  170. a : {
  171. setAbsolute : function(doc) {
  172. var baseURI = document.baseURI.split("#")[0];
  173. Array.prototype.forEach.call(doc.querySelectorAll("a[href]"), function(link) {
  174. if (link.href && (link.href.indexOf(baseURI) != 0 || link.href.indexOf("#") == -1))
  175. link.href = link.href;
  176. });
  177. }
  178. },
  179. frame : {
  180. clean : function() {
  181. Array.prototype.forEach.call(targetDoc.querySelectorAll("iframe[src], frame[src]"), function(frame) {
  182. if (!frame.src)
  183. frame.removeAttribute("src");
  184. });
  185. },
  186. count : function() {
  187. return targetDoc.querySelectorAll("iframe[src], frame[src]").length;
  188. },
  189. remove : function(doc) {
  190. Array.prototype.forEach.call(doc.querySelectorAll("iframe[src], frame[src]"), function(frame) {
  191. frame.src = "about:blank";
  192. });
  193. },
  194. set : function(doc, urlsArray) {
  195. Array.prototype.forEach.call(doc.querySelectorAll("iframe[src], frame[src]"), function(frame, index) {
  196. frame.src = urlsArray[index] || "about:blank";
  197. });
  198. }
  199. },
  200. object : {
  201. remove : function(doc) {
  202. var i, nodes = doc.querySelectorAll('applet, object:not([type="image/svg+xml"]):not([type="image/svg-xml"]), embed:not([src*=".svg"])');
  203. for (i = 0; i < nodes.length; i++)
  204. nodes[i].parentElement.removeChild(nodes[i]);
  205. }
  206. },
  207. styleAttr : {
  208. get : function(doc, sendRequest) {
  209. var STYLE_ATTR_SELECTOR = "*[style]";
  210. Array.prototype.forEach.call(doc.querySelectorAll(STYLE_ATTR_SELECTOR), function(node) {
  211. replaceCssURLs(function() {
  212. return node.getAttribute("style");
  213. }, function(value) {
  214. node.setAttribute("style", value);
  215. }, targetDoc.baseURI, sendRequest);
  216. });
  217. }
  218. },
  219. bgAttr : {
  220. get : function(doc, sendRequest) {
  221. var BG_SELECTOR = 'body[background],table[background],thead[background],tbody[background],tr[background],th[background],td[background]';
  222. Array.prototype.forEach.call(doc.querySelectorAll(BG_SELECTOR), function(node) {
  223. var url, value = node.getAttribute("background");
  224. if (value.indexOf(".") != -1) {
  225. url = formatURL(value, targetDoc.baseURI);
  226. if (url.indexOf("data:") != 0)
  227. sendRequest(url, function(data) {
  228. node.setAttribute("background", getDataURI(data, EMPTY_PIXEL_DATA, true));
  229. }, true);
  230. }
  231. });
  232. }
  233. },
  234. image : {
  235. get : function(doc, sendRequest) {
  236. var IMG_SELECTOR = 'link[href][rel="shortcut icon"], link[href][rel="apple-touch-icon"], link[href][rel="icon"], img[src], input[src][type="image"], video[poster]';
  237. Array.prototype.forEach.call(doc.querySelectorAll(IMG_SELECTOR), function(node) {
  238. var url = formatURL(node.href || node.src || node.poster, targetDoc.baseURI);
  239. if (url.indexOf("data:") != 0)
  240. sendRequest(url, function(data) {
  241. node.setAttribute(node.href ? "href" : node.src ? "src" : "poster", getDataURI(data, EMPTY_PIXEL_DATA, true));
  242. }, true);
  243. });
  244. },
  245. getFavico : function(doc, sendRequest) {
  246. var node, docHead = doc.querySelector("html > head"), foundLink = false, IMG_SELECTOR = 'link[href][rel="shortcut icon"], link[href][rel="apple-touch-icon"], link[href][rel="icon"]';
  247. Array.prototype.forEach.call(doc.querySelectorAll(IMG_SELECTOR), function(n) {
  248. var url = formatURL(n.href, targetDoc.baseURI);
  249. if (!foundLink && url.indexOf("data:") != 0)
  250. foundLink = true;
  251. });
  252. if (!foundLink && docHead) {
  253. node = targetDoc.createElement("link");
  254. node.type = "image/x-icon";
  255. node.rel = "shortcut icon";
  256. node.href = "/favicon.ico";
  257. docHead.appendChild(node);
  258. sendRequest(node.href, function(data) {
  259. node.setAttribute(node.href ? "href" : "src", getDataURI(data, EMPTY_PIXEL_DATA, true));
  260. }, true);
  261. }
  262. },
  263. getFavicoData : function(doc) {
  264. var favico, favicosByRel = {}, favicos = doc.querySelectorAll('link[href][rel="shortcut icon"], link[href][rel="icon"], link[href][rel="apple-touch-icon"]');
  265. Array.prototype.forEach.call(favicos, function(favico) {
  266. favicosByRel[favico.rel.toLowerCase()] = favico;
  267. });
  268. favico = favicosByRel["shortcut icon"] || favicosByRel["icon"] || favicosByRel["apple-touch-icon"];
  269. if (favico && favico.href != EMPTY_PIXEL_DATA)
  270. return favico.href;
  271. return null;
  272. }
  273. },
  274. canvas : {
  275. replace : function() {
  276. Array.prototype.forEach.call(targetDoc.querySelectorAll("canvas"), function(node) {
  277. var i, data, newNode = targetDoc.createElement("img");
  278. try {
  279. data = node.toDataURL("image/png", "");
  280. } catch (e) {
  281. }
  282. if (data) {
  283. newNode.setAttribute("src", data);
  284. for (i = 0; i < node.attributes.length; i++)
  285. if (node.attributes[i].value)
  286. newNode.setAttribute(node.attributes[i].name, node.attributes[i].value);
  287. if (!newNode.width)
  288. newNode.style.pixelWidth = node.clientWidth;
  289. if (!newNode.height)
  290. newNode.style.pixelHeight = node.clientHeight;
  291. node.parentElement.replaceChild(newNode, node);
  292. }
  293. });
  294. }
  295. },
  296. svg : {
  297. get : function(doc, sendRequest) {
  298. var SVG_SELECTOR = 'object[type="image/svg+xml"], object[type="image/svg-xml"], embed[src*=".svg"]';
  299. Array.prototype.forEach.call(doc.querySelectorAll(SVG_SELECTOR), function(node) {
  300. var url = formatURL(node.data || node.src, targetDoc.baseURI);
  301. if (url.indexOf("data:") != 0)
  302. sendRequest(url, function(data) {
  303. node.setAttribute(node.data ? "data" : "src", getDataURI(data, "data:text/xml,<svg></svg>", true));
  304. }, false, true);
  305. });
  306. }
  307. },
  308. link : {
  309. get : function(doc, sendRequest) {
  310. var LINK_SELECTOR = 'link[href][rel*="stylesheet"]';
  311. Array.prototype.forEach.call(doc.querySelectorAll(LINK_SELECTOR), function(node) {
  312. if (node.href.indexOf("data:") != 0)
  313. sendRequest(node.href, function(data) {
  314. var i, newNode, commentNode;
  315. if (data.mediaType == "text/html") {
  316. node.parentElement.removeChild(node);
  317. return;
  318. }
  319. newNode = targetDoc.createElement("style");
  320. for (i = 0; i < node.attributes.length; i++)
  321. if (node.attributes[i].value)
  322. newNode.setAttribute(node.attributes[i].name, node.attributes[i].value);
  323. newNode._href = node.href;
  324. newNode.removeAttribute("href");
  325. newNode.textContent = resolveURLs(data.content || "", data.url) + "\n";
  326. if (node.disabled) {
  327. commentNode = doc.createComment();
  328. commentNode.textContent = newNode.outerHTML.replace(/</g,"&lt;").replace(/>/g,"&gt;").replace(/--/g,"&minus;&minus;");
  329. node.parentElement.replaceChild(commentNode, node);
  330. }
  331. else
  332. node.parentElement.replaceChild(newNode, node);
  333. });
  334. });
  335. }
  336. },
  337. script : {
  338. get : function(doc, sendRequest) {
  339. var SCRIPT_SELECTOR = 'script[src]';
  340. Array.prototype.forEach.call(doc.querySelectorAll(SCRIPT_SELECTOR), function(node) {
  341. if (node.src.indexOf("data:") != 0)
  342. sendRequest(node.src, function(data) {
  343. data.content = data.content.replace(/"([^"]*)<\/\s*script\s*>([^"]*)"/gi, '"$1<"+"/script>$2"');
  344. data.content = data.content.replace(/'([^']*)<\/\s*script\s*>([^']*)'/gi, "'$1<'+'/script>$2'");
  345. node.textContent = [ "\n", data.content, "\n" ].join("");
  346. node.removeAttribute("src");
  347. }, false, false, targetDoc.characterSet);
  348. });
  349. },
  350. remove : function(doc) {
  351. var i, nodes = doc.querySelectorAll('script'), body = doc.querySelector("html > body");
  352. for (i = 0; i < nodes.length; i++)
  353. nodes[i].parentElement.removeChild(nodes[i]);
  354. if (body && body.getAttribute("onload"))
  355. body.removeAttribute("onload");
  356. }
  357. },
  358. style : {
  359. getURL : function(doc, sendRequest) {
  360. Array.prototype.forEach.call(doc.querySelectorAll("style"), function(styleSheet) {
  361. replaceCssURLs(function() {
  362. return styleSheet.textContent;
  363. }, function(value) {
  364. styleSheet.textContent = value;
  365. }, styleSheet._href || targetDoc.baseURI, sendRequest);
  366. });
  367. },
  368. getImport : function(doc, sendRequest) {
  369. Array.prototype.forEach.call(doc.querySelectorAll("style"), function(styleSheet) {
  370. var i, url, result, imports = removeCssComments(styleSheet.textContent).match(IMPORT_EXP);
  371. if (imports)
  372. for (i = 0; i < imports.length; i++) {
  373. result = imports[i].match(IMPORT_URL_VALUE_EXP);
  374. if (result && (result[2] || result[4])) {
  375. url = formatURL(result[2] || result[4], styleSheet._href || targetDoc.baseURI);
  376. if (url.indexOf("data:") != 0)
  377. (function(imp) {
  378. sendRequest(url, function(data) {
  379. styleSheet.textContent = styleSheet.textContent.replace(imp, data.content ? resolveURLs(data.content, data.url)
  380. : "");
  381. }, false, false, targetDoc.characterSet);
  382. })(imports[i]);
  383. }
  384. }
  385. });
  386. },
  387. removeUnused : function() {
  388. Array.prototype.forEach.call(targetDoc.querySelectorAll("style"), function(style) {
  389. var cssRules = [];
  390. function process(rules) {
  391. var selector;
  392. Array.prototype.forEach.call(rules, function(rule) {
  393. if (rule instanceof CSSMediaRule) {
  394. cssRules.push("@media " + Array.prototype.join.call(rule.media, ",") + " {");
  395. process(rule.cssRules, true);
  396. cssRules.push("}");
  397. } else if (rule.selectorText) {
  398. selector = trim(rule.selectorText.replace(/::after|::before|::first-line|::first-letter|:focus|:hover/gi, ''));
  399. if (selector) {
  400. try {
  401. if (targetDoc.querySelector(selector))
  402. cssRules.push(rule.cssText);
  403. } catch (e) {
  404. cssRules.push(rule.cssText);
  405. }
  406. }
  407. }
  408. });
  409. }
  410. if (style.sheet) {
  411. process(style.sheet.rules);
  412. style.innerText = cssRules.join("");
  413. }
  414. });
  415. }
  416. }
  417. };
  418. })(singlefile);