filters.js 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383
  1. /*
  2. * Copyright 2010 Gildas Lormeau
  3. * contact : gildas.lormeau <at> gmail.com
  4. *
  5. * This file is part of SingleFile.
  6. *
  7. * SingleFile is free software: you can redistribute it and/or modify
  8. * it under the terms of the GNU Lesser General Public License as published by
  9. * the Free Software Foundation, either version 3 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * SingleFile is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public License
  18. * along with SingleFile. If not, see <http://www.gnu.org/licenses/>.
  19. */
  20. (function(holder) {
  // NOTE: the URL-capturing character classes below used to be written as [^('|"|\))] and
  // [^\(|;|'|"]. Inside a character class "|" is a literal, so any URL containing a pipe
  // (e.g. "css?family=A|B") could never be matched. The classes now exclude only the
  // characters that really delimit a CSS URL.

  // Matches either a url(...) token (whole token in group 1, URL in group 2) or a non-url
  // @import value (whole rule in group 3, URL in group 4).
  var IMPORT_URL_VALUE_EXP = /(url\s*\(\s*(?:'|")?\s*([^'"()]*)\s*(?:'|")?\s*\))|(@import\s*\(?\s*(?:'|")?\s*([^'"()]*)\s*(?:'|")?\s*(?:\)|;))/i;
  // Extracts the URL (group 1) from a single url(...) token, with or without quotes.
  var URL_VALUE_EXP = /url\s*\(\s*(?:'|")?\s*([^'"()]*)\s*(?:'|")?\s*\)/i;
  // Extracts the URL (group 1) from an @import rule that does not use url(...).
  var IMPORT_VALUE_ALT_EXP = /@import\s*\(?\s*(?:'|")?\s*([^'"()]*)\s*(?:'|")?\s*(?:\)|;)/i;
  // Finds every url(...) token in a piece of CSS.
  var URL_EXP = /url\s*\(([^\)]*)\)/gi;
  // Finds every @import rule, in both the url(...) form and the bare/quoted form.
  var IMPORT_EXP = /(@import\s*url\s*\([^\)]*\)\s*;?)|(@import\s*('|")?\s*[^(;'"]*\s*('|")?\s*;)/gi;
  // Finds @import rules written without url(...).
  var IMPORT_ALT_EXP = /@import\s*('|")?\s*[^(;'"]*\s*('|")?\s*;/gi;
  // Data URI used as the fallback when a resource has no content.
  var EMPTY_PIXEL_DATA = "data:image/gif;base64,R0lGODlhAQABAIAAAP///////yH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==";
  // Document assigned by holder.filters.init and read by the filters.
  var targetDoc;
  /**
   * Strips leading and trailing whitespace from a string.
   *
   * @param {string} s the text to trim
   * @returns {string} s without surrounding whitespace
   */
  function trim(s) {
    // The built-in removes the same characters as the regex class \s (white space and line
    // terminators) and avoids the backtracking of a lazy-quantifier pattern.
    return s.trim();
  }
  /**
   * Resolves a link found in a page or stylesheet against the URL of the resource that
   * contains it.
   *
   * - A link that starts with a URI scheme ("http:", "data:", "blob:", ...) is returned
   *   trimmed and otherwise untouched.
   * - "//host/path" keeps the scheme of the base, "/path" keeps scheme and host of the base.
   * - Any other link is resolved against the directory of the base; "." segments are dropped
   *   and ".." removes the previous segment (never going above the host).
   * - Empty path segments are dropped, which also drops a trailing slash of the link.
   * - The query string and fragment of the link are appended verbatim and are not subject to
   *   path normalisation; the query string and fragment of the base are ignored.
   * - A link with an empty path ("", "#frag", "?q") resolves to the base document itself.
   *
   * @param {string} link the URL to resolve
   * @param {string} host the absolute URL of the containing resource
   * @returns {string} the resolved URL
   */
  function formatURL(link, host) {
    var i, part, suffixIndex, hparts, lparts;
    var suffix = '', resolved = [], url = trim(link);
    var base = host.split("#")[0].split("?")[0];

    // Anchored scheme test: "img.php?u=http://x" must not be mistaken for an absolute URL.
    if (/^[a-z][a-z0-9+.\-]*:/i.test(url))
      return url;

    // Keep the query string / fragment away from the segment handling below.
    suffixIndex = url.search(/[?#]/);
    if (suffixIndex != -1) {
      suffix = url.substring(suffixIndex);
      url = url.substring(0, suffixIndex);
    }
    if (url == '')
      return trim(base + suffix);

    hparts = base.split('/');
    // Drop the file name of the base, but never the host part.
    if (hparts.length > 3)
      hparts.pop();
    lparts = url.split('/');
    if (url.charAt(0) == '/') {
      if (url.charAt(1) == '/') {
        // Protocol-relative link: take the host from the link itself.
        hparts = [ hparts[0], '', lparts[2] ];
        lparts = lparts.slice(3);
      } else {
        // Host-relative link: restart from the root of the base host.
        hparts = [ hparts[0], '', hparts[2] ];
        lparts = lparts.slice(1);
      }
    }
    for (i = 0; i < lparts.length; i++) {
      part = lparts[i];
      if (part == '..') {
        // Remove the closest remaining segment of the link first, then climb the base.
        if (resolved.length)
          resolved.pop();
        else if (hparts.length > 3)
          hparts.pop();
      } else if (part && part != '.')
        resolved.push(part);
    }
    return trim(hparts.join('/') + '/' + resolved.join('/') + suffix);
  }
  /**
   * Rewrites the URLs of a stylesheet so that they are absolute.
   *
   * Every url(...) token and every @import rule written without url(...) has its URL passed
   * through formatURL. Tokens whose URL is empty or starts with "data:" are left untouched.
   * Rewritten @import rules are emitted as: @import "absolute-url";
   *
   * @param {string} content the CSS text
   * @param {string} host the absolute URL the CSS text was loaded from
   * @returns {string} the CSS text with resolved URLs
   */
  function resolveURLs(content, host) {
    var ret = content.replace(URL_EXP, function(value) {
      var index, url, result = value.match(URL_VALUE_EXP);
      if (!result || !result[1] || result[1].indexOf("data:") == 0)
        return value;
      url = result[1];
      // Splice by position instead of using String.replace: a replacement string would have
      // its "$" sequences interpreted, and replacing the first textual occurrence could hit
      // the "url" keyword itself (e.g. "url(u)"). The captured URL holds no quote or
      // parenthesis, so its last occurrence in the token is the URL.
      index = value.lastIndexOf(url);
      return value.substring(0, index) + formatURL(url, host) + value.substring(index + url.length);
    });
    return ret.replace(IMPORT_ALT_EXP, function(value) {
      var result = value.match(IMPORT_VALUE_ALT_EXP);
      if (!result || !result[1] || result[1].indexOf("data:") == 0)
        return value;
      return "@import \"" + formatURL(result[1], host) + "\";";
    });
  }
  /**
   * Builds the value that replaces a reference to a fetched resource.
   *
   * @param {Object} data fetched resource; reads content, mediaType and mediaTypeParam
   * @param {string} defaultURL value used when data.content is empty
   * @param {boolean} woURL when truthy the bare URI is returned, otherwise it is wrapped in url(...)
   * @returns {string} a data URI built from data, or defaultURL, optionally wrapped in url(...)
   */
  function getDataURI(data, defaultURL, woURL) {
    var uri;
    if (!data.content) {
      // Nothing was fetched: fall back to the caller-supplied default.
      if (woURL)
        return defaultURL;
      return "url(" + defaultURL + ")";
    }
    uri = "data:" + data.mediaType + ";" + data.mediaTypeParam + "," + data.content;
    return woURL ? uri : "url(" + uri + ")";
  }
  /**
   * Removes every complete CSS comment from a piece of CSS text.
   *
   * The text is scanned once from left to right. The closing marker is searched after the
   * opening one, so "/*" followed directly by "/" is not mistaken for a whole comment. An
   * unterminated comment is left in place.
   *
   * @param {string} content the CSS text
   * @returns {string} the CSS text without comments
   */
  function removeCssComments(content) {
    var start, end, position = 0, kept = [];
    while ((start = content.indexOf("/*", position)) != -1) {
      // Skip the two characters of the opening marker before looking for the end.
      end = content.indexOf("*/", start + 2);
      if (end == -1)
        break;
      kept.push(content.substring(position, start));
      position = end + 2;
    }
    kept.push(content.substring(position));
    return kept.join("");
  }
  /**
   * Requests every resource referenced through url(...) in a piece of CSS and, once a
   * response arrives, replaces the reference by a data URI.
   *
   * URLs are collected from the comment-free CSS. URLs that resolve to a "data:" URI are
   * skipped. When a response arrives the current CSS is read again through getStyle and only
   * the url(...) tokens whose value is exactly the original URL are rewritten; quotes and
   * spacing of the token are kept. setStyle is called only if something was rewritten.
   *
   * @param {function(): string} getStyle returns the current CSS text
   * @param {function(string)} setStyle stores the updated CSS text
   * @param {string} host absolute URL used to resolve relative URLs
   * @param {function(string, function(Object), boolean)} callback requests a URL; called with
   *        the resolved URL, a response handler and the constant true
   */
  function replaceCssURLs(getStyle, setStyle, host, callback) {
    var i, url, result, values = removeCssComments(getStyle()).match(URL_EXP);

    // Requests absoluteUrl and inlines the response wherever origUrl is referenced.
    function requestAndInline(origUrl, absoluteUrl) {
      callback(absoluteUrl, function(data) {
        var updated, changed = false, current = getStyle(), dataURI;
        if (!current)
          return;
        dataURI = getDataURI(data, EMPTY_PIXEL_DATA, true);
        // Rewrite token by token: a plain textual replace of origUrl would also hit longer
        // URLs that merely contain it (e.g. "bg.png" inside "img/bg.png").
        updated = current.replace(URL_EXP, function(value) {
          var index, match = value.match(URL_VALUE_EXP);
          if (!match || match[1] != origUrl)
            return value;
          changed = true;
          index = value.lastIndexOf(origUrl);
          return value.substring(0, index) + dataURI + value.substring(index + origUrl.length);
        });
        if (changed)
          setStyle(updated);
      }, true);
    }

    if (!values)
      return;
    for (i = 0; i < values.length; i++) {
      result = values[i].match(URL_VALUE_EXP);
      if (result && result[1]) {
        url = formatURL(result[1], host);
        if (url.indexOf("data:") != 0)
          requestAndInline(result[1], url);
      }
    }
  }
  119. holder.filters = {
  120. init : function(doc) {
  121. targetDoc = doc;
  122. },
  123. document : {
  124. getStylesheets : function(doc, sendRequest) {
  125. holder.filters.link.get(doc, sendRequest);
  126. holder.filters.style.getImport(doc, sendRequest);
  127. },
  128. get : function(doc, sendRequest, topWindow) {
  129. holder.filters.styleAttr.get(doc, sendRequest);
  130. holder.filters.bgAttr.get(doc, sendRequest);
  131. holder.filters.image.get(doc, sendRequest);
  132. if (topWindow)
  133. holder.filters.image.getFavico(doc, sendRequest);
  134. holder.filters.svg.get(doc, sendRequest);
  135. holder.filters.script.get(doc, sendRequest);
  136. holder.filters.style.getURL(doc, sendRequest);
  137. },
  138. getDoctype : function() {
  139. var docType = targetDoc.doctype, docTypeStr;
  140. if (docType) {
  141. docTypeStr = "<!DOCTYPE " + docType.nodeName;
  142. if (docType.publicId) {
  143. docTypeStr += " PUBLIC \"" + docType.publicId + "\"";
  144. if (docType.systemId)
  145. docTypeStr += " \"" + docType.systemId + "\"";
  146. } else if (docType.systemId)
  147. docTypeStr += " SYSTEM \"" + docType.systemId + "\"";
  148. if (docType.internalSubset)
  149. docTypeStr += " [" + docType.internalSubset + "]";
  150. return docTypeStr + ">\n";
  151. }
  152. return "";
  153. }
  154. },
  155. element : {
  156. clean : function(doc) {
  157. Array.prototype.forEach.call(doc.querySelectorAll("blockquote[cite]"), function(element) {
  158. element.removeAttribute("cite");
  159. });
  160. },
  161. removeHidden : function() {
  162. if (targetDoc.body)
  163. Array.prototype.forEach.call(targetDoc.body.querySelectorAll("*:not(style):not(script):not(link):not(area)"), function(element) {
  164. var style = getComputedStyle(element);
  165. if ((style.visibility == "hidden" || style.display == "none" || style.opacity == 0) && (element.id != "__SingleFile_mask__"))
  166. element.parentElement.removeChild(element);
  167. });
  168. }
  169. },
  170. frame : {
  171. clean : function() {
  172. Array.prototype.forEach.call(targetDoc.querySelectorAll("iframe[src], frame[src]"), function(frame) {
  173. if (!frame.src)
  174. frame.removeAttribute("src");
  175. });
  176. },
  177. count : function() {
  178. return targetDoc.querySelectorAll("iframe[src], frame[src]").length;
  179. },
  180. remove : function(doc) {
  181. Array.prototype.forEach.call(doc.querySelectorAll("iframe[src], frame[src]"), function(frame) {
  182. frame.src = "about:blank";
  183. });
  184. },
  185. set : function(doc, urlsArray) {
  186. Array.prototype.forEach.call(doc.querySelectorAll("iframe[src], frame[src]"), function(frame, index) {
  187. frame.src = urlsArray[index] || "about:blank";
  188. });
  189. }
  190. },
  191. object : {
  192. remove : function(doc) {
  193. var i, nodes = doc.querySelectorAll('applet, object:not([type="image/svg+xml"]):not([type="image/svg-xml"]), embed:not([src*=".svg"])');
  194. for (i = 0; i < nodes.length; i++)
  195. nodes[i].parentElement.removeChild(nodes[i]);
  196. }
  197. },
  198. styleAttr : {
  199. get : function(doc, sendRequest) {
  200. var STYLE_ATTR_SELECTOR = "*[style]";
  201. Array.prototype.forEach.call(doc.querySelectorAll(STYLE_ATTR_SELECTOR), function(node) {
  202. replaceCssURLs(function() {
  203. return node.getAttribute("style");
  204. }, function(value) {
  205. node.setAttribute("style", value);
  206. }, targetDoc.baseURI, sendRequest);
  207. });
  208. }
  209. },
  210. bgAttr : {
  211. get : function(doc, sendRequest) {
  212. var BG_SELECTOR = 'body[background],table[background],thead[background],tbody[background],tr[background],th[background],td[background]';
  213. Array.prototype.forEach.call(doc.querySelectorAll(BG_SELECTOR), function(node) {
  214. var url, value = node.getAttribute("background");
  215. if (value.indexOf(".") != -1) {
  216. url = formatURL(value, targetDoc.baseURI);
  217. if (url.indexOf("data:") != 0)
  218. sendRequest(url, function(data) {
  219. node.setAttribute("background", getDataURI(data, EMPTY_PIXEL_DATA, true));
  220. }, true);
  221. }
  222. });
  223. }
  224. },
  225. image : {
  226. get : function(doc, sendRequest) {
  227. var IMG_SELECTOR = 'link[href][rel="shortcut icon"], link[href][rel="apple-touch-icon"], link[href][rel="icon"], img[src], input[src][type="image"]';
  228. Array.prototype.forEach.call(doc.querySelectorAll(IMG_SELECTOR), function(node) {
  229. var url = formatURL(node.href || node.src, targetDoc.baseURI);
  230. if (url.indexOf("data:") != 0)
  231. sendRequest(url, function(data) {
  232. node.setAttribute(node.href ? "href" : "src", getDataURI(data, EMPTY_PIXEL_DATA, true));
  233. }, true);
  234. });
  235. },
  236. getFavico : function(doc, sendRequest) {
  237. var node, foundLink = false, IMG_SELECTOR = 'link[href][rel="shortcut icon"], link[href][rel="apple-touch-icon"], link[href][rel="icon"]';
  238. Array.prototype.forEach.call(doc.querySelectorAll(IMG_SELECTOR), function(n) {
  239. var url = formatURL(n.href, targetDoc.baseURI);
  240. if (!foundLink && url.indexOf("data:") != 0)
  241. foundLink = true;
  242. });
  243. if (!foundLink) {
  244. node = targetDoc.createElement("link");
  245. node.type = "image/x-icon";
  246. node.rel = "shortcut icon";
  247. node.href = "/favicon.ico";
  248. doc.querySelector("html > head").appendChild(node);
  249. sendRequest(node.href, function(data) {
  250. node.setAttribute(node.href ? "href" : "src", getDataURI(data, EMPTY_PIXEL_DATA, true));
  251. }, true);
  252. }
  253. }
  254. },
  255. svg : {
  256. get : function(doc, sendRequest) {
  257. var SVG_SELECTOR = 'object[type="image/svg+xml"], object[type="image/svg-xml"], embed[src*=".svg"]';
  258. Array.prototype.forEach.call(doc.querySelectorAll(SVG_SELECTOR), function(node) {
  259. var url = formatURL(node.data || node.src, targetDoc.baseURI);
  260. if (url.indexOf("data:") != 0)
  261. sendRequest(url, function(data) {
  262. node.setAttribute(node.data ? "data" : "src", getDataURI(data, "data:text/xml,<svg></svg>", true));
  263. }, false, true);
  264. });
  265. }
  266. },
  267. link : {
  268. get : function(doc, sendRequest) {
  269. var LINK_SELECTOR = 'link[href][rel*="stylesheet"]';
  270. Array.prototype.forEach.call(doc.querySelectorAll(LINK_SELECTOR), function(node) {
  271. if (node.href.indexOf("data:") != 0)
  272. sendRequest(node.href, function(data) {
  273. var i, newNode;
  274. newNode = targetDoc.createElement("style");
  275. for (i = 0; i < node.attributes.length; i++)
  276. if (node.attributes[i].value)
  277. newNode.setAttribute(node.attributes[i].name, node.attributes[i].value);
  278. newNode._href = node.href;
  279. newNode.removeAttribute("href");
  280. newNode.textContent = resolveURLs(data.content || "", data.url) + "\n";
  281. node.parentElement.replaceChild(newNode, node);
  282. });
  283. });
  284. }
  285. },
  286. script : {
  287. get : function(doc, sendRequest) {
  288. var SCRIPT_SELECTOR = 'script[src]';
  289. Array.prototype.forEach.call(doc.querySelectorAll(SCRIPT_SELECTOR), function(node) {
  290. if (node.src.indexOf("data:") != 0)
  291. sendRequest(node.src, function(data) {
  292. data.content = data.content.replace(/"([^"]*)<\/\s*script\s*>([^"]*)"/gi, '"$1<"+"/script>$2"');
  293. data.content = data.content.replace(/'([^']*)<\/\s*script\s*>([^']*)'/gi, "'$1<'+'/script>$2'");
  294. node.textContent = [ "\n", data.content, "\n" ].join("");
  295. node.removeAttribute("src");
  296. }, false, false, targetDoc.characterSet);
  297. });
  298. },
  299. remove : function(doc) {
  300. var i, nodes = doc.querySelectorAll('script'), body = doc.querySelector("html > body");
  301. for (i = 0; i < nodes.length; i++)
  302. nodes[i].parentElement.removeChild(nodes[i]);
  303. if (body && body.getAttribute("onload"))
  304. body.removeAttribute("onload");
  305. }
  306. },
  307. style : {
  308. getURL : function(doc, sendRequest) {
  309. Array.prototype.forEach.call(doc.querySelectorAll("style"), function(styleSheet) {
  310. replaceCssURLs(function() {
  311. return styleSheet.textContent;
  312. }, function(value) {
  313. styleSheet.textContent = value
  314. }, styleSheet._href || targetDoc.baseURI, sendRequest);
  315. });
  316. },
  317. getImport : function(doc, sendRequest) {
  318. Array.prototype.forEach.call(doc.querySelectorAll("style"), function(styleSheet) {
  319. var i, url, result, imports = removeCssComments(styleSheet.textContent).match(IMPORT_EXP);
  320. if (imports)
  321. for (i = 0; i < imports.length; i++) {
  322. result = imports[i].match(IMPORT_URL_VALUE_EXP);
  323. if (result && (result[2] || result[4])) {
  324. url = formatURL(result[2] || result[4], styleSheet._href || targetDoc.baseURI);
  325. if (url.indexOf("data:") != 0)
  326. (function(imp) {
  327. sendRequest(url, function(data) {
  328. styleSheet.textContent = styleSheet.textContent.replace(imp, data.content ? resolveURLs(data.content, data.url)
  329. : "");
  330. }, false, false, targetDoc.characterSet);
  331. })(imports[i]);
  332. }
  333. }
  334. });
  335. },
  336. removeUnused : function() {
  337. Array.prototype.forEach.call(targetDoc.querySelectorAll("style"), function(style) {
  338. var cssRules = [];
  339. function process(rules) {
  340. var selector;
  341. Array.prototype.forEach.call(rules, function(rule) {
  342. if (rule instanceof CSSMediaRule) {
  343. cssRules.push("@media " + Array.prototype.join.call(rule.media, ",") + " {");
  344. process(rule.cssRules, true);
  345. cssRules.push("}");
  346. } else if (rule.selectorText) {
  347. selector = trim(rule.selectorText.replace(/::after|::before|::first-line|::first-letter|:focus|:hover/gi, ''));
  348. if (selector) {
  349. try {
  350. if (targetDoc.querySelector(selector))
  351. cssRules.push(rule.cssText);
  352. } catch (e) {
  353. cssRules.push(rule.cssText);
  354. }
  355. }
  356. }
  357. });
  358. }
  359. if (style.sheet) {
  360. process(style.sheet.rules);
  361. style.innerText = cssRules.join("");
  362. }
  363. });
  364. }
  365. }
  366. };
  367. })(singlefile);