content.js 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451
  1. /*
  2. * Copyright 2011 Gildas Lormeau
  3. * contact : gildas.lormeau <at> gmail.com
  4. *
  5. * This file is part of SingleFile Core.
  6. *
  7. * SingleFile Core is free software: you can redistribute it and/or modify
  8. * it under the terms of the GNU Lesser General Public License as published by
  9. * the Free Software Foundation, either version 3 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * SingleFile Core is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public License
  18. * along with SingleFile Core. If not, see <http://www.gnu.org/licenses/>.
  19. */
  20. (function() {
  // Shared state of the content script, one declaration per statement.
  // bgPort is assigned once the connection to the extension is opened further down.
  var bgPort;
  // Per-window processing records, keyed by winId (filled in by initProcess).
  var docs = {};
  var pageId = singlefile.pageId;
  var doc = document;
  // Root element being processed; assigned during initialisation.
  var docElement;
  var canvasData = [];
  var config = singlefile.config;
  /**
   * Correlates resource requests posted on bgPort with the handlers that are
   * waiting for the matching responses.
   *
   * @param pageId identifier copied into every outgoing request message
   * @param winId window identifier copied into every outgoing request message
   */
  function RequestManager(pageId, winId) {
    var requestId = 0, callbacks = [];

    /**
     * Posts a "getResourceContentRequest" message and remembers the handler
     * under the request id that was sent with it.
     *
     * @param url address of the resource to fetch
     * @param responseHandler function called with the content once it arrives
     * @param characterSet forwarded as-is in the message
     * @param mediaTypeParam forwarded as-is in the message
     */
    this.send = function(url, responseHandler, characterSet, mediaTypeParam) {
      callbacks[requestId] = responseHandler;
      bgPort.postMessage({
        getResourceContentRequest : true,
        pageId : pageId,
        winId : winId,
        requestId : requestId,
        url : url,
        characterSet : characterSet,
        mediaTypeParam : mediaTypeParam
      });
      requestId++;
    };

    /**
     * Delivers a response to the handler registered for the given request id.
     * Responses for unknown or already answered ids are ignored instead of
     * raising a TypeError.
     *
     * @param id request id echoed back in the response
     * @param content content handed to the handler
     */
    this.onResponse = function(id, content) {
      var responseHandler = callbacks[id];
      if (typeof responseHandler != "function")
        return;
      // Release the slot first so the handler is dropped even if it throws.
      callbacks[id] = null;
      responseHandler(content);
    };
  }
  /**
   * Rewrites every <style> element of the document so that it only keeps the
   * style rules whose selector currently matches an element.
   *
   * - Rules nested in a media block are filtered recursively and re-wrapped in
   *   an "@media" block.
   * - Rules without a selector (for instance @font-face, @keyframes or @import)
   *   cannot be tested against the document and are kept verbatim.
   * - Selectors the browser cannot evaluate are kept as well.
   */
  function removeUnusedCSSRules() {
    Array.prototype.forEach.call(document.querySelectorAll("style"), function(style) {
      var cssRules = [], sheetRules;

      function process(rules) {
        Array.prototype.forEach.call(rules, function(rule) {
          var selector;
          // Only grouping rules expose both a media list and nested rules;
          // @import rules have a media list but nothing to recurse into.
          if (rule.media && rule.cssRules) {
            cssRules.push("@media " + Array.prototype.join.call(rule.media, ",") + " {");
            process(rule.cssRules);
            cssRules.push("}");
          } else if (rule.selectorText) {
            // Dynamic pseudo-classes/elements never match through querySelector,
            // so they are stripped before testing the selector.
            selector = rule.selectorText.replace(/::after|::before|::first-line|::first-letter|:focus|:hover/gi, '').trim();
            if (selector)
              try {
                if (document.querySelector(selector))
                  cssRules.push(rule.cssText);
              } catch (e) {
                // The selector could not be evaluated: keep the rule to be safe.
                cssRules.push(rule.cssText);
              }
          } else if (rule.cssText) {
            cssRules.push(rule.cssText);
          }
        });
      }

      // "cssRules" is the standard name, "rules" the legacy alias.
      sheetRules = style.sheet && (style.sheet.cssRules || style.sheet.rules);
      if (sheetRules) {
        process(sheetRules);
        // textContent stores the text verbatim (innerText would convert line
        // breaks into <br> children).
        style.textContent = cssRules.join("");
      }
    });
  }
  /**
   * Detaches from the page every body descendant (style, script, link and area
   * elements excepted) whose computed style reports it as invisible.
   */
  function removeHiddenElements() {
    var candidates = doc.querySelectorAll("html > body *:not(style):not(script):not(link):not(area)");
    var position, candidate;

    // An element counts as hidden when any of the three properties says so.
    function isHidden(computed) {
      if (computed.visibility == "hidden")
        return true;
      if (computed.display == "none")
        return true;
      return computed.opacity == 0;
    }

    for (position = 0; position < candidates.length; position++) {
      candidate = candidates[position];
      if (isHidden(getComputedStyle(candidate)))
        candidate.parentElement.removeChild(candidate);
    }
  }
  /**
   * Extracts the element enclosing the current selection, with the matched CSS
   * rules of the element and its descendants inlined into "style" attributes.
   *
   * The styled element is taken out of the page and an unstyled deep clone is
   * put in its place, so the page itself keeps its original markup.
   *
   * @returns the detached, styled element, or undefined when there is no
   *          selection or the selection is collapsed
   */
  function getSelectedContent() {
    var node, clonedNode, selection = getSelection(), range = selection && selection.rangeCount ? selection.getRangeAt(0) : null;

    // Inlines the matched rules depth-first: children first, then the node itself.
    function addStyle(node) {
      var rules, cssText = "";
      Array.prototype.forEach.call(node.children, function(child) {
        addStyle(child);
      });
      // A null result (no matching rule) is treated as an empty list.
      rules = getMatchedCSSRules(node, '', false) || [];
      Array.prototype.forEach.call(rules, function(rule) {
        cssText += rule.style.cssText;
      });
      node.setAttribute("style", cssText);
    }

    // "collapsed" is the reliable emptiness test: the start and end offsets of a
    // range spanning several nodes can be equal although text is selected.
    if (range && !range.collapsed) {
      node = range.commonAncestorContainer;
      if (node.nodeType != node.ELEMENT_NODE)
        node = node.parentElement;
      clonedNode = node.cloneNode(true);
      // Rules are matched while the node is still attached to the page.
      addStyle(node);
      // parentNode also covers the root element, whose parent is the document.
      node.parentNode.replaceChild(clonedNode, node);
    }
    return node;
  }
  /**
   * Serialises every canvas of a document to a PNG data URL.
   *
   * @param doc document (or any object exposing querySelectorAll) to scan
   * @returns an array with one entry per canvas, in document order; the entry
   *          is null when the canvas could not be exported
   */
  function getCanvasData(doc) {
    return Array.prototype.map.call(doc.querySelectorAll("canvas"), function(canvas) {
      try {
        return canvas.toDataURL("image/png", "");
      } catch (e) {
        // Export failed: keep the slot so that indexes stay aligned.
        return null;
      }
    });
  }
  /**
   * Prepares the processing of one document and records it in docs[winId].
   *
   * The record holds the document, its root element, its frame elements, a
   * dedicated RequestManager and the function returned by
   * singlefile.initProcess. Initialisation, progress and completion are
   * reported on bgPort.
   *
   * @param doc document being processed
   * @param docElement root element of the tree to process
   * @param winId identifier of the window the document belongs to
   * @param topWindow forwarded to singlefile.initProcess; when truthy the final
   *        "docEnd" message carries no content
   * @param canvasData canvas snapshots forwarded to singlefile.initProcess
   */
  function initProcess(doc, docElement, winId, topWindow, canvasData) {
    var requestManager = new RequestManager(pageId, winId);
    var frameElements, processDocFn;

    function notifyInit(maxIndex) {
      bgPort.postMessage({
        docInit : true,
        pageId : pageId,
        winId : winId,
        maxIndex : maxIndex
      });
    }

    function notifyProgress(index) {
      bgPort.postMessage({
        docProgress : true,
        pageId : pageId,
        winId : winId,
        index : index
      });
    }

    function notifyEnd() {
      var serialized = topWindow ? null : singlefile.util.getDocContent(doc, docElement);
      bgPort.postMessage({
        docEnd : true,
        pageId : pageId,
        winId : winId,
        content : serialized
      });
    }

    // The frame list is captured before the processing function is created.
    frameElements = docElement.querySelectorAll("iframe, frame");
    processDocFn = singlefile.initProcess(doc, docElement, topWindow, doc.baseURI, doc.characterSet, config, canvasData, requestManager,
      notifyInit, notifyProgress, notifyEnd);
    docs[winId] = {
      doc : doc,
      docElement : docElement,
      frames : frameElements,
      requestManager : requestManager,
      processDoc : processDocFn
    };
  }
  /**
   * Posts the "processInit" message used when the document is processed in
   * the page. Every argument is optional; missing values are taken from the
   * current window (location, doc, wininfo).
   *
   * @param title document title (defaults to doc.title)
   * @param url document address (defaults to location.href)
   * @param baseURI base URI (defaults to doc.baseURI)
   * @param winId window identifier (defaults to wininfo.winId); when given, the
   *        message is never flagged as coming from the top window
   * @param winIndex window index (defaults to wininfo.index); 0 is a valid
   *        index and is sent as such
   */
  function sendFgProcessInit(title, url, baseURI, winId, winIndex) {
    bgPort.postMessage({
      processInit : true,
      pageId : pageId,
      topWindow : winId ? false : window == top,
      url : url || location.href,
      title : title || doc.title,
      baseURI : baseURI || doc.baseURI,
      winId : winId || wininfo.winId,
      // Only fall back when no index was supplied ("||" would discard index 0).
      index : winIndex != null ? winIndex : wininfo.index
    });
  }
  /**
   * Posts the "processInit" message used when the document is processed by
   * the background page: besides the window description it carries the
   * document content, its character set and the canvas snapshots.
   *
   * @param content serialized document content (sent as-is, may be null)
   * @param title document title (defaults to doc.title)
   * @param url document address (defaults to location.href)
   * @param baseURI base URI (defaults to doc.baseURI)
   * @param characterSet character set (defaults to doc.characterSet)
   * @param winId window identifier (defaults to wininfo.winId); when given, the
   *        message is never flagged as coming from the top window
   * @param winIndex window index (defaults to wininfo.index); 0 is a valid
   *        index and is sent as such
   */
  function sendBgProcessInit(content, title, url, baseURI, characterSet, winId, winIndex) {
    bgPort.postMessage({
      processInit : true,
      pageId : pageId,
      topWindow : winId ? false : window == top,
      url : url || location.href,
      title : title || doc.title,
      content : content,
      baseURI : baseURI || doc.baseURI,
      characterSet : characterSet || doc.characterSet,
      canvasData : canvasData,
      winId : winId || wininfo.winId,
      // Only fall back when no index was supplied ("||" would discard index 0).
      index : winIndex != null ? winIndex : wininfo.index
    });
  }
  172. // ----------------------------------------------------------------------------------------------
  /**
   * Entry point of the content script for the current window.
   *
   * Steps, in order: capture the selected content, blank the <noscript>
   * elements, snapshot the canvases, optionally remove hidden elements, stamp
   * the top window with an archive comment, then start either the background
   * or the in-page processing depending on config.processInBackground.
   * Frames do nothing when frames are removed or the raw document is used.
   */
  function init() {
    var selectedContent = getSelectedContent();
    var topWindow = window == top;
    var archiveComment;

    // Parses markup into a new HTML document that is not displayed.
    function parseMarkup(markup) {
      var parsedDoc = document.implementation.createHTMLDocument();
      parsedDoc.open();
      parsedDoc.write(markup);
      parsedDoc.close();
      return parsedDoc;
    }

    // Downloads the current page again as plain text and hands the text over.
    function loadRawDoc(onLoaded) {
      var request = new XMLHttpRequest();
      request.onreadystatechange = function() {
        if (request.readyState == 4)
          onLoaded(request.responseText);
      };
      request.open("GET", doc.location.href, true);
      request.overrideMimeType('text/plain; charset=' + doc.characterSet);
      request.send(null);
    }

    // Requests the content of each same-domain frame known to wininfo.
    function forEachSameDomainFrame(onContent) {
      wininfo.frames.forEach(function(frame) {
        if (frame.sameDomain)
          wininfo.getContent(frame, function(message) {
            onContent(frame, message);
          });
      });
    }

    // Announces the window, then sets up in-page processing for it and,
    // from the top window, for its same-domain frames.
    function startFgProcess() {
      sendFgProcessInit();
      if (!docElement || (singlefile.processSelection && !selectedContent))
        return;
      initProcess(doc, docElement, wininfo.winId, topWindow, canvasData);
      if (!topWindow || config.removeFrames || config.getRawDoc)
        return;
      forEachSameDomainFrame(function(frame, message) {
        var frameDoc = parseMarkup(message.content);
        sendFgProcessInit(message.title, message.url, message.baseURI, frame.winId, frame.index);
        initProcess(frameDoc, frameDoc.documentElement, frame.winId, false, getCanvasData(frameDoc));
      });
    }

    // Background mode: ship the content to the background page.
    function startBgMode() {
      if (singlefile.processSelection) {
        if (selectedContent || topWindow)
          sendBgProcessInit(topWindow ? null : singlefile.util.getDocContent(doc, selectedContent));
        return;
      }
      if (config.getRawDoc && topWindow) {
        loadRawDoc(function(rawMarkup) {
          sendBgProcessInit(rawMarkup);
        });
        return;
      }
      sendBgProcessInit(singlefile.util.getDocContent(doc));
      if (topWindow && !config.removeFrames)
        forEachSameDomainFrame(function(frame, message) {
          sendBgProcessInit(message.content, message.title, message.url, message.baseURI, message.characterSet, frame.winId,
            frame.index);
        });
    }

    // In-page mode: choose the root element to process, then start.
    function startFgMode() {
      if (singlefile.processSelection) {
        if (selectedContent || topWindow) {
          docElement = selectedContent;
          startFgProcess();
        }
      } else if (config.getRawDoc && topWindow) {
        loadRawDoc(function(rawMarkup) {
          docElement = doc.importNode(parseMarkup(rawMarkup).documentElement, true);
          startFgProcess();
        });
      } else {
        docElement = doc.documentElement.cloneNode(true);
        startFgProcess();
      }
    }

    Array.prototype.forEach.call(doc.querySelectorAll("noscript"), function(noscript) {
      noscript.textContent = "";
    });
    canvasData = getCanvasData(doc);
    if (config.removeHidden)
      removeHiddenElements();
    if (topWindow) {
      archiveComment = document.createComment("\n Archive processed by SingleFile \n url: " + location.href + " \n saved date: "
        + new Date() + " \n");
      document.documentElement.insertBefore(archiveComment, document.documentElement.firstChild);
    }
    // A frame has nothing to do when frames are removed or the raw document is used.
    if (!topWindow && (config.removeFrames || config.getRawDoc))
      return;
    if (config.processInBackground)
      startBgMode();
    else
      startFgMode();
  }
  /**
   * Handles a "setContentRequest" message: optionally displays the processed
   * page in place of the current one, then answers with a
   * "setContentResponse" message.
   *
   * When config.displayProcessedPage is set, the window properties are first
   * reset by code injected through a "javascript:" URL; the page is replaced
   * once that code dispatches the "WindowPropertiesCleaned" event. Later DOM
   * mutations are processed again through the "DOMSubtreeModified" listener.
   *
   * @param message incoming message; its "content" and "winProperties"
   *        properties are read
   */
  function setContentRequest(message) {
    var mutationEventId = 0, winId = wininfo.winId;

    /*
     * The source text of resetWindowProperties (Function#toString) is embedded
     * in the "javascript:" URL built at the end of setContentRequest, so it
     * must stay self-contained and must not reference anything of this script.
     * NOTE(review): keep "//" comments out of its body - URL parsing may drop
     * line breaks, which would comment out the remainder of the function.
     * NOTE(review): it eval()s the stringified winProperties; this is safe only
     * as long as that value is produced by the extension itself - confirm.
     */
    function resetWindowProperties(winPropertiesStr) {
      var property, winProp, customEvent;
      try {
        winProp = eval("(" + winPropertiesStr + ")");
        customEvent = document.createEvent("CustomEvent");
        for (property in window)
          if (!winProp[property])
            window[property] = null;
        customEvent.initCustomEvent("WindowPropertiesCleaned", true, true);
        document.dispatchEvent(customEvent);
      } catch (e) {
        console.log(e);
      }
    }

    // Re-processes the markup of an element that was modified after display.
    function onDOMSubtreeModified(event) {
      var id = mutationEventId, element = event.target, processDocFn;

      // Applies the fragment processed by the background page for this mutation.
      function onSetDocFragment(message) {
        if (message.setDocFragment && message.mutationEventId == id) {
          // The listener is suspended so that our own write is not processed again.
          doc.removeEventListener("DOMSubtreeModified", onDOMSubtreeModified, true);
          element.innerHTML = message.content;
          doc.addEventListener("DOMSubtreeModified", onDOMSubtreeModified, true);
          bgPort.onMessage.removeListener(onSetDocFragment);
        }
      }

      if (element.innerHTML) {
        if (config.processInBackground) {
          bgPort.postMessage({
            processDocFragment : true,
            pageId : pageId,
            winId : winId,
            content : element.innerHTML,
            mutationEventId : id
          });
          bgPort.onMessage.addListener(onSetDocFragment);
          mutationEventId++;
        } else
          processDocFn = singlefile.initProcess(doc, element, false, doc.baseURI, doc.characterSet, config, canvasData, docs[winId].requestManager,
            function(maxIndex) {
              doc.removeEventListener("DOMSubtreeModified", onDOMSubtreeModified, true);
              processDocFn();
              doc.addEventListener("DOMSubtreeModified", onDOMSubtreeModified, true);
            });
      }
      event.preventDefault();
    }

    // True when the path ends with ".txt". Anchoring the search at the suffix
    // position avoids false results for short paths or an earlier ".txt".
    function hasTxtExtension(pathname) {
      var suffix = ".txt", start = pathname.length - suffix.length;
      return start >= 0 && pathname.indexOf(suffix, start) == start;
    }

    // Displays the processed page once the window properties have been reset.
    function onWindowPropertiesCleaned() {
      var tmpDoc;

      function replaceDoc() {
        doc.replaceChild(docElement, doc.documentElement);
        doc.addEventListener("DOMSubtreeModified", onDOMSubtreeModified, true);
      }

      doc.removeEventListener('WindowPropertiesCleaned', onWindowPropertiesCleaned, true);
      if (config.processInBackground || singlefile.processSelection || (!config.processInBackground && !config.removeScripts)) {
        if (!hasTxtExtension(location.pathname)) {
          doc.open();
          doc.write(message.content);
          doc.addEventListener("DOMSubtreeModified", onDOMSubtreeModified, true);
          doc.close();
        } else {
          // ".txt" pages: the content is parsed aside and its root imported.
          tmpDoc = document.implementation.createHTMLDocument();
          tmpDoc.open();
          tmpDoc.write(message.content);
          tmpDoc.close();
          docElement = doc.importNode(tmpDoc.documentElement, true);
          replaceDoc();
        }
      } else
        replaceDoc();
      if (config.removeUnusedCSSRules)
        removeUnusedCSSRules();
      setContentResponse();
    }

    // The content is only sent back when the page is saved or its content requested.
    function sendSetContentResponse(content) {
      bgPort.postMessage({
        setContentResponse : true,
        winId : "0",
        pageId : pageId,
        content : config.savePage || config.getContent ? content : null
      });
    }

    function setContentResponse() {
      if (singlefile.processSelection)
        sendSetContentResponse(message.content);
      else {
        if (config.processInBackground)
          sendSetContentResponse(singlefile.util.getDocContent(doc, doc.documentElement));
        else
          sendSetContentResponse(config.removeUnusedCSSRules ? singlefile.util.getDocContent(doc, doc.documentElement) : singlefile.util
            .getDocContent(doc, docElement));
      }
    }

    if (config.displayProcessedPage) {
      window.location.href = "javascript:(" + resetWindowProperties.toString() + ")('" + JSON.stringify(message.winProperties) + "')";
      doc.addEventListener('WindowPropertiesCleaned', onWindowPropertiesCleaned, true);
    } else
      setContentResponse();
  }
  /**
   * Routes a resource response to the request manager of the window that
   * asked for the resource.
   *
   * @param message incoming message; "winId", "requestId" and "content" are read
   */
  function getResourceContentResponse(message) {
    var manager = docs[message.winId].requestManager;
    manager.onResponse(message.requestId, message.content);
  }
  /**
   * Replaces the source of a frame with its processed content, embedded as a
   * data URL, then acknowledges with a "setFrameContentResponse" message.
   *
   * @param message incoming message; "winId" selects the docs record, "index"
   *        the frame inside it, and "content" is the markup to embed
   */
  function setFrameContentRequest(message) {
    var frame = docs[message.winId].frames[message.index];
    // encodeURIComponent also escapes "#", which would otherwise start the
    // fragment part of the URL and cut the markup short.
    frame.setAttribute("src", "data:text/html;charset=utf-8," + encodeURIComponent(message.content));
    bgPort.postMessage({
      setFrameContentResponse : true,
      pageId : pageId,
      winId : message.winId,
      index : message.index
    });
  }
  /**
   * Answers a "getContentRequest" message with the serialized content of the
   * requested window.
   *
   * The document recorded in docs for message.winId is used when there is
   * one; otherwise the current document and root element are serialized (a
   * missing record no longer raises a TypeError).
   *
   * @param message incoming message; "winId" is read and echoed back
   */
  function getContentRequest(message) {
    var entry = docs[message.winId], content;
    if (entry && entry.doc)
      content = singlefile.util.getDocContent(entry.doc, entry.docElement);
    else
      content = singlefile.util.getDocContent(doc, docElement);
    bgPort.postMessage({
      getContentResponse : true,
      winId : message.winId,
      pageId : pageId,
      content : content
    });
  }
  /**
   * Runs the processing function recorded for the window named in the
   * message; messages for windows without a record are ignored.
   *
   * @param message incoming message; "winId" is read
   */
  function processDoc(message) {
    var entry = docs[message.winId];
    if (!entry)
      return;
    entry.processDoc();
  }
  // Flag name -> handler, checked in this order for every incoming message.
  // Each flag is tested independently, so one message may trigger several handlers.
  var messageHandlers = [
    [ "getResourceContentResponse", getResourceContentResponse ],
    [ "setFrameContentRequest", setFrameContentRequest ],
    [ "getContentRequest", getContentRequest ],
    [ "setContentRequest", setContentRequest ],
    [ "processDoc", processDoc ]
  ];

  // Open the port named "singlefile" to the extension.
  // NOTE(review): chrome.extension.connect is documented as deprecated in favour
  // of chrome.runtime.connect - confirm before targeting recent Chrome versions.
  bgPort = chrome.extension.connect({
    name : "singlefile"
  });
  bgPort.onMessage.addListener(function(message) {
    var position, flagName, handler;
    for (position = 0; position < messageHandlers.length; position++) {
      flagName = messageHandlers[position][0];
      handler = messageHandlers[position][1];
      if (message[flagName])
        handler(message);
    }
  });

  // Only HTML documents are processed.
  if (doc.documentElement instanceof HTMLHtmlElement)
    init();
  413. })();