content.js 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459
  1. /*
  2. * Copyright 2011 Gildas Lormeau
  3. * contact : gildas.lormeau <at> gmail.com
  4. *
  5. * This file is part of SingleFile Core.
  6. *
  7. * SingleFile Core is free software: you can redistribute it and/or modify
  8. * it under the terms of the GNU Lesser General Public License as published by
  9. * the Free Software Foundation, either version 3 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * SingleFile Core is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public License
  18. * along with SingleFile Core. If not, see <http://www.gnu.org/licenses/>.
  19. */
  20. (function() {
  // Port to the background page; assigned when the connection is opened further down.
  var bgPort;
  // Per-window processing state, keyed by winId.
  var docs = {};
  var pageId = singlefile.pageId;
  var doc = document;
  // Root element currently being processed; assigned during initialization.
  var docElement;
  // Serialized canvas contents of the current document.
  var canvasData = [];
  var config = singlefile.config;
  /**
   * Sends resource-content requests to the background page for one window and
   * routes the matching responses back to the handlers that asked for them.
   *
   * @param pageId identifier of the page, copied into every request message
   * @param winId identifier of the window, copied into every request message
   */
  function RequestManager(pageId, winId) {
    var requestId = 0, callbacks = [];

    /**
     * Posts a getResourceContentRequest message through bgPort and remembers
     * responseHandler under the request id used in that message.
     *
     * @param url URL of the resource to retrieve
     * @param responseHandler function called with the content once the response arrives
     * @param characterSet forwarded as-is in the request message
     * @param mediaTypeParam forwarded as-is in the request message
     */
    this.send = function(url, responseHandler, characterSet, mediaTypeParam) {
      callbacks[requestId] = responseHandler;
      bgPort.postMessage({
        getResourceContentRequest : true,
        pageId : pageId,
        winId : winId,
        requestId : requestId,
        url : url,
        characterSet : characterSet,
        mediaTypeParam : mediaTypeParam
      });
      requestId++;
    };

    /**
     * Delivers a response to the handler registered for the given request id.
     * Responses for unknown or already-answered ids are ignored.
     *
     * @param id request id echoed back by the background page
     * @param content content passed to the handler
     */
    this.onResponse = function(id, content) {
      var callback = callbacks[id];
      // Release the slot before calling the handler so that a throwing handler
      // cannot leave a stale entry behind.
      callbacks[id] = null;
      if (typeof callback == "function")
        callback(content);
    };
  }
  /**
   * Rewrites every <style> element of the document so that it only keeps the
   * style rules whose selector currently matches an element.
   *
   * - Rules exposing both `media` and nested `cssRules` are rebuilt as "@media"
   *   blocks containing only their surviving children.
   * - Style rules are tested with document.querySelector after stripping a few
   *   pseudo-classes/elements that querySelector cannot evaluate usefully; a
   *   selector that makes querySelector throw is kept, and a selector that is
   *   empty after stripping is dropped.
   * - Any other rule (e.g. @font-face, @keyframes, @import) has no selector to
   *   test and is kept verbatim rather than being dropped.
   */
  function removeUnusedCSSRules() {
    Array.prototype.forEach.call(document.querySelectorAll("style"), function(style) {
      var cssRules = [], sheetRules;

      function process(rules) {
        Array.prototype.forEach.call(rules, function(rule) {
          var selector;
          if (rule.media && rule.cssRules) {
            cssRules.push("@media " + Array.prototype.join.call(rule.media, ",") + " {");
            process(rule.cssRules);
            cssRules.push("}");
          } else if (rule.selectorText) {
            selector = rule.selectorText.replace(/::after|::before|::first-line|::first-letter|:focus|:hover/gi, '').trim();
            if (selector) {
              try {
                if (document.querySelector(selector))
                  cssRules.push(rule.cssText);
              } catch (e) {
                // The selector could not be evaluated: keep the rule to stay on the safe side.
                cssRules.push(rule.cssText);
              }
            }
          } else if (rule.cssText) {
            // At-rules without a selector, and rules with a media list but no
            // nested rules, cannot be tested for usage.
            cssRules.push(rule.cssText);
          }
        });
      }

      if (style.sheet) {
        // Prefer the standard property, fall back to the legacy alias.
        sheetRules = style.sheet.cssRules || style.sheet.rules;
        if (sheetRules) {
          process(sheetRules);
          style.innerText = cssRules.join("");
        }
      }
    });
  }
  /**
   * Removes from the document every element under <body> (style, script, link
   * and area elements excepted) whose computed style makes it invisible:
   * visibility "hidden", display "none" or an opacity loosely equal to 0.
   * iframe and frame elements are never removed.
   */
  function removeHiddenElements() {
    var candidates = doc.querySelectorAll("html > body *:not(style):not(script):not(link):not(area)");
    Array.prototype.forEach.call(candidates, function(element) {
      var computed = getComputedStyle(element), name = element.tagName.toLowerCase();
      var hidden;
      if (name == "iframe" || name == "frame")
        return;
      // The loose comparison on opacity is intentional: the computed value is a string.
      hidden = computed.visibility == "hidden" || computed.display == "none" || computed.opacity == 0;
      if (hidden)
        element.parentElement.removeChild(element);
    });
  }
  /**
   * Extracts the element enclosing the current selection, if any.
   *
   * When the first range of the selection is not collapsed, the element that
   * contains it (the parent element when the common ancestor is not an element)
   * gets the CSS text of its matched rules inlined in "style" attributes, for
   * itself and all of its descendant elements. An unstyled deep clone, taken
   * beforehand, replaces it in the document and the styled original is returned.
   *
   * @returns the styled element, or undefined when nothing is selected
   */
  function getSelectedContent() {
    var node, clonedNode, selection = getSelection(), range = selection.rangeCount ? selection.getRangeAt(0) : null;

    // Inlines the matched CSS rules of node and of its descendants, children first.
    function addStyle(node) {
      var rules, cssText;
      Array.prototype.forEach.call(node.children, function(child) {
        addStyle(child);
      });
      rules = getMatchedCSSRules(node, '', false);
      if (rules) {
        cssText = "";
        Array.prototype.forEach.call(rules, function(rule) {
          cssText += rule.style.cssText;
        });
        node.setAttribute("style", cssText);
      }
    }

    // Comparing startOffset with endOffset is not enough: a selection spanning
    // two different nodes can have identical offsets in each of them.
    if (range && !range.collapsed) {
      node = range.commonAncestorContainer;
      if (node.nodeType != node.ELEMENT_NODE)
        node = node.parentElement;
      clonedNode = node.cloneNode(true);
      addStyle(node);
      // parentNode also covers the root element, whose parent is the document.
      if (node.parentNode)
        node.parentNode.replaceChild(clonedNode, node);
    }
    return node;
  }
  /**
   * Serializes every <canvas> element of the given document as a PNG data URL.
   *
   * @param doc document to scan
   * @returns an array with one entry per canvas, in document order; the entry
   *          is null when toDataURL throws for that canvas
   */
  function getCanvasData(doc) {
    return Array.prototype.map.call(doc.querySelectorAll("canvas"), function(canvas) {
      try {
        return canvas.toDataURL("image/png", "");
      } catch (e) {
        // Best effort: a canvas that cannot be exported is recorded as null.
        return null;
      }
    });
  }
  /**
   * Prepares the processing of one document and registers it in `docs` under winId.
   *
   * The registered entry holds the document, its root element, its iframe/frame
   * elements, a dedicated RequestManager and the function returned by
   * singlefile.initProcess. Initialization, progress and completion are
   * reported to the background page through bgPort.
   *
   * @param doc document being processed
   * @param docElement root element to process
   * @param winId identifier of the window owning the document
   * @param topWindow truthy for the top window; in that case no content is
   *        attached to the final docEnd message
   * @param canvasData serialized canvases of the document
   */
  function initProcess(doc, docElement, winId, topWindow, canvasData) {
    var requestManager = new RequestManager(pageId, winId);
    var frames = docElement.querySelectorAll("iframe, frame");
    var processDoc;

    function onInit(maxIndex) {
      bgPort.postMessage({
        docInit : true,
        pageId : pageId,
        winId : winId,
        maxIndex : maxIndex
      });
    }

    function onProgress(index) {
      bgPort.postMessage({
        docProgress : true,
        pageId : pageId,
        winId : winId,
        index : index
      });
    }

    function onEnd() {
      var content = null;
      if (!topWindow)
        content = singlefile.util.getDocContent(doc, docElement);
      bgPort.postMessage({
        docEnd : true,
        pageId : pageId,
        winId : winId,
        content : content
      });
    }

    processDoc = singlefile.initProcess(doc, docElement, topWindow, doc.baseURI, doc.characterSet, config, canvasData, requestManager, onInit,
        onProgress, onEnd);
    docs[winId] = {
      doc : doc,
      docElement : docElement,
      frames : frames,
      requestManager : requestManager,
      processDoc : processDoc
    };
  }
  /**
   * Posts a processInit message (without document content) to the background page.
   *
   * Every falsy argument falls back to the value of the current window:
   * location.href, doc.title, doc.baseURI, wininfo.winId and wininfo.index.
   *
   * @param title title of the document
   * @param url URL of the document
   * @param baseURI base URI of the document
   * @param winId identifier of the window; when truthy the message is flagged
   *        as not coming from the top window
   * @param winIndex index of the window
   */
  function sendFgProcessInit(title, url, baseURI, winId, winIndex) {
    var message = {
      processInit : true,
      pageId : pageId
    };
    if (winId)
      message.topWindow = false;
    else
      message.topWindow = window == top;
    message.url = url || location.href;
    message.title = title || doc.title;
    message.baseURI = baseURI || doc.baseURI;
    message.winId = winId || wininfo.winId;
    message.index = winIndex || wininfo.index;
    bgPort.postMessage(message);
  }
  /**
   * Posts a processInit message carrying the document content and the shared
   * canvasData to the background page.
   *
   * Every falsy argument other than content falls back to the value of the
   * current window: location.href, doc.title, doc.baseURI, doc.characterSet,
   * wininfo.winId and wininfo.index.
   *
   * @param content serialized document, sent as-is
   * @param title title of the document
   * @param url URL of the document
   * @param baseURI base URI of the document
   * @param characterSet character set of the document
   * @param winId identifier of the window; when truthy the message is flagged
   *        as not coming from the top window
   * @param winIndex index of the window
   */
  function sendBgProcessInit(content, title, url, baseURI, characterSet, winId, winIndex) {
    var message = {
      processInit : true,
      pageId : pageId
    };
    if (winId)
      message.topWindow = false;
    else
      message.topWindow = window == top;
    message.url = url || location.href;
    message.title = title || doc.title;
    message.content = content;
    message.baseURI = baseURI || doc.baseURI;
    message.characterSet = characterSet || doc.characterSet;
    message.canvasData = canvasData;
    message.winId = winId || wininfo.winId;
    message.index = winIndex || wininfo.index;
    bgPort.postMessage(message);
  }
  174. // ----------------------------------------------------------------------------------------------
  /**
   * Start-up routine of the content script.
   *
   * Unless a selection is being processed, the live document is first
   * prepared: <noscript> elements are emptied, canvases are serialized, hidden
   * elements are optionally removed and, in the top window, a comment stating
   * the URL and the date is inserted at the top of the root element. The
   * document (and, from the top window, its same-domain frames) is then handed
   * over either to the background page or to the in-page processor, depending
   * on config.processInBackground.
   */
  function init() {
    var selectedContent = getSelectedContent(), topWindow = window == top;

    // Requests the content of each same-domain frame and passes it to onContent.
    function forEachSameDomainFrame(onContent) {
      wininfo.frames.forEach(function(frame) {
        if (!frame.sameDomain)
          return;
        wininfo.getContent(frame, function(message) {
          onContent(frame, message);
        });
      });
    }

    // Downloads the current document again as plain text and passes the text to onLoaded.
    function loadRawDoc(onLoaded) {
      var request = new XMLHttpRequest();
      request.onreadystatechange = function() {
        if (request.readyState == 4)
          onLoaded(request.responseText);
      };
      request.open("GET", doc.location.href, true);
      request.overrideMimeType('text/plain; charset=' + doc.characterSet);
      request.send(null);
    }

    // Announces the window, then starts the in-page processing of docElement and of the frames.
    function doFgProcessInit() {
      sendFgProcessInit();
      if (!docElement || (singlefile.processSelection && !selectedContent))
        return;
      initProcess(doc, docElement, wininfo.winId, topWindow, canvasData);
      if (topWindow && !config.removeFrames && !config.getRawDoc) {
        forEachSameDomainFrame(function(frame, message) {
          var frameDoc = document.implementation.createHTMLDocument();
          frameDoc.open();
          frameDoc.write(message.content);
          frameDoc.close();
          sendFgProcessInit(message.title, message.url, message.baseURI, frame.winId, frame.index);
          initProcess(frameDoc, frameDoc.documentElement, frame.winId, false, getCanvasData(frameDoc));
        });
      }
    }

    // Sends the serialized document(s) to the background page for processing.
    function bgProcessInit() {
      if (singlefile.processSelection) {
        if (selectedContent || !topWindow)
          sendBgProcessInit(topWindow ? singlefile.util.getDocContent(doc, selectedContent) : null);
        return;
      }
      if (config.getRawDoc && topWindow) {
        loadRawDoc(function(text) {
          sendBgProcessInit(text);
        });
        return;
      }
      sendBgProcessInit(singlefile.util.getDocContent(doc));
      if (topWindow && !config.removeFrames) {
        forEachSameDomainFrame(function(frame, message) {
          sendBgProcessInit(message.content, message.title, message.url, message.baseURI, message.characterSet, frame.winId, frame.index);
        });
      }
    }

    // Chooses the root element to process in the page, then starts the processing.
    function fgProcessInit() {
      if (singlefile.processSelection) {
        if (selectedContent || topWindow) {
          docElement = selectedContent;
          doFgProcessInit();
        }
        return;
      }
      if (config.getRawDoc && topWindow) {
        loadRawDoc(function(text) {
          var tmpDoc = document.implementation.createHTMLDocument();
          tmpDoc.open();
          tmpDoc.write(text);
          tmpDoc.close();
          docElement = doc.importNode(tmpDoc.documentElement, true);
          doFgProcessInit();
        });
        return;
      }
      docElement = doc.documentElement.cloneNode(true);
      doFgProcessInit();
    }

    if (!selectedContent) {
      Array.prototype.forEach.call(doc.querySelectorAll("noscript"), function(noscript) {
        noscript.textContent = "";
      });
      canvasData = getCanvasData(doc);
      if (config.removeHidden)
        removeHiddenElements();
      if (topWindow) {
        document.documentElement.insertBefore(document.createComment("\n Archive processed by SingleFile \n url: " + location.href + " \n saved date: "
            + new Date() + " \n"), document.documentElement.firstChild);
      }
    }

    // Frames only take part when they are neither removed nor bypassed by the raw-document mode.
    if ((!config.removeFrames && !config.getRawDoc) || topWindow) {
      if (config.processInBackground)
        bgProcessInit();
      else
        fgProcessInit();
    }
  }
  /**
   * Handles a setContentRequest message from the background page.
   *
   * When config.displayProcessedPage is set, the properties of the page's
   * window are first reset by a script injected through a "javascript:" URL;
   * once that script signals completion (or after 3 seconds) the processed
   * content replaces the displayed document, later DOM mutations are watched
   * so that inserted markup is processed too, unused CSS rules are optionally
   * removed and a setContentResponse message is posted. Otherwise the response
   * is posted immediately.
   *
   * @param message request message; `content` holds the processed markup and
   *        `winProperties` the window properties to preserve
   */
  function setContentRequest(message) {
    var mutationEventId = 0, winId = wininfo.winId, timeoutSetContent;

    // WARNING: the source text of this function is obtained with toString() and
    // embedded in a "javascript:" URL below. It must stay self-contained (no
    // reference to the enclosing closure) and, to be safe, free of comments and
    // of any reliance on line breaks.
    function resetWindowProperties(winPropertiesStr) {
      var property, winProp, customEvent, parse = JSON.parse || JSON.decode;
      try {
        winProp = parse(winPropertiesStr);
        for (property in window)
          if (!winProp[property])
            window[property] = null;
      } catch (e) {
        console.log(e);
      }
      customEvent = document.createEvent("CustomEvent");
      customEvent.initCustomEvent("WindowPropertiesCleaned", true, true);
      document.dispatchEvent(customEvent);
    }

    // Processes the markup of an element modified after the page has been replaced.
    function onDOMSubtreeModified(event) {
      var id = mutationEventId, element = event.target, processDocFn;

      // Applies the fragment processed by the background page for this very mutation.
      function onSetDocFragment(message) {
        if (message.setDocFragment && message.mutationEventId == id) {
          // The listener is detached while writing so that this change is not processed again.
          doc.removeEventListener("DOMSubtreeModified", onDOMSubtreeModified, true);
          element.innerHTML = message.content;
          doc.addEventListener("DOMSubtreeModified", onDOMSubtreeModified, true);
          bgPort.onMessage.removeListener(onSetDocFragment);
        }
      }

      if (element.innerHTML) {
        if (config.processInBackground) {
          bgPort.postMessage({
            processDocFragment : true,
            pageId : pageId,
            winId : winId,
            content : element.innerHTML,
            mutationEventId : id
          });
          bgPort.onMessage.addListener(onSetDocFragment);
          mutationEventId++;
        } else
          processDocFn = singlefile.initProcess(doc, element, false, doc.baseURI, doc.characterSet, config, canvasData, docs[winId].requestManager,
              function(maxIndex) {
                doc.removeEventListener("DOMSubtreeModified", onDOMSubtreeModified, true);
                processDocFn();
                doc.addEventListener("DOMSubtreeModified", onDOMSubtreeModified, true);
              });
      }
      event.preventDefault();
    }

    // Replaces the displayed document with the processed one, then answers the request.
    function onWindowPropertiesCleaned() {
      var tmpDoc;

      function replaceDoc() {
        doc.replaceChild(docElement, doc.documentElement);
        doc.addEventListener("DOMSubtreeModified", onDOMSubtreeModified, true);
      }

      // This handler runs either on the event or on the timeout, whichever comes first.
      if (timeoutSetContent) {
        clearTimeout(timeoutSetContent);
        timeoutSetContent = null;
      }
      doc.removeEventListener('WindowPropertiesCleaned', onWindowPropertiesCleaned, true);
      if (config.processInBackground || singlefile.processSelection || (!config.processInBackground && !config.removeScripts)) {
        // Anchored suffix test: an index-based comparison would also accept any
        // 3-character path that does not contain ".txt" at all (-1 + 4 == 3).
        if (/\.txt$/.test(location.pathname)) {
          tmpDoc = document.implementation.createHTMLDocument();
          tmpDoc.open();
          tmpDoc.write(message.content);
          tmpDoc.close();
          docElement = doc.importNode(tmpDoc.documentElement, true);
          replaceDoc();
        } else {
          doc.open();
          doc.write(message.content || singlefile.util.getDocContent(doc, docElement));
          doc.addEventListener("DOMSubtreeModified", onDOMSubtreeModified, true);
          doc.close();
        }
      } else
        replaceDoc();
      if (config.removeUnusedCSSRules)
        removeUnusedCSSRules();
      setContentResponse();
    }

    // Posts the response; the content is only attached when it has to be saved or returned.
    function sendSetContentResponse(content) {
      bgPort.postMessage({
        setContentResponse : true,
        winId : "0",
        pageId : pageId,
        content : config.savePage || config.getContent ? content : null
      });
    }

    // Picks the content to report depending on the processing mode.
    function setContentResponse() {
      if (singlefile.processSelection)
        sendSetContentResponse(message.content);
      else {
        if (config.processInBackground)
          sendSetContentResponse(singlefile.util.getDocContent(doc, doc.documentElement));
        else
          sendSetContentResponse(config.removeUnusedCSSRules ? singlefile.util.getDocContent(doc, doc.documentElement) : singlefile.util
              .getDocContent(doc, docElement));
      }
    }

    if (config.displayProcessedPage) {
      // NOTE(review): the JSON text is wrapped in single quotes without escaping;
      // this presumably relies on window property names never containing quotes
      // or backslashes - confirm.
      window.location.href = "javascript:(" + resetWindowProperties.toString() + ")('" + JSON.stringify(message.winProperties) + "'); void 0;";
      timeoutSetContent = setTimeout(onWindowPropertiesCleaned, 3000);
      doc.addEventListener('WindowPropertiesCleaned', onWindowPropertiesCleaned, true);
    } else
      setContentResponse();
  }
  /**
   * Forwards a resource content received from the background page to the
   * request manager of the window that asked for it. Responses addressed to a
   * window that is not registered in `docs` are ignored.
   *
   * @param message response message carrying winId, requestId and content
   */
  function getResourceContentResponse(message) {
    var entry = docs[message.winId];
    if (entry)
      entry.requestManager.onResponse(message.requestId, message.content);
  }
  /**
   * Replaces the source of a frame with its processed content, embedded as a
   * data: URL, then acknowledges the request to the background page.
   *
   * @param message request message; winId selects the registered window, index
   *        the frame within it and content is the processed markup
   */
  function setFrameContentRequest(message) {
    // encodeURIComponent, unlike encodeURI, also escapes "#": left as-is it
    // would start the fragment of the URL and cut the markup at that point.
    docs[message.winId].frames[message.index].setAttribute("src", "data:text/html;charset=utf-8," + encodeURIComponent(message.content));
    bgPort.postMessage({
      setFrameContentResponse : true,
      pageId : pageId,
      winId : message.winId,
      index : message.index
    });
  }
  /**
   * Answers a getContentRequest message with the serialized content of a document.
   *
   * The document registered in `docs` for message.winId is used when there is
   * one; otherwise the current document and root element of this content
   * script are serialized instead.
   *
   * @param message request message carrying the winId of the wanted window
   */
  function getContentRequest(message) {
    var entry = docs[message.winId], content;
    // The entry itself may be missing, not only its document.
    if (entry && entry.doc)
      content = singlefile.util.getDocContent(entry.doc, entry.docElement);
    else
      content = singlefile.util.getDocContent(doc, docElement);
    bgPort.postMessage({
      getContentResponse : true,
      pageId : pageId,
      winId : message.winId,
      content : content
    });
  }
  /**
   * Runs the processing function registered for the window named in the
   * message; does nothing when that window is not registered in `docs`.
   *
   * @param message message carrying the winId of the window to process
   */
  function processDoc(message) {
    var entry = docs[message.winId];
    if (!entry)
      return;
    entry.processDoc();
  }
  // Open the long-lived connection to the background page.
  bgPort = chrome.extension.connect({
    name : "singlefile"
  });

  // Dispatch every incoming message to the handler of each flag it carries.
  // The flags are tested independently and in this order, so a message
  // carrying several flags triggers several handlers.
  bgPort.onMessage.addListener(function(message) {
    var handlers = [
      [ "getResourceContentResponse", getResourceContentResponse ],
      [ "setFrameContentRequest", setFrameContentRequest ],
      [ "getContentRequest", getContentRequest ],
      [ "setContentRequest", setContentRequest ],
      [ "processDoc", processDoc ]
    ];
    handlers.forEach(function(handler) {
      if (message[handler[0]])
        handler[1](message);
    });
  });

  // Only HTML documents are processed.
  if (doc.documentElement instanceof HTMLHtmlElement) {
    init();
  }
  422. })();