content.js 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465
  1. /*
  2. * Copyright 2011 Gildas Lormeau
  3. * contact : gildas.lormeau <at> gmail.com
  4. *
  5. * This file is part of SingleFile Core.
  6. *
  7. * SingleFile Core is free software: you can redistribute it and/or modify
  8. * it under the terms of the GNU Lesser General Public License as published by
  9. * the Free Software Foundation, either version 3 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * SingleFile Core is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public License
  18. * along with SingleFile Core. If not, see <http://www.gnu.org/licenses/>.
  19. */
  20. (function() {
  // State shared by every function of this content script.
  var bgPort; // port to the background page, assigned once the connection is opened
  var docs = {}; // per-window processing state, keyed by winId
  var pageId = singlefile.pageId;
  var doc = document;
  var docElement; // root element being processed, assigned during initialisation
  var canvasData = []; // data URLs of the page's canvases
  var config = singlefile.config;
  /**
   * Sends resource requests to the background page and routes each response
   * back to the handler that was registered for it.
   *
   * @param pageId identifier of the page, copied into every request
   * @param winId identifier of the window/frame, copied into every request
   */
  function RequestManager(pageId, winId) {
    var requestId = 0, callbacks = [];

    /**
     * Posts a getResourceContentRequest message for `url` and remembers
     * `responseHandler` under the id assigned to this request.
     */
    this.send = function(url, responseHandler, characterSet, mediaTypeParam) {
      callbacks[requestId] = responseHandler;
      bgPort.postMessage({
        getResourceContentRequest : true,
        pageId : pageId,
        winId : winId,
        requestId : requestId,
        url : url,
        characterSet : characterSet,
        mediaTypeParam : mediaTypeParam
      });
      requestId++;
    };

    /**
     * Delivers `content` to the handler registered for request `id`.
     * Unknown ids and ids that were already answered are ignored instead of
     * raising a TypeError.
     */
    this.onResponse = function(id, content) {
      var callback = callbacks[id];
      if (typeof callback != "function")
        return;
      // Release the slot first so it is freed even if the handler throws.
      callbacks[id] = null;
      callback(content);
    };
  }
  /**
   * Rewrites every <style> element so that it only keeps the style rules whose
   * selector matches at least one element of the document.
   *
   * - Dynamic pseudo-classes/elements are stripped from a selector before it is
   *   tested, because they cannot be matched with querySelector.
   * - A selector that cannot be tested (nothing left after stripping, or
   *   querySelector throws) keeps its rule.
   * - @media blocks are processed recursively.
   * - Any other rule (@font-face, @keyframes, @import, @supports, ...) is kept
   *   verbatim: it has no selector to test and dropping it would lose fonts
   *   and animations.
   */
  function removeUnusedCSSRules() {
    var pseudoPattern = /::after|::before|::first-line|::first-letter|:focus|:hover/gi;

    // Tells whether a rule with this selector text must be kept.
    function isSelectorUsed(selectorText) {
      var selector = selectorText.replace(pseudoPattern, "").trim();
      if (!selector)
        return true;
      try {
        return !!document.querySelector(selector);
      } catch (e) {
        // Selector not understood by querySelector: keep the rule to be safe.
        return true;
      }
    }

    // Appends to `output` the text of the rules of `rules` that must be kept.
    function collect(rules, output) {
      Array.prototype.forEach.call(rules || [], function(rule) {
        if (rule.selectorText) {
          if (isSelectorUsed(rule.selectorText))
            output.push(rule.cssText);
        } else if (rule.media && rule.cssRules) {
          output.push("@media " + Array.prototype.join.call(rule.media, ",") + " {");
          collect(rule.cssRules, output);
          output.push("}");
        } else if (rule.cssText)
          output.push(rule.cssText);
      });
    }

    Array.prototype.forEach.call(document.querySelectorAll("style"), function(style) {
      var sheet = style.sheet, output = [];
      if (sheet) {
        // cssRules is the standard name; rules is kept as a fallback.
        collect(sheet.cssRules || sheet.rules, output);
        // textContent stores the text as is, whereas innerText turns line breaks into <br>.
        style.textContent = output.join("");
      }
    });
  }
  /**
   * Removes from the page every element under <body> whose computed style makes
   * it invisible (visibility hidden, display none or zero opacity).
   * style, script, link and area elements are not considered, and frames are
   * never removed.
   */
  function removeHiddenElements() {
    var candidates = doc.querySelectorAll("html > body *:not(style):not(script):not(link):not(area)");
    var position, candidate, computed, name;
    for (position = 0; position < candidates.length; position++) {
      candidate = candidates[position];
      computed = getComputedStyle(candidate);
      name = candidate.tagName.toLowerCase();
      if (name == "iframe" || name == "frame")
        continue;
      // Computed values are strings, hence the loose comparison of opacity with 0.
      if (computed.visibility == "hidden" || computed.display == "none" || computed.opacity == 0)
        candidate.parentElement.removeChild(candidate);
    }
  }
  /**
   * Returns the element enclosing the current selection with the matched CSS
   * rules inlined into the "style" attribute of the element and of all its
   * descendants, or undefined when nothing is selected.
   *
   * The returned element is the original node: a pristine clone takes its
   * place in the page so the visible document is left unstyled by this call.
   */
  function getSelectedContent() {
    var node, clonedNode, selection = getSelection(), range = selection.rangeCount ? selection.getRangeAt(0) : null;

    // Inlines, depth first, the declarations of the rules matching `node`.
    function addStyle(node) {
      var rules, cssText, inlineStyle;
      Array.prototype.forEach.call(node.children, function(child) {
        addStyle(child);
      });
      // getMatchedCSSRules is a non-standard API: skip the inlining when it is missing.
      rules = typeof getMatchedCSSRules == "function" ? getMatchedCSSRules(node, '', false) : null;
      if (rules) {
        cssText = "";
        Array.prototype.forEach.call(rules, function(rule) {
          cssText += rule.style.cssText;
        });
        // Keep the element's own inline declarations, placed last so they still win.
        inlineStyle = node.getAttribute("style");
        if (inlineStyle)
          cssText += inlineStyle;
        node.setAttribute("style", cssText);
      }
    }

    // `collapsed` is the reliable emptiness test: equal offsets in two
    // different containers still delimit a non-empty selection.
    if (range && !range.collapsed) {
      node = range.commonAncestorContainer;
      if (node.nodeType != node.ELEMENT_NODE)
        node = node.parentElement;
      clonedNode = node.cloneNode(true);
      addStyle(node);
      // parentNode also covers <html>, whose parent is the document and not an element.
      if (node.parentNode)
        node.parentNode.replaceChild(clonedNode, node);
    }
    return node;
  }
  /**
   * Returns one entry per <canvas> element of `doc`, in document order: the PNG
   * data URL of the canvas, or null when the canvas cannot be exported.
   */
  function getCanvasData(doc) {
    return Array.prototype.map.call(doc.querySelectorAll("canvas"), function(canvas) {
      try {
        return canvas.toDataURL("image/png", "");
      } catch (e) {
        // Export failed: keep the slot so indexes still line up with the canvases.
        return null;
      }
    });
  }
  /**
   * Prepares the processing of one document and registers it in `docs` under
   * `winId`, together with its frames, its request manager and the function
   * returned by singlefile.initProcess (stored as `processDoc`).
   *
   * The three callbacks given to singlefile.initProcess report to the
   * background page with docInit, docProgress and docEnd messages.
   */
  function initProcess(doc, docElement, winId, topWindow, canvasData) {
    var requestManager = new RequestManager(pageId, winId), frames, processDoc;

    function notifyInit(maxIndex) {
      bgPort.postMessage({
        docInit : true,
        pageId : pageId,
        winId : winId,
        maxIndex : maxIndex
      });
    }

    function notifyProgress(index) {
      bgPort.postMessage({
        docProgress : true,
        pageId : pageId,
        winId : winId,
        index : index
      });
    }

    function notifyEnd() {
      // The top window sends no content here; other windows send their serialized document.
      bgPort.postMessage({
        docEnd : true,
        pageId : pageId,
        winId : winId,
        content : topWindow ? null : singlefile.util.getDocContent(doc, docElement)
      });
    }

    frames = docElement.querySelectorAll("iframe, frame");
    processDoc = singlefile.initProcess(doc, docElement, topWindow, doc.baseURI, doc.characterSet, config, canvasData, requestManager,
      notifyInit, notifyProgress, notifyEnd);
    docs[winId] = {
      doc : doc,
      docElement : docElement,
      frames : frames,
      requestManager : requestManager,
      processDoc : processDoc
    };
  }
  /**
   * Posts the processInit message used when the page is processed in the
   * content script (no document content is sent).
   *
   * Every parameter is optional; a missing value falls back to the one of the
   * current window (location, doc, wininfo). When `winId` is given the message
   * describes a frame and `topWindow` is false.
   *
   * @param title document title
   * @param url document URL
   * @param baseURI document base URI
   * @param winId window identifier of the frame
   * @param winIndex index of the frame; 0 is a valid value
   */
  function sendFgProcessInit(title, url, baseURI, winId, winIndex) {
    // The context menu timestamp is consumed: read it once, then reset it.
    var contextmenuTime = window.contextmenuTime;
    window.contextmenuTime = null;
    bgPort.postMessage({
      processInit : true,
      pageId : pageId,
      topWindow : winId ? false : window == top,
      url : url || location.href,
      title : title || doc.title,
      baseURI : baseURI || doc.baseURI,
      winId : winId || wininfo.winId,
      contextmenuTime : contextmenuTime,
      // Explicit null test: `winIndex || ...` would discard the index 0.
      index : winIndex != null ? winIndex : wininfo.index
    });
  }
  /**
   * Posts the processInit message used when the page is processed by the
   * background page: it carries the document content, its character set and
   * the canvas data of the page.
   *
   * Every parameter but `content` falls back to the value of the current
   * window (location, doc, wininfo) when missing. When `winId` is given the
   * message describes a frame and `topWindow` is false.
   *
   * @param content serialized document, sent as is
   * @param title document title
   * @param url document URL
   * @param baseURI document base URI
   * @param characterSet document character set
   * @param winId window identifier of the frame
   * @param winIndex index of the frame; 0 is a valid value
   */
  function sendBgProcessInit(content, title, url, baseURI, characterSet, winId, winIndex) {
    // The context menu timestamp is consumed: read it once, then reset it.
    var contextmenuTime = window.contextmenuTime;
    window.contextmenuTime = null;
    bgPort.postMessage({
      processInit : true,
      pageId : pageId,
      topWindow : winId ? false : window == top,
      url : url || location.href,
      title : title || doc.title,
      content : content,
      baseURI : baseURI || doc.baseURI,
      characterSet : characterSet || doc.characterSet,
      canvasData : canvasData,
      winId : winId || wininfo.winId,
      contextmenuTime : contextmenuTime,
      // Explicit null test: `winIndex || ...` would discard the index 0.
      index : winIndex != null ? winIndex : wininfo.index
    });
  }
  180. // ----------------------------------------------------------------------------------------------
  /**
   * Entry point of the content script: prepares the page (unless a selection is
   * being saved) and starts the processing, either in the background page or in
   * this script depending on config.processInBackground.
   */
  function init() {
    var selectedContent = getSelectedContent(), topWindow = window == top;

    // Calls callback(frame, message) with the content message of each same-domain frame.
    function forEachSameDomainFrame(callback) {
      wininfo.frames.forEach(function(frame) {
        if (frame.sameDomain)
          wininfo.getContent(frame, function(message) {
            callback(frame, message);
          });
      });
    }

    // Downloads the raw source of the current document and passes its text to callback.
    function fetchRawDoc(callback) {
      var xhr = new XMLHttpRequest();
      xhr.onreadystatechange = function() {
        if (xhr.readyState == 4)
          callback(xhr.responseText);
      };
      xhr.open("GET", doc.location.href, true);
      xhr.overrideMimeType('text/plain; charset=' + doc.characterSet);
      xhr.send(null);
    }

    // Announces this window, then initialises the processing of docElement and of the frames.
    function doFgProcessInit() {
      sendFgProcessInit();
      if (!docElement || (singlefile.processSelection && !selectedContent))
        return;
      initProcess(doc, docElement, wininfo.winId, topWindow, canvasData);
      if (!topWindow || config.removeFrames || config.getRawDoc)
        return;
      forEachSameDomainFrame(function(frame, message) {
        // Each frame is rebuilt as a standalone document from its serialized content.
        var frameDoc = document.implementation.createHTMLDocument();
        frameDoc.open();
        frameDoc.write(message.content);
        frameDoc.close();
        sendFgProcessInit(message.title, message.url, message.baseURI, frame.winId, frame.index);
        initProcess(frameDoc, frameDoc.documentElement, frame.winId, false, getCanvasData(frameDoc));
      });
    }

    // Background mode: only sends the document contents to the background page.
    function bgProcessInit() {
      if (singlefile.processSelection) {
        if (selectedContent || !topWindow)
          sendBgProcessInit(topWindow ? singlefile.util.getDocContent(doc, selectedContent) : null);
        return;
      }
      if (config.getRawDoc && topWindow) {
        fetchRawDoc(function(rawContent) {
          sendBgProcessInit(rawContent);
        });
        return;
      }
      sendBgProcessInit(singlefile.util.getDocContent(doc));
      if (topWindow && !config.removeFrames)
        forEachSameDomainFrame(function(frame, message) {
          sendBgProcessInit(message.content, message.title, message.url, message.baseURI, message.characterSet, frame.winId,
            frame.index);
        });
    }

    // Foreground mode: chooses the element to process, then initialises the processing here.
    function fgProcessInit() {
      if (singlefile.processSelection) {
        if (selectedContent || topWindow) {
          docElement = selectedContent;
          doFgProcessInit();
        }
        return;
      }
      if (config.getRawDoc && topWindow) {
        fetchRawDoc(function(rawContent) {
          var tmpDoc = document.implementation.createHTMLDocument();
          tmpDoc.open();
          tmpDoc.write(rawContent);
          tmpDoc.close();
          docElement = doc.importNode(tmpDoc.documentElement, true);
          doFgProcessInit();
        });
        return;
      }
      docElement = doc.documentElement.cloneNode(true);
      doFgProcessInit();
    }

    if (!selectedContent) {
      Array.prototype.forEach.call(doc.querySelectorAll("noscript"), function(noscript) {
        noscript.textContent = "";
      });
      canvasData = getCanvasData(doc);
      if (config.removeHidden)
        removeHiddenElements();
      if (topWindow)
        document.documentElement.insertBefore(document.createComment("\n Archive processed by SingleFile \n url: " + location.href + " \n saved date: "
          + new Date() + " \n"), document.documentElement.firstChild);
    }

    // Frames only take part when frames are kept and the raw document is not requested.
    if ((!config.removeFrames && !config.getRawDoc) || topWindow) {
      if (config.processInBackground)
        bgProcessInit();
      else
        fgProcessInit();
    }
  }
  /**
   * Handles the setContentRequest message: optionally displays the processed
   * page in place of the current one (config.displayProcessedPage), then
   * answers with a setContentResponse message.
   *
   * @param message request from the background page; `content` is the
   *        processed document and `winProperties` the set of window
   *        properties to preserve
   */
  function setContentRequest(message) {
    var mutationEventId = 0, winId = wininfo.winId, timeoutSetContent;

    // Injected into the page as source text through a javascript: URL (see the
    // end of this function), so it must stay self-contained: no reference to the
    // enclosing scope. NOTE(review): keep the body free of "//" comments,
    // presumably unsafe if the URL loses its line breaks - confirm.
    function resetWindowProperties(winPropertiesStr) {
      var property, winProp, customEvent, parse = JSON.parse || JSON.decode;
      try {
        winProp = parse(winPropertiesStr);
        for (property in window)
          if (!winProp[property])
            window[property] = null;
      } catch (e) {
        console.log(e);
      }
      customEvent = document.createEvent("CustomEvent");
      customEvent.initCustomEvent("WindowPropertiesCleaned", true, true);
      document.dispatchEvent(customEvent);
    }

    // True when pathname ends with ".txt". The previous test,
    // indexOf(".txt") + 4 == length, was also true for any 3-character path
    // without ".txt" (-1 + 4 == 3) and false when ".txt" occurred earlier too.
    function isTextFilePath(pathname) {
      var suffix = ".txt", start = pathname.length - suffix.length;
      return start >= 0 && pathname.indexOf(suffix, start) == start;
    }

    // Re-processes the content of an element modified after the page was displayed.
    function onDOMSubtreeModified(event) {
      var id = mutationEventId, element = event.target, processDocFn;

      // Applies the fragment returned by the background page for this mutation.
      function onSetDocFragment(message) {
        if (message.setDocFragment && message.mutationEventId == id) {
          // The listener is detached while writing so the update does not trigger itself.
          doc.removeEventListener("DOMSubtreeModified", onDOMSubtreeModified, true);
          element.innerHTML = message.content;
          doc.addEventListener("DOMSubtreeModified", onDOMSubtreeModified, true);
          bgPort.onMessage.removeListener(onSetDocFragment);
        }
      }

      if (element.innerHTML) {
        if (config.processInBackground) {
          bgPort.postMessage({
            processDocFragment : true,
            pageId : pageId,
            winId : winId,
            content : element.innerHTML,
            mutationEventId : id
          });
          bgPort.onMessage.addListener(onSetDocFragment);
          mutationEventId++;
        } else
          processDocFn = singlefile.initProcess(doc, element, false, doc.baseURI, doc.characterSet, config, canvasData, docs[winId].requestManager,
            function(maxIndex) {
              doc.removeEventListener("DOMSubtreeModified", onDOMSubtreeModified, true);
              processDocFn();
              doc.addEventListener("DOMSubtreeModified", onDOMSubtreeModified, true);
            });
      }
      event.preventDefault();
    }

    // Runs once the page has cleaned its window properties, or after the timeout.
    function onWindowPropertiesCleaned() {
      var tmpDoc;

      function replaceDoc() {
        doc.replaceChild(docElement, doc.documentElement);
        doc.addEventListener("DOMSubtreeModified", onDOMSubtreeModified, true);
      }

      if (timeoutSetContent) {
        clearTimeout(timeoutSetContent);
        timeoutSetContent = null;
      }
      doc.removeEventListener('WindowPropertiesCleaned', onWindowPropertiesCleaned, true);
      if (config.processInBackground || singlefile.processSelection || (!config.processInBackground && !config.removeScripts)) {
        if (isTextFilePath(location.pathname)) {
          // ".txt" pages: the content is parsed in a scratch document and imported.
          tmpDoc = document.implementation.createHTMLDocument();
          tmpDoc.open();
          tmpDoc.write(message.content);
          tmpDoc.close();
          docElement = doc.importNode(tmpDoc.documentElement, true);
          replaceDoc();
        } else {
          doc.open();
          doc.write(message.content || singlefile.util.getDocContent(doc, docElement));
          doc.addEventListener("DOMSubtreeModified", onDOMSubtreeModified, true);
          doc.close();
        }
      } else
        replaceDoc();
      if (config.removeUnusedCSSRules)
        removeUnusedCSSRules();
      setContentResponse();
    }

    function sendSetContentResponse(content) {
      bgPort.postMessage({
        setContentResponse : true,
        winId : "0",
        pageId : pageId,
        content : config.getContent ? content : null
      });
    }

    // Chooses the content reported back to the background page.
    function setContentResponse() {
      if (singlefile.processSelection)
        sendSetContentResponse(message.content);
      else {
        if (config.processInBackground)
          sendSetContentResponse(singlefile.util.getDocContent(doc, doc.documentElement));
        else
          sendSetContentResponse(config.removeUnusedCSSRules ? singlefile.util.getDocContent(doc, doc.documentElement) : singlefile.util
            .getDocContent(doc, docElement));
      }
    }

    if (config.displayProcessedPage) {
      // NOTE(review): the JSON text is placed between single quotes without
      // escaping; a property name containing a quote or a backslash would
      // break the injected script.
      window.location.href = "javascript:(" + resetWindowProperties.toString() + ")('" + JSON.stringify(message.winProperties) + "'); void 0;";
      // Fallback in case the WindowPropertiesCleaned event never arrives.
      timeoutSetContent = setTimeout(onWindowPropertiesCleaned, 3000);
      doc.addEventListener('WindowPropertiesCleaned', onWindowPropertiesCleaned, true);
    } else
      setContentResponse();
  }
  /**
   * Forwards a getResourceContentResponse message to the request manager of
   * the window that issued the request.
   * A response for a window that is not registered in `docs` is ignored instead
   * of throwing inside the message listener, as processDoc already does.
   */
  function getResourceContentResponse(message) {
    var entry = docs[message.winId];
    if (entry)
      entry.requestManager.onResponse(message.requestId, message.content);
  }
  /**
   * Replaces the source of a frame with its processed content, embedded as a
   * data: URL, then acknowledges with a setFrameContentResponse message.
   *
   * @param message `winId` selects the registered document, `index` the frame
   *        inside it and `content` is the HTML to embed
   */
  function setFrameContentRequest(message) {
    // encodeURIComponent, not encodeURI: encodeURI leaves "#" unescaped, and a
    // "#" in a URL starts the fragment, which would cut the document short.
    docs[message.winId].frames[message.index].setAttribute("src", "data:text/html;charset=utf-8," + encodeURIComponent(message.content));
    bgPort.postMessage({
      setFrameContentResponse : true,
      pageId : pageId,
      winId : message.winId,
      index : message.index
    });
  }
  /**
   * Answers a getContentRequest message with the serialized content of the
   * document registered for `message.winId`.
   * When no document is registered for that window, the content of the current
   * document is sent instead. The previous code threw before it could reach
   * that fallback.
   */
  function getContentRequest(message) {
    var entry = docs[message.winId], content;
    if (entry && entry.doc)
      content = singlefile.util.getDocContent(entry.doc, entry.docElement);
    else
      content = singlefile.util.getDocContent(doc, docElement);
    bgPort.postMessage({
      getContentResponse : true,
      pageId : pageId,
      winId : message.winId,
      content : content
    });
  }
  /**
   * Starts the processing of the document registered for `message.winId`.
   * Does nothing when no document is registered for that window.
   */
  function processDoc(message) {
    var entry = docs[message.winId];
    if (!entry)
      return;
    entry.processDoc();
  }
  // Handlers of the background page messages, tried in this order. A message is
  // passed to every handler whose flag it carries.
  var messageHandlers = [
    [ "getResourceContentResponse", getResourceContentResponse ],
    [ "setFrameContentRequest", setFrameContentRequest ],
    [ "getContentRequest", getContentRequest ],
    [ "setContentRequest", setContentRequest ],
    [ "processDoc", processDoc ]
  ];

  bgPort = chrome.extension.connect({
    name : "singlefile"
  });
  bgPort.onMessage.addListener(function(message) {
    messageHandlers.forEach(function(entry) {
      if (message[entry[0]])
        entry[1](message);
    });
  });

  // Only HTML documents are processed.
  if (doc.documentElement instanceof HTMLHtmlElement)
    init();
  428. })();