/*
 * Copyright 2011 Gildas Lormeau
 * contact : gildas.lormeau <at> gmail.com
 *
 * This file is part of SingleFile Core.
 *
 * SingleFile Core is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * SingleFile Core is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with SingleFile Core. If not, see <http://www.gnu.org/licenses/>.
 */
  20. (function() {
  // Shared state of the content script, one declaration per line.
  // Port to the background page; assigned once the script connects.
  var bgPort;
  // Processing state of each handled window, keyed by window id.
  var docs = {};
  var pageId = singlefile.pageId;
  var doc = document;
  // Root element currently being processed; assigned later during processing.
  var docElement;
  var canvasData = [];
  var config = singlefile.config;
  /**
   * Sends resource content requests to the background page through bgPort and
   * routes each response back to the handler registered for that request.
   *
   * @param pageId identifier of the page, echoed in every request
   * @param winId identifier of the window or frame, echoed in every request
   */
  function RequestManager(pageId, winId) {
    var nextRequestId = 0, pendingHandlers = {};

    /**
     * Posts a getResourceContentRequest message for the given URL.
     *
     * @param url address of the resource to fetch
     * @param responseHandler function called with the content once the response arrives
     * @param characterSet forwarded as is to the background page
     * @param mediaTypeParam forwarded as is to the background page
     */
    this.send = function(url, responseHandler, characterSet, mediaTypeParam) {
      var requestId = nextRequestId++;
      pendingHandlers[requestId] = responseHandler;
      bgPort.postMessage({
        getResourceContentRequest : true,
        pageId : pageId,
        winId : winId,
        requestId : requestId,
        url : url,
        characterSet : characterSet,
        mediaTypeParam : mediaTypeParam
      });
    };

    /**
     * Delivers a response to the handler registered for the request id.
     * Unknown or already answered ids are ignored instead of throwing.
     *
     * @param id request id given in the response message
     * @param content content of the resource
     */
    this.onResponse = function(id, content) {
      var handler;
      // Own-property check: an id such as "toString" must not resolve to an inherited function
      if (!Object.prototype.hasOwnProperty.call(pendingHandlers, id))
        return;
      handler = pendingHandlers[id];
      // Release the entry before the call so a throwing handler cannot leave it behind
      delete pendingHandlers[id];
      if (typeof handler == "function")
        handler(content);
    };
  }
  /**
   * Rewrites every <style> element of the document so that it only keeps the
   * style rules whose selector currently matches an element.
   *
   * - @media groups are kept and filtered recursively.
   * - Rules without a selector (@font-face, @keyframes, @import, ...) are kept
   *   verbatim: there is no selector to test them against.
   * - A rule is also kept when its selector cannot be evaluated, or when only
   *   pseudo parts remain once stripped (it may then apply to any element).
   */
  function removeUnusedCSSRules() {
    // Pseudo parts removed before testing a selector against the document
    var pseudoParts = /::after|::before|::first-line|::first-letter|:focus|:hover/gi;

    // Tells whether a rule with this selector must be kept
    function isSelectorUsed(selectorText) {
      var selector = selectorText.replace(pseudoParts, '').trim();
      if (!selector)
        return true;
      try {
        return !!document.querySelector(selector);
      } catch (e) {
        // selector not understood by querySelector: keep the rule rather than lose it
        return true;
      }
    }

    // Appends to output the CSS text of the rules to keep
    function collect(rules, output) {
      Array.prototype.forEach.call(rules, function(rule) {
        // @import rules also expose "media" but have no nested rules to walk
        if (rule.media && rule.cssRules) {
          output.push("@media " + Array.prototype.join.call(rule.media, ",") + " {");
          collect(rule.cssRules, output);
          output.push("}");
        } else if (rule.selectorText) {
          if (isSelectorUsed(rule.selectorText))
            output.push(rule.cssText);
        } else if (rule.cssText)
          output.push(rule.cssText);
      });
    }

    Array.prototype.forEach.call(document.querySelectorAll("style"), function(style) {
      var keptRules = [], sheet = style.sheet, rules;
      if (!sheet)
        return;
      // "rules" is only a fallback for engines that do not expose "cssRules"
      rules = sheet.cssRules || sheet.rules;
      if (!rules)
        return;
      collect(rules, keptRules);
      style.textContent = keptRules.join("");
    });
  }
  /**
   * Removes from the body of doc the elements whose computed style makes them
   * invisible (visibility "hidden", display "none" or opacity 0).
   * style, script, link and area elements are excluded by the selector, and
   * iframe/frame elements are never removed here.
   */
  function removeHiddenElements() {
    Array.prototype.forEach.call(doc.querySelectorAll("html > body *:not(style):not(script):not(link):not(area)"), function(element) {
      var tagName = element.tagName.toLowerCase(), parent = element.parentElement, style;
      if (tagName == "iframe" || tagName == "frame" || !parent)
        return;
      style = getComputedStyle(element);
      // parseFloat() avoids the loose comparison "" == 0, which treated an
      // unresolved (empty) opacity as fully transparent
      if (style.visibility == "hidden" || style.display == "none" || parseFloat(style.opacity) === 0)
        parent.removeChild(element);
    });
  }
  /**
   * Extracts the element enclosing the current selection.
   *
   * The enclosing element gets the declarations of its matched CSS rules
   * inlined in its "style" attributes, then it is swapped in the document with
   * an untouched clone made beforehand. The returned value is therefore the
   * original, now detached, element.
   *
   * @returns the element enclosing the selection; undefined when nothing is
   *          selected; null when the selection has no enclosing element
   */
  function getSelectedContent() {
    var node, clonedNode, selection = getSelection(), range = selection && selection.rangeCount ? selection.getRangeAt(0) : null;

    // Recursively copies the matched rule declarations into the style attributes
    function addStyle(element) {
      var rules, cssText;
      Array.prototype.forEach.call(element.children, function(child) {
        addStyle(child);
      });
      rules = getMatchedCSSRules(element, '', false);
      if (rules) {
        cssText = "";
        Array.prototype.forEach.call(rules, function(rule) {
          cssText += rule.style.cssText;
        });
        // the declarations already inlined come last so that they keep precedence
        element.setAttribute("style", cssText + (element.getAttribute("style") || ""));
      }
    }

    // "collapsed" is the reliable emptiness test: the offsets of a range that
    // spans several nodes may be equal although the selection is not empty
    if (range && !range.collapsed) {
      node = range.commonAncestorContainer;
      if (node.nodeType != node.ELEMENT_NODE)
        node = node.parentElement;
      if (node && node.parentNode) {
        clonedNode = node.cloneNode(true);
        // getMatchedCSSRules() is a non-standard function: skip the inlining when missing
        if (typeof getMatchedCSSRules == "function")
          addStyle(node);
        node.parentNode.replaceChild(clonedNode, node);
      }
    }
    return node;
  }
  /**
   * Serializes every canvas element of a document as a PNG data URL.
   *
   * @param doc document to scan for canvas elements
   * @returns an array holding one entry per canvas, in document order; the
   *          entry is null when the canvas could not be exported
   */
  function getCanvasData(doc) {
    return Array.prototype.map.call(doc.querySelectorAll("canvas"), function(canvas) {
      try {
        return canvas.toDataURL("image/png", "");
      } catch (error) {
        // export refused or failed: keep a placeholder so that indexes stay aligned
        return null;
      }
    });
  }
  /**
   * Prepares the processing of a document and registers its state in docs
   * under the window id.
   *
   * @param doc document being processed
   * @param docElement root element handed to singlefile.initProcess
   * @param winId id of the window owning the document
   * @param topWindow true when the document belongs to the top window
   * @param canvasData canvas data forwarded to singlefile.initProcess
   */
  function initProcess(doc, docElement, winId, topWindow, canvasData) {
    var requestManager = new RequestManager(pageId, winId);

    // Reports the value received from singlefile.initProcess as "maxIndex"
    function notifyInit(maxIndex) {
      bgPort.postMessage({
        docInit : true,
        pageId : pageId,
        winId : winId,
        maxIndex : maxIndex
      });
    }

    // Reports the progress index received from singlefile.initProcess
    function notifyProgress(index) {
      bgPort.postMessage({
        docProgress : true,
        pageId : pageId,
        winId : winId,
        index : index
      });
    }

    // Reports the end of the processing; no content is sent for the top window
    function notifyEnd() {
      var content = null;
      if (!topWindow)
        content = singlefile.util.getDocContent(doc, docElement);
      bgPort.postMessage({
        docEnd : true,
        pageId : pageId,
        winId : winId,
        content : content
      });
    }

    docs[winId] = {
      doc : doc,
      docElement : docElement,
      frames : docElement.querySelectorAll("iframe, frame"),
      requestManager : requestManager,
      processDoc : singlefile.initProcess(doc, docElement, topWindow, doc.baseURI, doc.characterSet, config, canvasData, requestManager, notifyInit,
          notifyProgress, notifyEnd)
    };
  }
  /**
   * Tells the background page that a document is about to be processed in the
   * content script ("processInit" message without content).
   *
   * Every parameter is optional: called without arguments, the message
   * describes the current window; frames pass their own values.
   *
   * @param title title of the document, defaults to doc.title
   * @param url address of the document, defaults to location.href
   * @param baseURI base URI of the document, defaults to doc.baseURI
   * @param winId window id, defaults to wininfo.winId; when given, the
   *        document is never reported as the top window
   * @param winIndex index of the window, defaults to wininfo.index
   */
  function sendFgProcessInit(title, url, baseURI, winId, winIndex) {
    var contextmenuTime = window.contextmenuTime;
    // the value is consumed once: reset it for the next processing
    window.contextmenuTime = null;
    bgPort.postMessage({
      processInit : true,
      pageId : pageId,
      topWindow : winId ? false : window == top,
      url : url || location.href,
      // "" is a valid title and 0 a valid index: only fall back when they are missing
      title : title != null ? title : doc.title,
      baseURI : baseURI || doc.baseURI,
      winId : winId || wininfo.winId,
      contextmenuTime : contextmenuTime,
      index : winIndex != null ? winIndex : wininfo.index
    });
  }
  /**
   * Sends to the background page the content of a document to process there
   * ("processInit" message with content). Nothing is sent while wininfo is
   * not available.
   *
   * Apart from content, every parameter is optional and defaults to the value
   * of the current window.
   *
   * @param content serialized document
   * @param title title of the document, defaults to doc.title
   * @param url address of the document, defaults to location.href
   * @param baseURI base URI of the document, defaults to doc.baseURI
   * @param characterSet character set of the document, defaults to doc.characterSet
   * @param winId window id, defaults to wininfo.winId; when given, the
   *        document is never reported as the top window
   * @param winIndex index of the window, defaults to wininfo.index
   */
  function sendBgProcessInit(content, title, url, baseURI, characterSet, winId, winIndex) {
    var contextmenuTime = window.contextmenuTime;
    // typeof keeps the test safe whatever the value of "this" (undefined in strict mode)
    if (typeof wininfo == "undefined" || !wininfo)
      return;
    // the value is consumed once: reset it for the next processing
    window.contextmenuTime = null;
    bgPort.postMessage({
      processInit : true,
      pageId : pageId,
      topWindow : winId ? false : window == top,
      url : url || location.href,
      // "" is a valid title and 0 a valid index: only fall back when they are missing
      title : title != null ? title : doc.title,
      content : content,
      baseURI : baseURI || doc.baseURI,
      characterSet : characterSet || doc.characterSet,
      canvasData : canvasData,
      winId : winId || wininfo.winId,
      contextmenuTime : contextmenuTime,
      index : winIndex != null ? winIndex : wininfo.index
    });
  }
  182. // ----------------------------------------------------------------------------------------------
  /**
   * Entry point of the content script: prepares the page (noscript cleanup,
   * canvas capture, optional removal of hidden elements, archive banner) and
   * starts the processing either in the background page or in this script,
   * depending on config.processInBackground.
   */
  function init() {
    var selectedContent = getSelectedContent(), topWindow = window == top;

    // Downloads the source of the current page as text and hands it to onLoaded
    function fetchRawDoc(onLoaded) {
      var request = new XMLHttpRequest();
      request.onreadystatechange = function() {
        if (request.readyState == 4)
          onLoaded(request.responseText);
      };
      request.open("GET", doc.location.href, true);
      request.overrideMimeType('text/plain; charset=' + doc.characterSet);
      request.send(null);
    }

    // Builds a detached HTML document from serialized content
    function parseContent(content) {
      var parsedDoc = document.implementation.createHTMLDocument();
      parsedDoc.open();
      parsedDoc.write(content);
      parsedDoc.close();
      return parsedDoc;
    }

    // Requests the content of each same-domain frame and passes it to onContent
    function forEachSameDomainFrame(onContent) {
      wininfo.frames.forEach(function(frame) {
        if (frame.sameDomain)
          wininfo.getContent(frame, function(message) {
            onContent(frame, message);
          });
      });
    }

    // Announces the processing, then sets it up for this window and its frames
    function doFgProcessInit() {
      sendFgProcessInit();
      if (!docElement || (singlefile.processSelection && !selectedContent))
        return;
      initProcess(doc, docElement, wininfo.winId, topWindow, canvasData);
      if (!topWindow || config.removeFrames || config.getRawDoc)
        return;
      forEachSameDomainFrame(function(frame, message) {
        var frameDoc = parseContent(message.content);
        sendFgProcessInit(message.title, message.url, message.baseURI, frame.winId, frame.index);
        initProcess(frameDoc, frameDoc.documentElement, frame.winId, false, getCanvasData(frameDoc));
      });
    }

    // Sends the content to process to the background page
    function bgProcessInit() {
      if (singlefile.processSelection) {
        if (selectedContent || !topWindow)
          sendBgProcessInit(topWindow ? singlefile.util.getDocContent(doc, selectedContent) : null);
        return;
      }
      if (config.getRawDoc && topWindow) {
        fetchRawDoc(function(rawContent) {
          sendBgProcessInit(rawContent);
        });
        return;
      }
      sendBgProcessInit(singlefile.util.getDocContent(doc));
      if (topWindow && !config.removeFrames)
        forEachSameDomainFrame(function(frame, message) {
          sendBgProcessInit(message.content, message.title, message.url, message.baseURI, message.characterSet, frame.winId, frame.index);
        });
    }

    // Chooses the root element to process, then starts the processing in this script
    function fgProcessInit() {
      if (singlefile.processSelection) {
        if (selectedContent || topWindow) {
          docElement = selectedContent;
          doFgProcessInit();
        }
        return;
      }
      if (config.getRawDoc && topWindow) {
        fetchRawDoc(function(rawContent) {
          docElement = doc.importNode(parseContent(rawContent).documentElement, true);
          doFgProcessInit();
        });
        return;
      }
      docElement = doc.documentElement.cloneNode(true);
      doFgProcessInit();
    }

    if (!selectedContent) {
      Array.prototype.forEach.call(doc.querySelectorAll("noscript"), function(noscript) {
        noscript.textContent = "";
      });
      canvasData = getCanvasData(doc);
      if (config.removeHidden)
        removeHiddenElements();
      if (topWindow)
        document.documentElement.insertBefore(document.createComment("\n Archive processed by SingleFile \n url: " + location.href + " \n saved date: "
            + new Date() + " \n"), document.documentElement.firstChild);
    }

    if ((!config.removeFrames && !config.getRawDoc) || topWindow) {
      if (config.processInBackground)
        bgProcessInit();
      else
        fgProcessInit();
    }
  }
  /**
   * Handles the "setContentRequest" message: optionally displays the processed
   * page in place of the current one, then answers with a "setContentResponse"
   * message.
   *
   * @param message message of the background page; reads message.content (the
   *        processed content) and message.winProperties (the window properties
   *        to preserve when the page is displayed)
   */
  function setContentRequest(message) {
    var mutationEventId = 0, winId = wininfo.winId, timeoutSetContent;

    // This function is serialized with toString() and run through a
    // "javascript:" URL: it must not use anything from the enclosing scopes.
    // NOTE(review): line breaks are presumably dropped when the URL is parsed,
    // so keep explicit semicolons and no "//" comment inside it - confirm
    // before reformatting.
    function resetWindowProperties(winPropertiesStr) {
      var property, winProp, customEvent, parse = JSON.parse || JSON.decode;
      try {
        winProp = parse(winPropertiesStr);
        for (property in window)
          if (!winProp[property])
            window[property] = null;
      } catch (e) {
        console.log(e);
      }
      customEvent = document.createEvent("CustomEvent");
      customEvent.initCustomEvent("WindowPropertiesCleaned", true, true);
      document.dispatchEvent(customEvent);
    }

    // Processes the content of an element modified after the page was displayed
    function onDOMSubtreeModified(event) {
      var id = mutationEventId, element = event.target, processDocFn;

      // Applies the fragment processed by the background page for this mutation
      function onSetDocFragment(message) {
        if (message.setDocFragment && message.mutationEventId == id) {
          // the listener is suspended so that this update is not handled as a new mutation
          doc.removeEventListener("DOMSubtreeModified", onDOMSubtreeModified, true);
          element.innerHTML = message.content;
          doc.addEventListener("DOMSubtreeModified", onDOMSubtreeModified, true);
          bgPort.onMessage.removeListener(onSetDocFragment);
        }
      }

      if (element.innerHTML) {
        if (config.processInBackground) {
          bgPort.postMessage({
            processDocFragment : true,
            pageId : pageId,
            winId : winId,
            content : element.innerHTML,
            mutationEventId : id
          });
          bgPort.onMessage.addListener(onSetDocFragment);
          mutationEventId++;
        } else
          processDocFn = singlefile.initProcess(doc, element, false, doc.baseURI, doc.characterSet, config, canvasData, docs[winId].requestManager,
              function(maxIndex) {
                doc.removeEventListener("DOMSubtreeModified", onDOMSubtreeModified, true);
                processDocFn();
                doc.addEventListener("DOMSubtreeModified", onDOMSubtreeModified, true);
              });
      }
      event.preventDefault();
    }

    // Tells whether the path of the page ends with the ".txt" extension
    function isTextFilePath(pathname) {
      return /\.txt$/.test(pathname);
    }

    // Displays the processed content once the window properties are reset
    // (or once the timeout set below has expired)
    function onWindowPropertiesCleaned() {
      var tmpDoc;

      function replaceDoc() {
        doc.replaceChild(docElement, doc.documentElement);
        doc.addEventListener("DOMSubtreeModified", onDOMSubtreeModified, true);
      }

      if (timeoutSetContent) {
        clearTimeout(timeoutSetContent);
        timeoutSetContent = null;
      }
      doc.removeEventListener('WindowPropertiesCleaned', onWindowPropertiesCleaned, true);
      if (config.processInBackground || singlefile.processSelection || !config.removeScripts) {
        if (isTextFilePath(location.pathname)) {
          tmpDoc = document.implementation.createHTMLDocument();
          tmpDoc.open();
          tmpDoc.write(message.content);
          tmpDoc.close();
          docElement = doc.importNode(tmpDoc.documentElement, true);
          replaceDoc();
        } else {
          doc.open();
          doc.write(message.content || singlefile.util.getDocContent(doc, docElement));
          doc.addEventListener("DOMSubtreeModified", onDOMSubtreeModified, true);
          doc.close();
        }
      } else
        replaceDoc();
      if (config.removeUnusedCSSRules)
        removeUnusedCSSRules();
      setContentResponse();
    }

    // Posts the response; the content is only included when config.getContent is set
    function sendSetContentResponse(content) {
      bgPort.postMessage({
        setContentResponse : true,
        winId : "0",
        pageId : pageId,
        content : config.getContent ? content : null
      });
    }

    // Chooses the content to send back according to the processing mode
    function setContentResponse() {
      if (singlefile.processSelection)
        sendSetContentResponse(message.content);
      else if (config.processInBackground || config.removeUnusedCSSRules)
        sendSetContentResponse(singlefile.util.getDocContent(doc, doc.documentElement));
      else
        sendSetContentResponse(singlefile.util.getDocContent(doc, docElement));
    }

    if (config.displayProcessedPage) {
      // The JSON text is embedded as a JS string literal built by JSON.stringify()
      // so that quotes and backslashes in property names cannot break the script
      window.location.href = "javascript:(" + resetWindowProperties.toString() + ")(" + JSON.stringify(JSON.stringify(message.winProperties))
          + "); void 0;";
      // fallback in case the "WindowPropertiesCleaned" event is never received
      timeoutSetContent = setTimeout(onWindowPropertiesCleaned, 3000);
      doc.addEventListener('WindowPropertiesCleaned', onWindowPropertiesCleaned, true);
    } else
      setContentResponse();
  }
  /**
   * Handles the "getResourceContentResponse" message: forwards the content of
   * a resource to the request manager of the window that asked for it.
   * Responses addressed to a window without registered state are ignored.
   *
   * @param message message of the background page (winId, requestId, content)
   */
  function getResourceContentResponse(message) {
    var entry = docs[message.winId];
    if (entry && entry.requestManager)
      entry.requestManager.onResponse(message.requestId, message.content);
  }
  /**
   * Handles the "setFrameContentRequest" message: replaces the source of a
   * frame with a data URL holding its processed content, then acknowledges
   * with a "setFrameContentResponse" message.
   *
   * @param message message of the background page; message.winId selects the
   *        entry of docs, message.index the frame of this entry and
   *        message.content is the processed content of the frame
   */
  function setFrameContentRequest(message) {
    // encodeURIComponent() also escapes "#", which encodeURI() leaves as is:
    // an unescaped "#" would start the fragment of the URL and cut the content
    docs[message.winId].frames[message.index].setAttribute("src", "data:text/html;charset=utf-8," + encodeURIComponent(message.content));
    bgPort.postMessage({
      setFrameContentResponse : true,
      pageId : pageId,
      winId : message.winId,
      index : message.index
    });
  }
  /**
   * Handles the "getContentRequest" message: answers with the serialized
   * content of the requested window in a "getContentResponse" message.
   * The document registered in docs for the window is used when there is one,
   * otherwise the document of this script is serialized.
   *
   * @param message message of the background page (winId)
   */
  function getContentRequest(message) {
    var entry = docs[message.winId], content;
    // the entry itself may be missing: test it before reading its document
    if (entry && entry.doc)
      content = singlefile.util.getDocContent(entry.doc, entry.docElement);
    else
      content = singlefile.util.getDocContent(doc, docElement);
    bgPort.postMessage({
      getContentResponse : true,
      winId : message.winId,
      pageId : pageId,
      content : content
    });
  }
  /**
   * Handles the "processDoc" message: runs the processing function registered
   * for the window, if any.
   *
   * @param message message of the background page (winId)
   */
  function processDoc(message) {
    var entry = docs[message.winId];
    if (!entry)
      return;
    entry.processDoc();
  }
  // Opens the port to the background page
  bgPort = chrome.extension.connect({
    name : "singlefile"
  });

  // Routes each incoming message: every flag set on the message triggers its
  // handler, in the order of the table
  bgPort.onMessage.addListener(function(message) {
    var routes = [ [ "getResourceContentResponse", getResourceContentResponse ], [ "setFrameContentRequest", setFrameContentRequest ],
        [ "getContentRequest", getContentRequest ], [ "setContentRequest", setContentRequest ], [ "processDoc", processDoc ] ];
    routes.forEach(function(route) {
      if (message[route[0]])
        route[1](message);
    });
  });

  // Only HTML documents are processed
  if (doc.documentElement instanceof HTMLHtmlElement) {
    init();
  }
  430. })();