content-frame-tree.js 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327
  1. /*
  2. * Copyright 2010-2019 Gildas Lormeau
  3. * contact : gildas.lormeau <at> gmail.com
  4. *
  5. * This file is part of SingleFile.
  6. *
  7. * The code in this file is free software: you can redistribute it and/or
  8. * modify it under the terms of the GNU Affero General Public License
  9. * (GNU AGPL) as published by the Free Software Foundation, either version 3
  10. * of the License, or (at your option) any later version.
  11. *
  12. * The code in this file is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero
  15. * General Public License for more details.
  16. *
  17. * As additional permission under GNU AGPL version 3 section 7, you may
  18. * distribute UNMODIFIED VERSIONS OF THIS file without the copy of the GNU
  19. * AGPL normally required by section 4, provided you include this license
  20. * notice and a URL through which recipients can access the Corresponding
  21. * Source.
  22. */
  23. /* global window */
  24. this.singlefile.lib.processors.frameTree.content.frames = this.singlefile.lib.processors.frameTree.content.frames || (() => {
  const singlefile = this.singlefile;

  // Marker prepended to every string posted through window.postMessage by this module.
  const MESSAGE_PREFIX = "__frameTree__::";
  // Elements treated as frames when walking a document.
  const FRAMES_CSS_SELECTOR = "iframe, frame, object[type=\"text/html\"][data]";
  const ALL_ELEMENTS_CSS_SELECTOR = "*";
  // Values of the "method" property carried by the protocol messages.
  const INIT_REQUEST_MESSAGE = "singlefile.frameTree.initRequest";
  const CLEANUP_REQUEST_MESSAGE = "singlefile.frameTree.cleanupRequest";
  const INIT_RESPONSE_MESSAGE = "singlefile.frameTree.initResponse";
  const TARGET_ORIGIN = "*";
  // Delay (ms) after which a frame is reported as processed whether or not it answered.
  const TIMEOUT_INIT_REQUEST_MESSAGE = 5000;
  // Window ids are dotted paths: the top window is "0", its n-th frame is "0.n", and so on.
  const TOP_WINDOW_ID = "0";
  const WINDOW_ID_SEPARATOR = ".";
  const TOP_WINDOW = window == window.top;

  const browser = this.browser;
  // References to window members captured once, when this script is evaluated.
  // NOTE(review): presumably so that later page-side overrides are not picked up - confirm.
  const addEventListener = window.addEventListener;
  const top = window.top;
  const MessageChannel = window.MessageChannel;
  const document = window.document;
  const setTimeout = window.setTimeout;

  // sessionId -> { frames, resolve?, requestedFrameId? }
  const sessions = new Map();
  let windowId;

  if (TOP_WINDOW) {
    windowId = TOP_WINDOW_ID;
    if (browser && browser.runtime && browser.runtime.onMessage && browser.runtime.onMessage.addListener) {
      // Init responses sent by frames through browser.runtime.sendMessage are merged here.
      // The message is intentionally not logged: it carries the serialized frame contents.
      browser.runtime.onMessage.addListener(message => {
        if (message.method == INIT_RESPONSE_MESSAGE) {
          initResponse(message);
          return Promise.resolve({});
        }
      });
    }
  }
  // Capturing "message" listener dispatching the frame tree protocol messages.
  // Only string payloads starting with MESSAGE_PREFIX are handled (and stopped from
  // propagating); everything else is left untouched for the page.
  addEventListener.call(window, "message", async event => {
    if (typeof event.data != "string" || !event.data.startsWith(MESSAGE_PREFIX)) {
      return;
    }
    event.preventDefault();
    event.stopPropagation();
    let message;
    try {
      message = JSON.parse(event.data.substring(MESSAGE_PREFIX.length));
    } catch (error) {
      // Any script able to reach this window can post a prefixed string: a payload
      // that is not valid JSON is dropped instead of rejecting this async listener.
      return;
    }
    if (!message || typeof message != "object") {
      // "null" and primitives are valid JSON but cannot carry a method.
      return;
    }
    if (!TOP_WINDOW && message.method == INIT_REQUEST_MESSAGE) {
      // Sub-frame asked by its parent to report its content.
      window.stop();
      if (message.options.loadDeferredImages && singlefile.lib.processors.lazy.content.loader) {
        singlefile.lib.processors.lazy.content.loader.process(message.options);
      }
      await initRequestAsync(message);
    } else if (message.method == CLEANUP_REQUEST_MESSAGE) {
      cleanupRequest(message);
    } else if ((!browser || !browser.runtime) && message.method == INIT_RESPONSE_MESSAGE) {
      // Without the extension runtime, the response body arrives on the transferred port.
      const port = event.ports && event.ports[0];
      if (port) {
        port.onmessage = portEvent => initResponse(portEvent.data);
      }
    }
  }, true);
  76. return {
  77. getAsync: async options => {
  78. const sessionId = options.sessionId || 0;
  79. options = JSON.parse(JSON.stringify(options));
  80. return new Promise(async resolve => {
  81. sessions.set(sessionId, { frames: [], resolve });
  82. await initRequestAsync({ windowId, sessionId, options });
  83. });
  84. },
  85. getSync: options => {
  86. const sessionId = options.sessionId || 0;
  87. options = JSON.parse(JSON.stringify(options));
  88. sessions.set(sessionId, { frames: [] });
  89. initRequestSync({ windowId, sessionId, options });
  90. return sessions.get(sessionId).frames;
  91. },
  92. cleanup: options => {
  93. const sessionId = options.sessionId || 0;
  94. cleanupRequest({ windowId, sessionId, options: { sessionId } });
  95. },
  96. initResponse,
  97. TIMEOUT_INIT_REQUEST_MESSAGE
  98. };
  /**
   * Handles an init request without awaiting anything: processes the frames of the
   * current document and, when running in a sub-frame, sends the data of the current
   * document back as an init response.
   *
   * @param {Object} message request with `windowId`, `sessionId` and `options`.
   */
  function initRequestSync(message) {
    const notifyUserScript = singlefile.lib.helper.waitForUserScript;
    const sessionId = message.sessionId;
    const inSubFrame = !TOP_WINDOW;
    if (inSubFrame) {
      // A sub-frame adopts the id chosen by its parent.
      windowId = window.frameId = message.windowId;
    }
    processFrames(document, message.options, windowId, sessionId);
    if (!inSubFrame) {
      return;
    }
    if (message.options.userScriptEnabled && notifyUserScript) {
      // Fire-and-forget: the result of the call is not awaited in the sync variant.
      notifyUserScript(singlefile.lib.helper.ON_BEFORE_CAPTURE_EVENT_NAME);
    }
    const ownFrameData = getFrameData(document, window, windowId, message.options);
    const requestedFrameId = document.documentElement.dataset.requestedFrameId && windowId;
    sendInitResponse({ frames: [ownFrameData], sessionId, requestedFrameId });
    if (message.options.userScriptEnabled && notifyUserScript) {
      notifyUserScript(singlefile.lib.helper.ON_AFTER_CAPTURE_EVENT_NAME);
    }
    delete document.documentElement.dataset.requestedFrameId;
  }
  /**
   * Handles an init request: processes the frames of the current document and, when
   * running in a sub-frame, sends the data of the current document back as an init
   * response, awaiting the user script hooks before and after the capture.
   *
   * @param {Object} message request with `windowId`, `sessionId` and `options`.
   * @returns {Promise<void>}
   */
  async function initRequestAsync(message) {
    const notifyUserScript = singlefile.lib.helper.waitForUserScript;
    const sessionId = message.sessionId;
    const inSubFrame = !TOP_WINDOW;
    if (inSubFrame) {
      // A sub-frame adopts the id chosen by its parent.
      windowId = window.frameId = message.windowId;
    }
    processFrames(document, message.options, windowId, sessionId);
    if (!inSubFrame) {
      return;
    }
    if (message.options.userScriptEnabled && notifyUserScript) {
      await notifyUserScript(singlefile.lib.helper.ON_BEFORE_CAPTURE_EVENT_NAME);
    }
    const ownFrameData = getFrameData(document, window, windowId, message.options);
    const requestedFrameId = document.documentElement.dataset.requestedFrameId && windowId;
    sendInitResponse({ frames: [ownFrameData], sessionId, requestedFrameId });
    if (message.options.userScriptEnabled && notifyUserScript) {
      await notifyUserScript(singlefile.lib.helper.ON_AFTER_CAPTURE_EVENT_NAME);
    }
    delete document.documentElement.dataset.requestedFrameId;
  }
  /**
   * Handles a cleanup request by cleaning up the frames found in the current document.
   *
   * @param {Object} message request with `windowId` (used as parent id) and `sessionId`.
   */
  function cleanupRequest(message) {
    const { sessionId, windowId: parentWindowId } = message;
    const frameElements = getFrames(document);
    cleanupFrames(frameElements, parentWindowId, sessionId);
  }
  /**
   * Merges an init response into the session it belongs to. Unknown sessions are ignored.
   * When no frame of the session is left unprocessed, the frames are sorted by decreasing
   * window id depth and, if the session has a resolver, it is called with the frames.
   *
   * @param {Object} message response with `sessionId`, `frames` and optionally `requestedFrameId`.
   */
  function initResponse(message) {
    // Properties copied from a received frame entry onto the stored one.
    const copiedFields = [
      "content", "baseURI", "title", "canvases", "fonts", "stylesheets",
      "images", "posters", "usedFonts", "shadowRoots", "imports", "processed"
    ];
    const idDepth = id => id.split(WINDOW_ID_SEPARATOR).length;

    const session = sessions.get(message.sessionId);
    if (!session) {
      return;
    }
    if (message.requestedFrameId) {
      session.requestedFrameId = message.requestedFrameId;
    }
    message.frames.forEach(received => {
      let stored = session.frames.find(candidate => received.windowId == candidate.windowId);
      if (!stored) {
        stored = { windowId: received.windowId };
        session.frames.push(stored);
      }
      // An entry already marked as processed is never overwritten.
      if (!stored.processed) {
        copiedFields.forEach(field => {
          stored[field] = received[field];
        });
      }
    });
    if (session.frames.some(stored => !stored.processed)) {
      return;
    }
    // Deepest frames first.
    session.frames = session.frames.sort((frame1, frame2) => idDepth(frame2.windowId) - idDepth(frame1.windowId));
    if (!session.resolve) {
      return;
    }
    if (session.requestedFrameId) {
      session.frames.forEach(stored => {
        if (stored.windowId == session.requestedFrameId) {
          stored.requestedFrame = true;
        }
      });
    }
    session.resolve(session.frames);
  }
  /**
   * Processes the frames of a document: always runs the message-based pass, then the
   * direct-access pass when the document contains at least one frame element.
   *
   * @param {Document} doc document whose frames are processed.
   * @param {Object} options capture options forwarded to both passes.
   * @param {string} parentWindowId window id of `doc`.
   * @param {*} sessionId session the responses are attached to.
   */
  function processFrames(doc, options, parentWindowId, sessionId) {
    const frameElements = getFrames(doc);
    const hasFrames = frameElements.length > 0;
    processFramesAsync(doc, frameElements, options, parentWindowId, sessionId);
    if (hasFrames) {
      processFramesSync(doc, frameElements, options, parentWindowId, sessionId);
    }
  }
  /**
   * Message-based pass: tags every frame element with its window id, posts an init
   * request to its content window and registers the frames (not yet processed) through
   * an init response.
   *
   * @param {Document} doc document owning the frame elements.
   * @param {Array<Element>} frameElements frame elements of `doc`.
   * @param {Object} options capture options sent with each request.
   * @param {string} parentWindowId window id of `doc`.
   * @param {*} sessionId session the responses are attached to.
   */
  function processFramesAsync(doc, frameElements, options, parentWindowId, sessionId) {
    const registeredFrames = [];
    for (let frameIndex = 0; frameIndex < frameElements.length; frameIndex++) {
      const frameElement = frameElements[frameIndex];
      const windowId = parentWindowId + WINDOW_ID_SEPARATOR + frameIndex;
      frameElement.setAttribute(singlefile.lib.helper.WIN_ID_ATTRIBUTE_NAME, windowId);
      registeredFrames.push({ windowId });
      try {
        const request = { method: INIT_REQUEST_MESSAGE, windowId, sessionId, options };
        sendMessage(frameElement.contentWindow, request);
      } catch (error) {
        // ignored
      }
      // After the timeout the frame is reported as processed, whether it answered or not.
      const reportAsProcessed = () => sendInitResponse({ frames: [{ windowId, processed: true }], sessionId });
      setTimeout.call(window, reportAsProcessed, TIMEOUT_INIT_REQUEST_MESSAGE);
    }
    const requestedFrameId = doc.documentElement.dataset.requestedFrameId && parentWindowId;
    sendInitResponse({ frames: registeredFrames, sessionId, requestedFrameId });
    delete doc.documentElement.dataset.requestedFrameId;
  }
  /**
   * Direct-access pass: for every frame whose document can be read from here, stops the
   * frame, processes its own frames recursively and collects its data, then sends the
   * collected entries in a single init response.
   *
   * @param {Document} doc document owning the frame elements.
   * @param {Array<Element>} frameElements frame elements of `doc`.
   * @param {Object} options capture options.
   * @param {string} parentWindowId window id of `doc`.
   * @param {*} sessionId session the responses are attached to.
   */
  function processFramesSync(doc, frameElements, options, parentWindowId, sessionId) {
    // Reading contentDocument may throw; in that case the frame is skipped.
    const readFrameDocument = frameElement => {
      try {
        return frameElement.contentDocument;
      } catch (error) {
        // ignored
        return undefined;
      }
    };
    const collectedFrames = [];
    for (let frameIndex = 0; frameIndex < frameElements.length; frameIndex++) {
      const frameElement = frameElements[frameIndex];
      const windowId = parentWindowId + WINDOW_ID_SEPARATOR + frameIndex;
      const frameDoc = readFrameDocument(frameElement);
      if (!frameDoc) {
        continue;
      }
      try {
        const frameWindow = frameElement.contentWindow;
        frameWindow.stop();
        processFrames(frameDoc, options, windowId, sessionId);
        collectedFrames.push(getFrameData(frameDoc, frameWindow, windowId, options));
      } catch (error) {
        // On failure the frame is still reported, as processed and without data.
        collectedFrames.push({ windowId, processed: true });
      }
    }
    const requestedFrameId = doc.documentElement.dataset.requestedFrameId && parentWindowId;
    sendInitResponse({ frames: collectedFrames, sessionId, requestedFrameId });
    delete doc.documentElement.dataset.requestedFrameId;
  }
  /**
   * Removes the window id attribute from the given frame elements and propagates the
   * cleanup: first by posting a cleanup request to every frame, then by recursing into
   * the frames whose document can be read from here.
   *
   * @param {Array<Element>} frameElements frame elements to clean up.
   * @param {string} parentWindowId window id of the document owning the elements.
   * @param {*} sessionId session being cleaned up.
   */
  function cleanupFrames(frameElements, parentWindowId, sessionId) {
    const childWindowId = frameIndex => parentWindowId + WINDOW_ID_SEPARATOR + frameIndex;

    // First pass: untag every frame element and notify its window.
    for (let frameIndex = 0; frameIndex < frameElements.length; frameIndex++) {
      const frameElement = frameElements[frameIndex];
      const windowId = childWindowId(frameIndex);
      frameElement.removeAttribute(singlefile.lib.helper.WIN_ID_ATTRIBUTE_NAME);
      try {
        sendMessage(frameElement.contentWindow, { method: CLEANUP_REQUEST_MESSAGE, windowId, sessionId });
      } catch (error) {
        // ignored
      }
    }

    // Second pass: recurse into the frames that are directly readable.
    for (let frameIndex = 0; frameIndex < frameElements.length; frameIndex++) {
      const frameElement = frameElements[frameIndex];
      const windowId = childWindowId(frameIndex);
      let frameDoc;
      try {
        frameDoc = frameElement.contentDocument;
      } catch (error) {
        // ignored
      }
      if (!frameDoc) {
        continue;
      }
      try {
        cleanupFrames(getFrames(frameDoc), windowId, sessionId);
      } catch (error) {
        // ignored
      }
    }
  }
  /**
   * Delivers an init response to the top window. The message is stamped with the init
   * response method, then handed to the top window's `initResponse` directly; if that
   * throws, it is sent with `sendMessage` (channel mode) instead.
   *
   * @param {Object} message response to deliver; its `method` property is overwritten.
   */
  function sendInitResponse(message) {
    message.method = INIT_RESPONSE_MESSAGE;
    try {
      const topFrames = top.singlefile.lib.processors.frameTree.content.frames;
      topFrames.initResponse(message);
    } catch (error) {
      const useChannel = true;
      sendMessage(top, message, useChannel);
    }
  }
  /**
   * Sends a protocol message to a window.
   * - Target is the top window and `browser.runtime.sendMessage` exists: the message goes
   *   through the extension runtime.
   * - Otherwise, with `useChannel`: only the method is posted (prefixed JSON) together with
   *   a MessageChannel port, and the full message is posted on the channel.
   * - Otherwise: the whole message is posted as prefixed JSON.
   *
   * @param {Window} targetWindow window receiving the message.
   * @param {Object} message message to send.
   * @param {boolean} [useChannel] whether to transfer the message over a MessageChannel.
   */
  function sendMessage(targetWindow, message, useChannel) {
    const viaRuntime = targetWindow == top && browser && browser.runtime && browser.runtime.sendMessage;
    if (viaRuntime) {
      browser.runtime.sendMessage(message);
      return;
    }
    if (!useChannel) {
      targetWindow.postMessage(MESSAGE_PREFIX + JSON.stringify(message), TARGET_ORIGIN);
      return;
    }
    const channel = new MessageChannel();
    const announcement = MESSAGE_PREFIX + JSON.stringify({ method: message.method });
    targetWindow.postMessage(announcement, TARGET_ORIGIN, [channel.port2]);
    channel.port1.postMessage(message);
  }
  /**
   * Builds the data entry describing a frame document: the document is pre-processed
   * and serialized with the helper, post-processed, and the result is returned together
   * with the resources reported by the pre-processing step.
   *
   * @param {Document} document document of the frame.
   * @param {Window} window window of the frame.
   * @param {string} windowId id assigned to the frame.
   * @param {Object} options capture options passed to the pre-processing step.
   * @returns {Object} frame entry flagged with `processed: true`.
   */
  function getFrameData(document, window, windowId, options) {
    const { preProcessDoc, serialize, postProcessDoc } = {
      preProcessDoc: (...args) => singlefile.lib.helper.preProcessDoc(...args),
      serialize: (...args) => singlefile.lib.helper.serialize(...args),
      postProcessDoc: (...args) => singlefile.lib.helper.postProcessDoc(...args)
    };
    const docData = preProcessDoc(document, window, options);
    const content = serialize(document);
    postProcessDoc(document, docData.markedElements);
    // The fragment part of the base URI is dropped.
    const [baseURI] = document.baseURI.split("#");
    const title = document.title;
    const { canvases, fonts, stylesheets, images, posters, usedFonts, shadowRoots, imports } = docData;
    return {
      windowId,
      content,
      baseURI,
      title,
      canvases,
      fonts,
      stylesheets,
      images,
      posters,
      usedFonts,
      shadowRoots,
      imports,
      processed: true
    };
  }
  /**
   * Lists the frame elements of a document: first those matched directly in the document,
   * then those matched inside the shadow root of each element exposing one.
   *
   * @param {Document} document document to search.
   * @returns {Array<Element>} matching frame elements.
   */
  function getFrames(document) {
    const frames = Array.from(document.querySelectorAll(FRAMES_CSS_SELECTOR));
    for (const element of document.querySelectorAll(ALL_ELEMENTS_CSS_SELECTOR)) {
      if (!element.shadowRoot) {
        continue;
      }
      frames.push(...element.shadowRoot.querySelectorAll(FRAMES_CSS_SELECTOR));
    }
    return frames;
  }
  308. })();