content-frame-tree.js 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394
  1. /*
  2. * Copyright 2010-2020 Gildas Lormeau
  3. * contact : gildas.lormeau <at> gmail.com
  4. *
  5. * This file is part of SingleFile.
  6. *
  7. * The code in this file is free software: you can redistribute it and/or
  8. * modify it under the terms of the GNU Affero General Public License
  9. * (GNU AGPL) as published by the Free Software Foundation, either version 3
  10. * of the License, or (at your option) any later version.
  11. *
  12. * The code in this file is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero
  15. * General Public License for more details.
  16. *
  17. * As additional permission under GNU AGPL version 3 section 7, you may
  18. * distribute UNMODIFIED VERSIONS OF THIS file without the copy of the GNU
  19. * AGPL normally required by section 4, provided you include this license
  20. * notice and a URL through which recipients can access the Corresponding
  21. * Source.
  22. */
  23. /* global window, globalThis */
  24. this.singlefile.lib.processors.frameTree.content.frames = this.singlefile.lib.processors.frameTree.content.frames || (globalThis => {
  // Root namespace object of the library, taken from the enclosing `this`.
  const singlefile = this.singlefile;

  // Window messages are strings made of this prefix followed by a JSON payload.
  const MESSAGE_PREFIX = "__frameTree__::";
  // Elements considered as frames when walking a document.
  const FRAMES_CSS_SELECTOR = "iframe, frame, object[type=\"text/html\"][data]";
  const ALL_ELEMENTS_CSS_SELECTOR = "*";

  // Values of the `method` property carried by frame-tree messages.
  const INIT_REQUEST_MESSAGE = "singlefile.frameTree.initRequest";
  const ACK_INIT_REQUEST_MESSAGE = "singlefile.frameTree.ackInitRequest";
  const CLEANUP_REQUEST_MESSAGE = "singlefile.frameTree.cleanupRequest";
  const INIT_RESPONSE_MESSAGE = "singlefile.frameTree.initResponse";
  const TARGET_ORIGIN = "*";

  // Delays passed to setTimeout while waiting for a frame to acknowledge a
  // request, then to answer it.
  const TIMEOUT_INIT_REQUEST_MESSAGE = 750;
  const TIMEOUT_INIT_RESPONSE_MESSAGE = 10000;

  // Window identifiers are built as "<parent id>" + separator + "<frame index>".
  const TOP_WINDOW_ID = "0";
  const WINDOW_ID_SEPARATOR = ".";

  // True when this script runs in the top-level window.
  const TOP_WINDOW = globalThis.window == globalThis.top;

  // Globals captured once from the injected global object.
  const browser = globalThis.browser;
  const addEventListener = (type, listener, options) => globalThis.addEventListener(type, listener, options);
  const top = globalThis.top;
  const MessageChannel = globalThis.MessageChannel;
  const document = globalThis.document;

  // Per-session state, keyed by session id.
  const sessions = new Map();

  // Identifier of the current window; frames receive theirs with the init request.
  let windowId;
  if (TOP_WINDOW) {
    windowId = TOP_WINDOW_ID;
    const runtimeAvailable = browser && browser.runtime && browser.runtime.onMessage && browser.runtime.onMessage.addListener;
    if (runtimeAvailable) {
      // In the top window, responses and acknowledgements may also arrive
      // through the extension runtime instead of window messages.
      browser.runtime.onMessage.addListener(message => {
        if (message.method == INIT_RESPONSE_MESSAGE) {
          initResponse(message);
          return Promise.resolve({});
        }
        if (message.method == ACK_INIT_REQUEST_MESSAGE) {
          clearFrameTimeout("requestTimeouts", message.sessionId, message.windowId);
          createFrameResponseTimeout(message.sessionId, message.windowId);
          return Promise.resolve({});
        }
      });
    }
  }
  /*
   * Capturing listener for frame-tree messages exchanged between windows.
   *
   * Only string messages starting with MESSAGE_PREFIX are handled; they are
   * consumed (default prevented, propagation stopped) and the JSON payload
   * following the prefix is dispatched on its `method` property:
   * - init request: acknowledged to the sender and, in a non-top window,
   *   the document load is stopped and the frame processes itself,
   * - init request acknowledgement: the request timeout of the frame is
   *   replaced by a response timeout,
   * - cleanup request: forwarded to cleanupRequest,
   * - init response (known session only): the actual response is read from
   *   the message port transferred with the notification.
   *
   * Messages are posted with a "*" target origin and the listener does not
   * check their origin, so any script able to reach this window can send a
   * prefixed string. Payloads that are not valid JSON or not objects, and
   * response notifications without a port, are therefore ignored instead of
   * raising an unhandled rejection in this async listener.
   */
  addEventListener("message", async event => {
    if (typeof event.data != "string" || !event.data.startsWith(MESSAGE_PREFIX)) {
      return;
    }
    event.preventDefault();
    event.stopPropagation();
    let message;
    try {
      message = JSON.parse(event.data.substring(MESSAGE_PREFIX.length));
    } catch (error) {
      // Prefixed string with a malformed JSON payload: nothing to dispatch.
      return;
    }
    if (!message || typeof message != "object") {
      // e.g. the payload "null": reading `message.method` would throw.
      return;
    }
    if (message.method == INIT_REQUEST_MESSAGE) {
      if (event.source) {
        sendMessage(event.source, { method: ACK_INIT_REQUEST_MESSAGE, windowId: message.windowId, sessionId: message.sessionId });
      }
      if (!TOP_WINDOW) {
        globalThis.stop();
        if (message.options.loadDeferredImages && singlefile.lib.processors.lazy.content.loader) {
          singlefile.lib.processors.lazy.content.loader.process(message.options);
        }
        await initRequestAsync(message);
      }
    } else if (message.method == ACK_INIT_REQUEST_MESSAGE) {
      clearFrameTimeout("requestTimeouts", message.sessionId, message.windowId);
      createFrameResponseTimeout(message.sessionId, message.windowId);
    } else if (message.method == CLEANUP_REQUEST_MESSAGE) {
      cleanupRequest(message);
    } else if (message.method == INIT_RESPONSE_MESSAGE && sessions.get(message.sessionId)) {
      // The response itself is posted on the transferred port (see sendMessage).
      const port = event.ports && event.ports[0];
      if (port) {
        port.onmessage = portEvent => initResponse(portEvent.data);
      }
    }
  }, true);
  88. return {
  89. getAsync,
  90. getSync,
  91. cleanup,
  92. initResponse,
  93. TIMEOUT_INIT_REQUEST_MESSAGE
  94. };
  /**
   * Start a new session and collect the frames asynchronously.
   *
   * @param {object} options - options forwarded to the frames; they are
   *   copied through a JSON round trip before use.
   * @returns {Promise<Array>} promise resolved by the session's `resolve`
   *   callback with the frames array, on which `sessionId` is also set.
   */
  function getAsync(options) {
    const sessionId = getNewSessionId();
    const requestOptions = JSON.parse(JSON.stringify(options));
    return new Promise(resolvePromise => {
      const session = {
        frames: [],
        requestTimeouts: {},
        responseTimeouts: {},
        resolve: frames => {
          frames.sessionId = sessionId;
          resolvePromise(frames);
        }
      };
      sessions.set(sessionId, session);
      initRequestAsync({ windowId, sessionId, options: requestOptions });
    });
  }
  /**
   * Start a new session and return the frames collected by the synchronous
   * request.
   *
   * @param {object} options - options forwarded to the frames; they are
   *   copied through a JSON round trip before use.
   * @returns {Array} the session's frames array, with `sessionId` set on it.
   */
  function getSync(options) {
    const sessionId = getNewSessionId();
    const requestOptions = JSON.parse(JSON.stringify(options));
    const session = { frames: [], requestTimeouts: {}, responseTimeouts: {} };
    sessions.set(sessionId, session);
    initRequestSync({ windowId, sessionId, options: requestOptions });
    // Read the frames back from the session registry once the request ran.
    const { frames } = sessions.get(sessionId);
    frames.sessionId = sessionId;
    return frames;
  }
  /**
   * Forget a session and ask the frames of the document to clean up.
   *
   * @param {string} sessionId - identifier of the session to discard.
   */
  function cleanup(sessionId) {
    sessions.delete(sessionId);
    const request = { windowId, sessionId, options: { sessionId } };
    cleanupRequest(request);
  }
  128. function getNewSessionId() {
  129. return globalThis.crypto.getRandomValues(new Uint32Array(32)).join("");
  130. }
  /**
   * Process an init request without awaiting the user script hooks.
   *
   * The frames of the current document are always processed. In a non-top
   * window, the window adopts the id carried by the message and the data of
   * its own document is sent as init response.
   *
   * @param {object} message - request with `windowId`, `sessionId` and `options`.
   */
  function initRequestSync(message) {
    const helper = singlefile.lib.helper;
    const waitForUserScript = helper.waitForUserScript;
    const { sessionId, options } = message;
    if (!TOP_WINDOW) {
      windowId = globalThis.frameId = message.windowId;
    }
    processFrames(document, options, windowId, sessionId);
    if (TOP_WINDOW) {
      return;
    }
    const userScriptHooks = options.userScriptEnabled && waitForUserScript;
    if (userScriptHooks) {
      waitForUserScript(helper.ON_BEFORE_CAPTURE_EVENT_NAME);
    }
    const frameData = getFrameData(document, globalThis, windowId, options);
    const requestedFrameId = document.documentElement.dataset.requestedFrameId && windowId;
    sendInitResponse({ frames: [frameData], sessionId, requestedFrameId });
    if (userScriptHooks) {
      waitForUserScript(helper.ON_AFTER_CAPTURE_EVENT_NAME);
    }
    delete document.documentElement.dataset.requestedFrameId;
  }
  /**
   * Process an init request, awaiting the user script hooks when enabled.
   *
   * The frames of the current document are always processed. In a non-top
   * window, the window adopts the id carried by the message and the data of
   * its own document is sent as init response.
   *
   * @param {object} message - request with `windowId`, `sessionId` and `options`.
   * @returns {Promise<void>} settled once the request has been handled.
   */
  async function initRequestAsync(message) {
    const helper = singlefile.lib.helper;
    const waitForUserScript = helper.waitForUserScript;
    const { sessionId, options } = message;
    if (!TOP_WINDOW) {
      windowId = globalThis.frameId = message.windowId;
    }
    processFrames(document, options, windowId, sessionId);
    if (TOP_WINDOW) {
      return;
    }
    const userScriptHooks = options.userScriptEnabled && waitForUserScript;
    if (userScriptHooks) {
      await waitForUserScript(helper.ON_BEFORE_CAPTURE_EVENT_NAME);
    }
    const frameData = getFrameData(document, globalThis, windowId, options);
    const requestedFrameId = document.documentElement.dataset.requestedFrameId && windowId;
    sendInitResponse({ frames: [frameData], sessionId, requestedFrameId });
    if (userScriptHooks) {
      await waitForUserScript(helper.ON_AFTER_CAPTURE_EVENT_NAME);
    }
    delete document.documentElement.dataset.requestedFrameId;
  }
  /**
   * Clean up the frames of the current document on behalf of a session.
   *
   * @param {object} message - request with `windowId` (used as parent id of
   *   the frames) and `sessionId`.
   */
  function cleanupRequest(message) {
    const { sessionId } = message;
    const frameElements = getFrames(document);
    cleanupFrames(frameElements, message.windowId, sessionId);
  }
  /**
   * Merge the frames of an init response into the session they belong to.
   *
   * The response timeouts of the received frames are cleared first. When the
   * session is known, unknown frames are registered and the data of frames
   * not yet marked as processed is overwritten. Once every registered frame
   * is processed, the frames are sorted from the deepest window id to the
   * shallowest and, if the session has a `resolve` callback, the requested
   * frame is flagged and the callback is called with the frames.
   *
   * @param {object} message - response with `sessionId`, `frames` and an
   *   optional `requestedFrameId`.
   */
  function initResponse(message) {
    const { sessionId } = message;
    message.frames.forEach(frame => clearFrameTimeout("responseTimeouts", sessionId, frame.windowId));
    const session = sessions.get(sessionId);
    if (!session) {
      return;
    }
    if (message.requestedFrameId) {
      session.requestedFrameId = message.requestedFrameId;
    }
    // Properties copied from a received frame, `processed` being set last.
    const copiedFields = [
      "content", "baseURI", "title", "canvases", "fonts", "stylesheets",
      "images", "posters", "usedFonts", "shadowRoots", "imports", "processed"
    ];
    message.frames.forEach(receivedFrame => {
      let storedFrame = session.frames.find(candidate => receivedFrame.windowId == candidate.windowId);
      if (!storedFrame) {
        storedFrame = { windowId: receivedFrame.windowId };
        session.frames.push(storedFrame);
      }
      if (!storedFrame.processed) {
        copiedFields.forEach(field => {
          storedFrame[field] = receivedFrame[field];
        });
      }
    });
    const everyFrameProcessed = session.frames.every(frame => frame.processed);
    if (!everyFrameProcessed) {
      return;
    }
    // Number of segments of the window id.
    const depthOf = frame => frame.windowId.split(WINDOW_ID_SEPARATOR).length;
    session.frames.sort((frame1, frame2) => depthOf(frame2) - depthOf(frame1));
    if (!session.resolve) {
      return;
    }
    if (session.requestedFrameId) {
      session.frames
        .filter(frame => frame.windowId == session.requestedFrameId)
        .forEach(frame => {
          frame.requestedFrame = true;
        });
    }
    session.resolve(session.frames);
  }
  /**
   * Process the frames of a document: the message-based pass always runs,
   * the direct-access pass only when the document contains frames.
   *
   * @param {Document} doc - document whose frames are processed.
   * @param {object} options - options forwarded to the frames.
   * @param {string} parentWindowId - window id of `doc`.
   * @param {string} sessionId - identifier of the current session.
   */
  function processFrames(doc, options, parentWindowId, sessionId) {
    const frameElements = getFrames(doc);
    const hasFrames = frameElements.length > 0;
    processFramesAsync(doc, frameElements, options, parentWindowId, sessionId);
    if (hasFrames) {
      processFramesSync(doc, frameElements, options, parentWindowId, sessionId);
    }
  }
  /**
   * Message-based pass over the frames of a document.
   *
   * Each frame element is tagged with its window id and announced in an init
   * response; an init request is then posted to every frame, with a timeout
   * that reports the frame as processed after TIMEOUT_INIT_REQUEST_MESSAGE.
   *
   * @param {Document} doc - document owning the frame elements.
   * @param {Element[]} frameElements - frame elements of `doc`.
   * @param {object} options - options sent along with each init request.
   * @param {string} parentWindowId - window id of `doc`.
   * @param {string} sessionId - identifier of the current session.
   */
  function processFramesAsync(doc, frameElements, options, parentWindowId, sessionId) {
    // Reuse the session when it exists, otherwise register a minimal one.
    let session = sessions.get(sessionId);
    if (!session) {
      session = { requestTimeouts: {} };
      sessions.set(sessionId, session);
    }
    const requestTimeouts = session.requestTimeouts;
    const childWindowId = frameIndex => parentWindowId + WINDOW_ID_SEPARATOR + frameIndex;

    const frames = [];
    frameElements.forEach((frameElement, frameIndex) => {
      const frameWindowId = childWindowId(frameIndex);
      frameElement.setAttribute(singlefile.lib.helper.WIN_ID_ATTRIBUTE_NAME, frameWindowId);
      frames.push({ windowId: frameWindowId });
    });
    const requestedFrameId = doc.documentElement.dataset.requestedFrameId && parentWindowId;
    sendInitResponse({ frames, sessionId, requestedFrameId });

    frameElements.forEach((frameElement, frameIndex) => {
      const frameWindowId = childWindowId(frameIndex);
      try {
        sendMessage(frameElement.contentWindow, { method: INIT_REQUEST_MESSAGE, windowId: frameWindowId, sessionId, options });
      } catch (error) {
        // ignored
      }
      const reportAsProcessed = () => sendInitResponse({ frames: [{ windowId: frameWindowId, processed: true }], sessionId });
      requestTimeouts[frameWindowId] = globalThis.setTimeout(reportAsProcessed, TIMEOUT_INIT_REQUEST_MESSAGE);
    });
    delete doc.documentElement.dataset.requestedFrameId;
  }
  /**
   * Direct-access pass over the frames of a document.
   *
   * For each frame whose document can be read, the frame load is stopped,
   * its pending request timeout is cleared, its own frames are processed and
   * its data is collected; a frame failing during these steps is reported as
   * processed without data. The collected frames are sent in one response.
   *
   * @param {Document} doc - document owning the frame elements.
   * @param {Element[]} frameElements - frame elements of `doc`.
   * @param {object} options - options forwarded to nested processing.
   * @param {string} parentWindowId - window id of `doc`.
   * @param {string} sessionId - identifier of the current session.
   */
  function processFramesSync(doc, frameElements, options, parentWindowId, sessionId) {
    // Return the document of a frame element, or undefined when reading it throws.
    const readFrameDocument = frameElement => {
      try {
        return frameElement.contentDocument;
      } catch (error) {
        // ignored
        return undefined;
      }
    };

    const frames = [];
    frameElements.forEach((frameElement, frameIndex) => {
      const frameWindowId = parentWindowId + WINDOW_ID_SEPARATOR + frameIndex;
      const frameDoc = readFrameDocument(frameElement);
      if (!frameDoc) {
        return;
      }
      try {
        const frameWindow = frameElement.contentWindow;
        frameWindow.stop();
        clearFrameTimeout("requestTimeouts", sessionId, frameWindowId);
        processFrames(frameDoc, options, frameWindowId, sessionId);
        frames.push(getFrameData(frameDoc, frameWindow, frameWindowId, options));
      } catch (error) {
        frames.push({ windowId: frameWindowId, processed: true });
      }
    });
    const requestedFrameId = doc.documentElement.dataset.requestedFrameId && parentWindowId;
    sendInitResponse({ frames, sessionId, requestedFrameId });
    delete doc.documentElement.dataset.requestedFrameId;
  }
  /**
   * Cancel and forget the timeout registered for a frame, if any.
   *
   * @param {string} type - name of the session property holding the timeouts
   *   ("requestTimeouts" or "responseTimeouts" in this file).
   * @param {string} sessionId - identifier of the session.
   * @param {string} windowId - window id of the frame.
   */
  function clearFrameTimeout(type, sessionId, windowId) {
    const session = sessions.get(sessionId);
    const timeouts = session && session[type];
    if (!timeouts) {
      return;
    }
    const timeoutId = timeouts[windowId];
    if (!timeoutId) {
      return;
    }
    globalThis.clearTimeout(timeoutId);
    delete timeouts[windowId];
  }
  /**
   * Arm the response timeout of a frame: when it fires, the frame is reported
   * as processed. Nothing happens if the session is unknown or has no
   * `responseTimeouts` registry.
   *
   * @param {string} sessionId - identifier of the session.
   * @param {string} windowId - window id of the frame.
   */
  function createFrameResponseTimeout(sessionId, windowId) {
    const session = sessions.get(sessionId);
    const responseTimeouts = session && session.responseTimeouts;
    if (!responseTimeouts) {
      return;
    }
    const reportAsProcessed = () => sendInitResponse({ frames: [{ windowId, processed: true }], sessionId });
    responseTimeouts[windowId] = globalThis.setTimeout(reportAsProcessed, TIMEOUT_INIT_RESPONSE_MESSAGE);
  }
  /**
   * Remove the window id attribute from frame elements and propagate the
   * cleanup request, first by message to every frame, then by recursing into
   * the frames whose document can be read directly.
   *
   * @param {Element[]} frameElements - frame elements to clean up.
   * @param {string} parentWindowId - window id of the owning document.
   * @param {string} sessionId - identifier of the session.
   */
  function cleanupFrames(frameElements, parentWindowId, sessionId) {
    const childWindowId = frameIndex => parentWindowId + WINDOW_ID_SEPARATOR + frameIndex;

    // First pass: untag every element and notify its window.
    frameElements.forEach((frameElement, frameIndex) => {
      frameElement.removeAttribute(singlefile.lib.helper.WIN_ID_ATTRIBUTE_NAME);
      try {
        sendMessage(frameElement.contentWindow, { method: CLEANUP_REQUEST_MESSAGE, windowId: childWindowId(frameIndex), sessionId });
      } catch (error) {
        // ignored
      }
    });

    // Second pass: recurse into the readable frame documents.
    frameElements.forEach((frameElement, frameIndex) => {
      try {
        const frameDoc = frameElement.contentDocument;
        if (frameDoc) {
          cleanupFrames(getFrames(frameDoc), childWindowId(frameIndex), sessionId);
        }
      } catch (error) {
        // ignored
      }
    });
  }
  /**
   * Deliver an init response to the top window.
   *
   * The message is tagged with the init response method, then handed to the
   * top window's `initResponse` directly; if that call throws, the message
   * is sent with sendMessage over a message channel instead.
   *
   * @param {object} message - response to deliver; its `method` is overwritten.
   */
  function sendInitResponse(message) {
    message.method = INIT_RESPONSE_MESSAGE;
    try {
      const topFrames = top.singlefile.lib.processors.frameTree.content.frames;
      topFrames.initResponse(message);
    } catch (directCallError) {
      sendMessage(top, message, true);
    }
  }
  /**
   * Send a frame-tree message to a window.
   *
   * Messages for the top window go through `browser.runtime.sendMessage` when
   * it is available. Otherwise the message is posted to the window as a
   * prefixed JSON string or, when `useChannel` is set, only its `method` and
   * `sessionId` are posted that way and the full message is posted on a
   * message channel whose second port is transferred to the target window.
   *
   * @param {Window} targetWindow - window receiving the message.
   * @param {object} message - message to send.
   * @param {boolean} [useChannel] - post the full message on a message channel.
   */
  function sendMessage(targetWindow, message, useChannel) {
    if (targetWindow == top && browser && browser.runtime && browser.runtime.sendMessage) {
      browser.runtime.sendMessage(message);
      return;
    }
    if (!useChannel) {
      targetWindow.postMessage(`${MESSAGE_PREFIX}${JSON.stringify(message)}`, TARGET_ORIGIN);
      return;
    }
    const channel = new MessageChannel();
    const notification = { method: message.method, sessionId: message.sessionId };
    targetWindow.postMessage(`${MESSAGE_PREFIX}${JSON.stringify(notification)}`, TARGET_ORIGIN, [channel.port2]);
    channel.port1.postMessage(message);
  }
  /**
   * Collect the data of a frame document.
   *
   * The document is pre-processed, serialized, then post-processed with the
   * marked elements returned by the pre-processing step.
   *
   * @param {Document} document - document of the frame.
   * @param {Window} globalThis - global object of the frame.
   * @param {string} windowId - window id of the frame.
   * @param {object} options - options passed to the pre-processing step.
   * @returns {object} frame record marked as processed, holding the
   *   serialized content, the base URI without its fragment, the title and
   *   the resources gathered during pre-processing.
   */
  function getFrameData(document, globalThis, windowId, options) {
    const helper = singlefile.lib.helper;
    const docData = helper.preProcessDoc(document, globalThis, options);
    const content = helper.serialize(document);
    helper.postProcessDoc(document, docData.markedElements);
    // Keep only the part of the base URI located before the first "#".
    const [baseURI] = document.baseURI.split("#");
    const title = document.title;
    const { canvases, fonts, stylesheets, images, posters, usedFonts, shadowRoots, imports } = docData;
    return {
      windowId,
      content,
      baseURI,
      title,
      canvases,
      fonts,
      stylesheets,
      images,
      posters,
      usedFonts,
      shadowRoots,
      imports,
      processed: true
    };
  }
  /**
   * List the frame elements of a document: those matched in the document
   * itself, followed by those matched in the shadow root of each of its
   * elements, as returned by the helper's `getShadowRoot`.
   *
   * @param {Document} document - document to inspect.
   * @returns {Element[]} the frame elements found.
   */
  function getFrames(document) {
    const frames = Array.from(document.querySelectorAll(FRAMES_CSS_SELECTOR));
    for (const element of document.querySelectorAll(ALL_ELEMENTS_CSS_SELECTOR)) {
      const shadowRoot = singlefile.lib.helper.getShadowRoot(element);
      if (shadowRoot) {
        frames.push(...shadowRoot.querySelectorAll(FRAMES_CSS_SELECTOR));
      }
    }
    return frames;
  }
  369. })(typeof globalThis == "object" ? globalThis : window);