content-frame-tree.js 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410
  1. /*
  2. * Copyright 2010-2020 Gildas Lormeau
  3. * contact : gildas.lormeau <at> gmail.com
  4. *
  5. * This file is part of SingleFile.
  6. *
  7. * The code in this file is free software: you can redistribute it and/or
  8. * modify it under the terms of the GNU Affero General Public License
  9. * (GNU AGPL) as published by the Free Software Foundation, either version 3
  10. * of the License, or (at your option) any later version.
  11. *
  12. * The code in this file is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero
  15. * General Public License for more details.
  16. *
  17. * As additional permission under GNU AGPL version 3 section 7, you may
  18. * distribute UNMODIFIED VERSIONS OF THIS file without the copy of the GNU
  19. * AGPL normally required by section 4, provided you include this license
  20. * notice and a URL through which recipients can access the Corresponding
  21. * Source.
  22. */
  23. /* global globalThis */
  24. import * as lazy from "./../../lazy/content/content-lazy-loader.js";
  25. import {
  26. ON_BEFORE_CAPTURE_EVENT_NAME,
  27. ON_AFTER_CAPTURE_EVENT_NAME,
  28. WIN_ID_ATTRIBUTE_NAME,
  29. waitForUserScript,
  30. preProcessDoc,
  31. serialize,
  32. postProcessDoc,
  33. getShadowRoot
  34. } from "./../../../single-file-helper.js";
  // Local facade over the names imported from single-file-helper.js; the rest
  // of this module reaches them through `helper.<name>`.
  const helper = {
    ON_BEFORE_CAPTURE_EVENT_NAME,
    ON_AFTER_CAPTURE_EVENT_NAME,
    WIN_ID_ATTRIBUTE_NAME,
    waitForUserScript,
    preProcessDoc,
    serialize,
    postProcessDoc,
    getShadowRoot
  };
  // --- wire format ---------------------------------------------------------
  // Prefix that marks a window.postMessage string as frame-tree traffic.
  const MESSAGE_PREFIX = "__frameTree__::";
  const INIT_REQUEST_MESSAGE = "singlefile.frameTree.initRequest";
  const ACK_INIT_REQUEST_MESSAGE = "singlefile.frameTree.ackInitRequest";
  const CLEANUP_REQUEST_MESSAGE = "singlefile.frameTree.cleanupRequest";
  const INIT_RESPONSE_MESSAGE = "singlefile.frameTree.initResponse";
  const TARGET_ORIGIN = "*";

  // --- DOM selectors -------------------------------------------------------
  const FRAMES_CSS_SELECTOR = "iframe, frame, object[type=\"text/html\"][data]";
  const ALL_ELEMENTS_CSS_SELECTOR = "*";

  // --- timeouts (passed to setTimeout, i.e. milliseconds) -------------------
  // Delay before a frame that did not acknowledge the init request is reported as processed.
  const TIMEOUT_INIT_REQUEST_MESSAGE = 750;
  // Delay before a frame that acknowledged but never responded is reported as processed.
  const TIMEOUT_INIT_RESPONSE_MESSAGE = 10000;

  // --- window ids ----------------------------------------------------------
  // Ids are built as "<parent id><separator><frame index>", starting from the top window id.
  const TOP_WINDOW_ID = "0";
  const WINDOW_ID_SEPARATOR = ".";
  const TOP_WINDOW = globalThis.window == globalThis.top;

  // --- cached globals ------------------------------------------------------
  const browser = globalThis.browser;
  const addEventListener = (type, listener, options) => globalThis.addEventListener(type, listener, options);
  const top = globalThis.top;
  const MessageChannel = globalThis.MessageChannel;
  const document = globalThis.document;

  // --- module state --------------------------------------------------------
  // Session id -> session record (frames, requestTimeouts, responseTimeouts, resolve).
  const sessions = new Map();
  // Id of the current window; assigned below for the top window, or on init request for frames.
  let windowId;
  // In the top window: take the root window id and, when the extension
  // messaging API is available, handle frame responses/acks relayed through it.
  if (TOP_WINDOW) {
    windowId = TOP_WINDOW_ID;
    const runtimeOnMessage = browser && browser.runtime && browser.runtime.onMessage;
    if (runtimeOnMessage && runtimeOnMessage.addListener) {
      runtimeOnMessage.addListener(message => {
        const method = message.method;
        if (method == INIT_RESPONSE_MESSAGE) {
          initResponse(message);
        } else if (method == ACK_INIT_REQUEST_MESSAGE) {
          // The frame answered: swap its request timeout for a response timeout.
          clearFrameTimeout("requestTimeouts", message.sessionId, message.windowId);
          createFrameResponseTimeout(message.sessionId, message.windowId);
        } else {
          // Not a frame-tree message: leave it to other listeners.
          return undefined;
        }
        return Promise.resolve({});
      });
    }
  }
  // Capture-phase "message" listener handling frame-tree traffic sent with
  // window.postMessage. Only string payloads starting with MESSAGE_PREFIX are
  // handled; those events are stopped so they are not propagated further.
  addEventListener("message", async event => {
    if (typeof event.data != "string" || !event.data.startsWith(MESSAGE_PREFIX)) {
      return;
    }
    event.preventDefault();
    event.stopPropagation();
    let message;
    try {
      message = JSON.parse(event.data.substring(MESSAGE_PREFIX.length));
    } catch (error) {
      // Any script able to post to this window can forge the prefix: a payload
      // that is not valid JSON is dropped instead of rejecting this listener.
      return;
    }
    if (!message || typeof message != "object") {
      // "null" or a JSON primitive carries no method to dispatch on.
      return;
    }
    if (message.method == INIT_REQUEST_MESSAGE) {
      if (event.source) {
        // Acknowledge right away so the sender can replace its request timeout.
        sendMessage(event.source, { method: ACK_INIT_REQUEST_MESSAGE, windowId: message.windowId, sessionId: message.sessionId });
      }
      if (!TOP_WINDOW) {
        globalThis.stop();
        if (message.options.loadDeferredImages) {
          lazy.process(message.options);
        }
        await initRequestAsync(message);
      }
    } else if (message.method == ACK_INIT_REQUEST_MESSAGE) {
      clearFrameTimeout("requestTimeouts", message.sessionId, message.windowId);
      createFrameResponseTimeout(message.sessionId, message.windowId);
    } else if (message.method == CLEANUP_REQUEST_MESSAGE) {
      cleanupRequest(message);
    } else if (message.method == INIT_RESPONSE_MESSAGE && sessions.get(message.sessionId)) {
      // The actual response is delivered through the transferred MessagePort;
      // ignore the notification when no port came with it.
      const port = event.ports && event.ports[0];
      if (port) {
        port.onmessage = portEvent => initResponse(portEvent.data);
      }
    }
  }, true);
  107. export {
  108. getAsync,
  109. getSync,
  110. cleanup,
  111. initResponse,
  112. TIMEOUT_INIT_REQUEST_MESSAGE
  113. };
  /**
   * Starts an asynchronous frame-tree session from the current window.
   *
   * @param {Object} options - capture options; copied through a JSON round-trip before use.
   * @returns {Promise<Array>} resolved by `initResponse` with the session's frame
   *   list, on which a `sessionId` property is set.
   */
  function getAsync(options) {
    const sessionId = getNewSessionId();
    const optionsCopy = JSON.parse(JSON.stringify(options));
    return new Promise(resolveFrames => {
      const session = {
        frames: [],
        requestTimeouts: {},
        responseTimeouts: {},
        resolve: frames => {
          frames.sessionId = sessionId;
          resolveFrames(frames);
        }
      };
      sessions.set(sessionId, session);
      initRequestAsync({ windowId, sessionId, options: optionsCopy });
    });
  }
  /**
   * Synchronous counterpart of `getAsync`: registers a session, runs the
   * synchronous init request and returns whatever frames were collected.
   *
   * @param {Object} options - capture options; copied through a JSON round-trip before use.
   * @returns {Array} the session's frame list, with a `sessionId` property set on it.
   */
  function getSync(options) {
    const sessionId = getNewSessionId();
    const optionsCopy = JSON.parse(JSON.stringify(options));
    const session = {
      frames: [],
      requestTimeouts: {},
      responseTimeouts: {}
    };
    sessions.set(sessionId, session);
    initRequestSync({ windowId, sessionId, options: optionsCopy });
    // Re-read the session from the map, as the original flow does, before tagging the list.
    const collectedFrames = sessions.get(sessionId).frames;
    collectedFrames.sessionId = sessionId;
    return collectedFrames;
  }
  /**
   * Forgets a session and asks the frames of this document to clean up.
   *
   * @param {string} sessionId - id of the session to drop.
   */
  function cleanup(sessionId) {
    sessions.delete(sessionId);
    const request = { windowId, sessionId, options: { sessionId } };
    cleanupRequest(request);
  }
  147. function getNewSessionId() {
  148. return globalThis.crypto.getRandomValues(new Uint32Array(32)).join("");
  149. }
  /**
   * Handles an init request synchronously: processes the frames of the current
   * document and, outside the top window, reports this document's own data.
   *
   * @param {Object} message - `{ windowId, sessionId, options }` init request.
   */
  function initRequestSync(message) {
    const userScript = helper.waitForUserScript;
    const { sessionId, options } = message;
    if (!TOP_WINDOW) {
      // A frame learns its id from the request and exposes it as globalThis.frameId.
      const assignedId = message.windowId;
      globalThis.frameId = assignedId;
      windowId = assignedId;
    }
    processFrames(document, options, windowId, sessionId);
    if (TOP_WINDOW) {
      return;
    }
    if (options.userScriptEnabled && userScript.callback) {
      userScript.callback(helper.ON_BEFORE_CAPTURE_EVENT_NAME);
    }
    const ownFrameData = getFrameData(document, globalThis, windowId, options);
    const requestedFrameId = document.documentElement.dataset.requestedFrameId && windowId;
    sendInitResponse({ frames: [ownFrameData], sessionId, requestedFrameId });
    if (options.userScriptEnabled && userScript.callback) {
      userScript.callback(helper.ON_AFTER_CAPTURE_EVENT_NAME);
    }
    delete document.documentElement.dataset.requestedFrameId;
  }
  /**
   * Handles an init request asynchronously: processes the frames of the current
   * document and, outside the top window, reports this document's own data,
   * awaiting the user-script callback before and after the capture.
   *
   * @param {Object} message - `{ windowId, sessionId, options }` init request.
   * @returns {Promise<void>}
   */
  async function initRequestAsync(message) {
    const userScript = helper.waitForUserScript;
    const { sessionId, options } = message;
    if (!TOP_WINDOW) {
      // A frame learns its id from the request and exposes it as globalThis.frameId.
      const assignedId = message.windowId;
      globalThis.frameId = assignedId;
      windowId = assignedId;
    }
    processFrames(document, options, windowId, sessionId);
    if (TOP_WINDOW) {
      return;
    }
    if (options.userScriptEnabled && userScript.callback) {
      await userScript.callback(helper.ON_BEFORE_CAPTURE_EVENT_NAME);
    }
    const ownFrameData = getFrameData(document, globalThis, windowId, options);
    const requestedFrameId = document.documentElement.dataset.requestedFrameId && windowId;
    sendInitResponse({ frames: [ownFrameData], sessionId, requestedFrameId });
    if (options.userScriptEnabled && userScript.callback) {
      await userScript.callback(helper.ON_AFTER_CAPTURE_EVENT_NAME);
    }
    delete document.documentElement.dataset.requestedFrameId;
  }
  /**
   * Runs the cleanup pass on the frames of the current document.
   *
   * @param {Object} message - cleanup request; `windowId` is used as the parent
   *   id of the frames and `sessionId` is forwarded to them.
   */
  function cleanupRequest(message) {
    const { sessionId } = message;
    const frameElements = getFrames(document);
    cleanupFrames(frameElements, message.windowId, sessionId);
  }
  /**
   * Merges a frame response into its session and, once every known frame is
   * marked processed, sorts the frames (deepest window ids first) and resolves
   * the session if it has a `resolve` callback.
   *
   * @param {Object} message - `{ frames, sessionId, requestedFrameId }` response.
   */
  function initResponse(message) {
    // Properties copied from a response entry onto the stored frame entry.
    const copiedProperties = [
      "content", "baseURI", "title", "canvases", "fonts", "stylesheets",
      "images", "posters", "usedFonts", "shadowRoots", "imports", "processed"
    ];
    const depthOf = frame => frame.windowId.split(WINDOW_ID_SEPARATOR).length;

    // Every frame mentioned in the response has answered: cancel its response timeout.
    for (const respondedFrame of message.frames) {
      clearFrameTimeout("responseTimeouts", message.sessionId, respondedFrame.windowId);
    }
    const windowData = sessions.get(message.sessionId);
    if (!windowData) {
      return;
    }
    if (message.requestedFrameId) {
      windowData.requestedFrameId = message.requestedFrameId;
    }
    for (const incoming of message.frames) {
      let stored = windowData.frames.find(candidate => incoming.windowId == candidate.windowId);
      if (!stored) {
        stored = { windowId: incoming.windowId };
        windowData.frames.push(stored);
      }
      // Data of an entry already marked processed is never overwritten.
      if (!stored.processed) {
        for (const property of copiedProperties) {
          stored[property] = incoming[property];
        }
      }
    }
    const everyFrameProcessed = windowData.frames.every(frame => frame.processed);
    if (!everyFrameProcessed) {
      return;
    }
    windowData.frames = windowData.frames.sort((frame1, frame2) => depthOf(frame2) - depthOf(frame1));
    if (!windowData.resolve) {
      return;
    }
    if (windowData.requestedFrameId) {
      for (const frame of windowData.frames) {
        if (frame.windowId == windowData.requestedFrameId) {
          frame.requestedFrame = true;
        }
      }
    }
    windowData.resolve(windowData.frames);
  }
  /**
   * Processes the frames found in `doc`: always runs the message-based pass,
   * then the direct-access pass when at least one frame element exists.
   *
   * @param {Document} doc - document whose frames are processed.
   * @param {Object} options - capture options forwarded to the frames.
   * @param {string} parentWindowId - window id of `doc`.
   * @param {string} sessionId - current session id.
   */
  function processFrames(doc, options, parentWindowId, sessionId) {
    const frameElements = getFrames(doc);
    processFramesAsync(doc, frameElements, options, parentWindowId, sessionId);
    if (!frameElements.length) {
      return;
    }
    processFramesSync(doc, frameElements, options, parentWindowId, sessionId);
  }
  /**
   * Message-based pass: tags each frame element with its window id, announces
   * the frames, then posts an init request to each frame and arms a request
   * timeout that reports the frame as processed if it is not cleared in time.
   *
   * @param {Document} doc - document owning the frame elements.
   * @param {Element[]} frameElements - frame elements of `doc`.
   * @param {Object} options - capture options sent with the init request.
   * @param {string} parentWindowId - window id of `doc`.
   * @param {string} sessionId - current session id.
   */
  function processFramesAsync(doc, frameElements, options, parentWindowId, sessionId) {
    const existingSession = sessions.get(sessionId);
    let requestTimeouts;
    if (existingSession) {
      requestTimeouts = existingSession.requestTimeouts;
    } else {
      // No session registered in this window yet: create one just to hold the timeouts.
      requestTimeouts = {};
      sessions.set(sessionId, { requestTimeouts });
    }
    const childWindowIds = frameElements.map((frameElement, frameIndex) => parentWindowId + WINDOW_ID_SEPARATOR + frameIndex);
    frameElements.forEach((frameElement, frameIndex) => {
      frameElement.setAttribute(helper.WIN_ID_ATTRIBUTE_NAME, childWindowIds[frameIndex]);
    });
    const frames = childWindowIds.map(childWindowId => ({ windowId: childWindowId }));
    sendInitResponse({ frames, sessionId, requestedFrameId: doc.documentElement.dataset.requestedFrameId && parentWindowId });
    frameElements.forEach((frameElement, frameIndex) => {
      const windowId = childWindowIds[frameIndex];
      try {
        sendMessage(frameElement.contentWindow, { method: INIT_REQUEST_MESSAGE, windowId, sessionId, options });
      } catch (error) {
        // ignored
      }
      const reportAsProcessed = () => sendInitResponse({ frames: [{ windowId, processed: true }], sessionId });
      requestTimeouts[windowId] = globalThis.setTimeout(reportAsProcessed, TIMEOUT_INIT_REQUEST_MESSAGE);
    });
    delete doc.documentElement.dataset.requestedFrameId;
  }
  /**
   * Direct-access pass: for each frame whose `contentDocument` can be read,
   * stops its loading, cancels its request timeout, processes its own frames
   * and collects its data; a frame failing along the way is reported as
   * processed without data. Frames with no readable document are skipped.
   *
   * @param {Document} doc - document owning the frame elements.
   * @param {Element[]} frameElements - frame elements of `doc`.
   * @param {Object} options - capture options.
   * @param {string} parentWindowId - window id of `doc`.
   * @param {string} sessionId - current session id.
   */
  function processFramesSync(doc, frameElements, options, parentWindowId, sessionId) {
    const readFrameDocument = frameElement => {
      try {
        return frameElement.contentDocument;
      } catch (error) {
        // ignored
        return undefined;
      }
    };
    const frames = [];
    frameElements.forEach((frameElement, frameIndex) => {
      const frameDoc = readFrameDocument(frameElement);
      if (!frameDoc) {
        return;
      }
      const windowId = parentWindowId + WINDOW_ID_SEPARATOR + frameIndex;
      try {
        const frameWindow = frameElement.contentWindow;
        frameWindow.stop();
        clearFrameTimeout("requestTimeouts", sessionId, windowId);
        processFrames(frameDoc, options, windowId, sessionId);
        frames.push(getFrameData(frameDoc, frameWindow, windowId, options));
      } catch (error) {
        frames.push({ windowId, processed: true });
      }
    });
    const requestedFrameId = doc.documentElement.dataset.requestedFrameId && parentWindowId;
    sendInitResponse({ frames, sessionId, requestedFrameId });
    delete doc.documentElement.dataset.requestedFrameId;
  }
  /**
   * Cancels and forgets the pending timeout of a frame, if there is one.
   *
   * @param {string} type - session property holding the timeouts
   *   ("requestTimeouts" or "responseTimeouts" in this file).
   * @param {string} sessionId - session owning the timeout.
   * @param {string} windowId - window id of the frame.
   */
  function clearFrameTimeout(type, sessionId, windowId) {
    const session = sessions.get(sessionId);
    const timeouts = session && session[type];
    if (!timeouts) {
      return;
    }
    const timeout = timeouts[windowId];
    if (!timeout) {
      return;
    }
    globalThis.clearTimeout(timeout);
    delete timeouts[windowId];
  }
  /**
   * Arms the response timeout of a frame: when it fires, the frame is reported
   * as processed. Does nothing if the session has no `responseTimeouts`.
   *
   * @param {string} sessionId - session owning the frame.
   * @param {string} windowId - window id of the frame.
   */
  function createFrameResponseTimeout(sessionId, windowId) {
    const session = sessions.get(sessionId);
    if (!session || !session.responseTimeouts) {
      return;
    }
    const reportAsProcessed = () => sendInitResponse({ frames: [{ windowId, processed: true }], sessionId });
    session.responseTimeouts[windowId] = globalThis.setTimeout(reportAsProcessed, TIMEOUT_INIT_RESPONSE_MESSAGE);
  }
  /**
   * Removes the window-id attribute from every frame element and posts a
   * cleanup request to each frame, then recurses into the frames whose
   * document can be read directly. Errors are ignored throughout.
   *
   * @param {Element[]} frameElements - frame elements to clean up.
   * @param {string} parentWindowId - window id of the owning document.
   * @param {string} sessionId - session being cleaned up.
   */
  function cleanupFrames(frameElements, parentWindowId, sessionId) {
    const childIdOf = frameIndex => parentWindowId + WINDOW_ID_SEPARATOR + frameIndex;

    // First pass: untag every frame and notify it.
    frameElements.forEach((frameElement, frameIndex) => {
      frameElement.removeAttribute(helper.WIN_ID_ATTRIBUTE_NAME);
      try {
        sendMessage(frameElement.contentWindow, { method: CLEANUP_REQUEST_MESSAGE, windowId: childIdOf(frameIndex), sessionId });
      } catch (error) {
        // ignored
      }
    });

    // Second pass: descend into frames whose document is readable from here.
    frameElements.forEach((frameElement, frameIndex) => {
      try {
        const frameDoc = frameElement.contentDocument;
        if (frameDoc) {
          cleanupFrames(getFrames(frameDoc), childIdOf(frameIndex), sessionId);
        }
      } catch (error) {
        // ignored
      }
    });
  }
  /**
   * Delivers an init response to the top window: by calling
   * `top.frameTree.initResponse` directly, or, if that throws, through
   * `sendMessage` with a message channel.
   *
   * @param {Object} message - response; its `method` is set to INIT_RESPONSE_MESSAGE.
   */
  function sendInitResponse(message) {
    const viaChannel = true;
    message.method = INIT_RESPONSE_MESSAGE;
    try {
      top.frameTree.initResponse(message);
    } catch (error) {
      // Direct call failed: fall back to messaging.
      sendMessage(top, message, viaChannel);
    }
  }
  /**
   * Sends a frame-tree message to a window.
   *
   * Messages for the top window go through `browser.runtime.sendMessage` when
   * it is available. Otherwise the message is posted with `postMessage`, either
   * whole, or (with `useChannel`) as a `{ method, sessionId }` notification
   * carrying a MessagePort on which the full message is then posted.
   *
   * @param {Window} targetWindow - destination window.
   * @param {Object} message - message to send.
   * @param {boolean} [useChannel] - deliver the full message over a MessageChannel.
   */
  function sendMessage(targetWindow, message, useChannel) {
    if (targetWindow == top && browser && browser.runtime && browser.runtime.sendMessage) {
      browser.runtime.sendMessage(message);
      return;
    }
    if (!useChannel) {
      targetWindow.postMessage(MESSAGE_PREFIX + JSON.stringify(message), TARGET_ORIGIN);
      return;
    }
    const channel = new MessageChannel();
    const notification = JSON.stringify({ method: message.method, sessionId: message.sessionId });
    targetWindow.postMessage(MESSAGE_PREFIX + notification, TARGET_ORIGIN, [channel.port2]);
    channel.port1.postMessage(message);
  }
  /**
   * Captures the data of one document: pre-processes it, serializes it,
   * post-processes it, and returns the serialized content with the collected
   * resources.
   *
   * @param {Document} document - document to capture.
   * @param {Window} globalThis - window of that document, passed to `preProcessDoc`.
   * @param {string} windowId - window id stored in the result.
   * @param {Object} options - capture options passed to `preProcessDoc`.
   * @returns {Object} frame record flagged `processed: true`.
   */
  function getFrameData(document, globalThis, windowId, options) {
    const docData = helper.preProcessDoc(document, globalThis, options);
    const content = helper.serialize(document);
    helper.postProcessDoc(document, docData.markedElements);
    // The URL fragment is dropped from the base URI.
    const [baseURI] = document.baseURI.split("#");
    const title = document.title;
    const { canvases, fonts, stylesheets, images, posters, usedFonts, shadowRoots, imports } = docData;
    return {
      windowId,
      content,
      baseURI,
      title,
      canvases,
      fonts,
      stylesheets,
      images,
      posters,
      usedFonts,
      shadowRoots,
      imports,
      processed: true
    };
  }
  /**
   * Lists the frame elements of a document: those matching FRAMES_CSS_SELECTOR
   * in the document itself, followed by those found in the shadow root of any
   * of its elements (as returned by `helper.getShadowRoot`).
   *
   * @param {Document} document - document to search.
   * @returns {Element[]} matching frame elements.
   */
  function getFrames(document) {
    const documentFrames = Array.from(document.querySelectorAll(FRAMES_CSS_SELECTOR));
    const shadowFrames = [];
    document.querySelectorAll(ALL_ELEMENTS_CSS_SELECTOR).forEach(element => {
      const shadowRoot = helper.getShadowRoot(element);
      if (shadowRoot) {
        shadowFrames.push(...shadowRoot.querySelectorAll(FRAMES_CSS_SELECTOR));
      }
    });
    return documentFrames.concat(shadowFrames);
  }
  386. }