content-frame-tree.js 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408
  1. /*
  2. * Copyright 2010-2020 Gildas Lormeau
  3. * contact : gildas.lormeau <at> gmail.com
  4. *
  5. * This file is part of SingleFile.
  6. *
  7. * The code in this file is free software: you can redistribute it and/or
  8. * modify it under the terms of the GNU Affero General Public License
  9. * (GNU AGPL) as published by the Free Software Foundation, either version 3
  10. * of the License, or (at your option) any later version.
  11. *
  12. * The code in this file is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero
  15. * General Public License for more details.
  16. *
  17. * As additional permission under GNU AGPL version 3 section 7, you may
  18. * distribute UNMODIFIED VERSIONS OF THIS file without the copy of the GNU
  19. * AGPL normally required by section 4, provided you include this license
  20. * notice and a URL through which recipients can access the Corresponding
  21. * Source.
  22. */
  23. /* global globalThis */
  24. import * as lazy from "./../../lazy/content/content-lazy-loader.js";
  25. import {
  26. ON_BEFORE_CAPTURE_EVENT_NAME,
  27. ON_AFTER_CAPTURE_EVENT_NAME,
  28. WIN_ID_ATTRIBUTE_NAME,
  29. preProcessDoc,
  30. serialize,
  31. postProcessDoc,
  32. getShadowRoot
  33. } from "./../../../single-file-helper.js";
  // Helper members imported from single-file-helper.js, gathered into one object
  // so that the rest of this module accesses them as `helper.*`.
  const helper = {
    ON_BEFORE_CAPTURE_EVENT_NAME, ON_AFTER_CAPTURE_EVENT_NAME, WIN_ID_ATTRIBUTE_NAME,
    preProcessDoc, serialize, postProcessDoc, getShadowRoot
  };

  // Prefix identifying the string payloads exchanged through postMessage() by this module.
  const MESSAGE_PREFIX = "__frameTree__::";
  const TARGET_ORIGIN = "*";

  // Selectors used to find frame elements and to enumerate every element of a document.
  const FRAMES_CSS_SELECTOR = "iframe, frame, object[type=\"text/html\"][data]";
  const ALL_ELEMENTS_CSS_SELECTOR = "*";

  // Values of the `method` property of the messages exchanged between windows.
  const INIT_REQUEST_MESSAGE = "singlefile.frameTree.initRequest";
  const ACK_INIT_REQUEST_MESSAGE = "singlefile.frameTree.ackInitRequest";
  const CLEANUP_REQUEST_MESSAGE = "singlefile.frameTree.cleanupRequest";
  const INIT_RESPONSE_MESSAGE = "singlefile.frameTree.initResponse";

  // Delays (in milliseconds, passed to setTimeout) after which a frame is reported as processed
  // when it did not acknowledge the init request, respectively did not send its response.
  const TIMEOUT_INIT_REQUEST_MESSAGE = 750;
  const TIMEOUT_INIT_RESPONSE_MESSAGE = 10000;

  // Window identifiers: the top window is "0", a child appends "." and its frame index.
  const TOP_WINDOW_ID = "0";
  const WINDOW_ID_SEPARATOR = ".";

  const TOP_WINDOW = globalThis.window == globalThis.top;
  const { browser } = globalThis;
  const addEventListener = (type, listener, options) => globalThis.addEventListener(type, listener, options);
  const { top, MessageChannel, document } = globalThis;

  // Session data indexed by session id.
  const sessions = new Map();
  // Identifier of the current window; assigned below for the top window and on init request for frames.
  let windowId;
  // In the top window only: set the window id and, when the extension messaging API
  // is available, handle frame tree messages received through browser.runtime.
  if (TOP_WINDOW) {
    windowId = TOP_WINDOW_ID;
    const onMessage = browser && browser.runtime && browser.runtime.onMessage;
    if (onMessage && onMessage.addListener) {
      onMessage.addListener(message => {
        if (message.method == INIT_RESPONSE_MESSAGE) {
          initResponse(message);
          return Promise.resolve({});
        }
        if (message.method == ACK_INIT_REQUEST_MESSAGE) {
          // The frame acknowledged the request: replace the request timeout with the response timeout.
          clearFrameTimeout("requestTimeouts", message.sessionId, message.windowId);
          createFrameResponseTimeout(message.sessionId, message.windowId);
          return Promise.resolve({});
        }
        // Other messages are left unanswered by this listener.
        return undefined;
      });
    }
  }
  /**
   * Capturing "message" listener handling the frame tree messages sent with postMessage().
   *
   * Only string payloads starting with MESSAGE_PREFIX are handled; for those, the event
   * is cancelled and its propagation stopped before the JSON payload is decoded.
   *
   * Since TARGET_ORIGIN is "*" and any window can post a string starting with the prefix,
   * the payload is treated as untrusted: payloads that are not valid JSON, that do not
   * decode to an object, or init responses received without a transferred port are
   * ignored instead of throwing inside the listener.
   */
  addEventListener("message", async event => {
    if (typeof event.data != "string" || !event.data.startsWith(MESSAGE_PREFIX)) {
      return;
    }
    event.preventDefault();
    event.stopPropagation();
    let message;
    try {
      message = JSON.parse(event.data.substring(MESSAGE_PREFIX.length));
    } catch (error) {
      // malformed payload: ignored
      return;
    }
    if (!message || typeof message != "object") {
      // e.g. "null" or a number: there is no `method` to dispatch on
      return;
    }
    if (message.method == INIT_REQUEST_MESSAGE) {
      if (event.source) {
        sendMessage(event.source, { method: ACK_INIT_REQUEST_MESSAGE, windowId: message.windowId, sessionId: message.sessionId });
      }
      if (!TOP_WINDOW) {
        globalThis.stop();
        if (message.options.loadDeferredImages) {
          lazy.process(message.options);
        }
        await initRequestAsync(message);
      }
    } else if (message.method == ACK_INIT_REQUEST_MESSAGE) {
      // The frame acknowledged the request: replace the request timeout with the response timeout.
      clearFrameTimeout("requestTimeouts", message.sessionId, message.windowId);
      createFrameResponseTimeout(message.sessionId, message.windowId);
    } else if (message.method == CLEANUP_REQUEST_MESSAGE) {
      cleanupRequest(message);
    } else if (message.method == INIT_RESPONSE_MESSAGE && sessions.get(message.sessionId)) {
      // The response itself is delivered through the transferred MessagePort.
      const port = event.ports && event.ports[0];
      if (port) {
        port.onmessage = portEvent => initResponse(portEvent.data);
      }
    }
  }, true);
  105. export {
  106. getAsync,
  107. getSync,
  108. cleanup,
  109. initResponse,
  110. TIMEOUT_INIT_REQUEST_MESSAGE
  111. };
  /**
   * Starts a new session and asynchronously collects the data of the frames.
   *
   * @param {Object} options - options forwarded to the frames; a JSON copy is used.
   * @returns {Promise<Array>} resolved with the session frames once the session is
   * resolved by initResponse(); the array also carries a `sessionId` property.
   */
  function getAsync(options) {
    const sessionId = getNewSessionId();
    const optionsCopy = JSON.parse(JSON.stringify(options));
    return new Promise(resolvePromise => {
      const session = {
        frames: [],
        requestTimeouts: {},
        responseTimeouts: {},
        resolve: frames => {
          frames.sessionId = sessionId;
          resolvePromise(frames);
        }
      };
      sessions.set(sessionId, session);
      initRequestAsync({ windowId, sessionId, options: optionsCopy });
    });
  }
  /**
   * Starts a new session and synchronously collects the data of the frames.
   *
   * @param {Object} options - options forwarded to the frames; a JSON copy is used.
   * @returns {Array} the frames stored in the session after initRequestSync() returned;
   * the array also carries a `sessionId` property.
   */
  function getSync(options) {
    const sessionId = getNewSessionId();
    const optionsCopy = JSON.parse(JSON.stringify(options));
    const session = { frames: [], requestTimeouts: {}, responseTimeouts: {} };
    sessions.set(sessionId, session);
    initRequestSync({ windowId, sessionId, options: optionsCopy });
    const { frames } = sessions.get(sessionId);
    frames.sessionId = sessionId;
    return frames;
  }
  /**
   * Forgets a session and asks the frames of the document to clean up.
   *
   * @param {string} sessionId - identifier of the session to remove.
   */
  function cleanup(sessionId) {
    const request = { windowId, sessionId, options: { sessionId } };
    sessions.delete(sessionId);
    cleanupRequest(request);
  }
  /**
   * Builds a session identifier from 32 random unsigned 32-bit integers.
   *
   * @returns {string} the decimal representations of the random values, concatenated.
   */
  function getNewSessionId() {
    const randomValues = new Uint32Array(32);
    globalThis.crypto.getRandomValues(randomValues);
    return randomValues.join("");
  }
  /**
   * Synchronous handling of an init request: processes the frames of the current
   * document and, when the current window is not the top window, sends its own data.
   *
   * @param {Object} message - init request ({ windowId, sessionId, options }).
   */
  function initRequestSync(message) {
    const { sessionId, options } = message;
    const waitForUserScript = globalThis._singleFile_waitForUserScript;
    // Evaluated at each call site, like the inline conditions it replaces.
    const getUserScriptHook = () => options.userScriptEnabled && waitForUserScript;
    if (!TOP_WINDOW) {
      windowId = globalThis.frameId = message.windowId;
    }
    processFrames(document, options, windowId, sessionId);
    if (TOP_WINDOW) {
      return;
    }
    if (getUserScriptHook()) {
      waitForUserScript(helper.ON_BEFORE_CAPTURE_EVENT_NAME);
    }
    sendInitResponse({ frames: [getFrameData(document, globalThis, windowId, options)], sessionId, requestedFrameId: document.documentElement.dataset.requestedFrameId && windowId });
    if (getUserScriptHook()) {
      waitForUserScript(helper.ON_AFTER_CAPTURE_EVENT_NAME);
    }
    delete document.documentElement.dataset.requestedFrameId;
  }
  /**
   * Asynchronous handling of an init request: processes the frames of the current
   * document and, when the current window is not the top window, sends its own data,
   * awaiting the user script hook (if enabled and present) before and after the capture.
   *
   * @param {Object} message - init request ({ windowId, sessionId, options }).
   * @returns {Promise<void>}
   */
  async function initRequestAsync(message) {
    const { sessionId, options } = message;
    const waitForUserScript = globalThis._singleFile_waitForUserScript;
    // Evaluated at each call site, like the inline conditions it replaces.
    const getUserScriptHook = () => options.userScriptEnabled && waitForUserScript;
    if (!TOP_WINDOW) {
      windowId = globalThis.frameId = message.windowId;
    }
    processFrames(document, options, windowId, sessionId);
    if (TOP_WINDOW) {
      return;
    }
    if (getUserScriptHook()) {
      await waitForUserScript(helper.ON_BEFORE_CAPTURE_EVENT_NAME);
    }
    sendInitResponse({ frames: [getFrameData(document, globalThis, windowId, options)], sessionId, requestedFrameId: document.documentElement.dataset.requestedFrameId && windowId });
    if (getUserScriptHook()) {
      await waitForUserScript(helper.ON_AFTER_CAPTURE_EVENT_NAME);
    }
    delete document.documentElement.dataset.requestedFrameId;
  }
  /**
   * Handles a cleanup request by cleaning up the frames of the current document.
   *
   * @param {Object} message - cleanup request; `windowId` and `sessionId` are read.
   */
  function cleanupRequest(message) {
    const { windowId: parentWindowId, sessionId } = message;
    const frameElements = getFrames(document);
    cleanupFrames(frameElements, parentWindowId, sessionId);
  }
  /**
   * Merges the frames of an init response into the matching session and, once every
   * known frame is flagged as processed, sorts the frames by decreasing window id depth
   * and resolves the session (if it has a `resolve` callback).
   *
   * @param {Object} message - init response ({ frames, sessionId, requestedFrameId }).
   */
  function initResponse(message) {
    message.frames.forEach(frameData => clearFrameTimeout("responseTimeouts", message.sessionId, frameData.windowId));
    const windowData = sessions.get(message.sessionId);
    if (!windowData) {
      return;
    }
    if (message.requestedFrameId) {
      windowData.requestedFrameId = message.requestedFrameId;
    }
    // Properties copied from a received frame, in this order.
    const copiedProperties = ["content", "baseURI", "title", "canvases", "fonts", "stylesheets", "images", "posters", "usedFonts", "shadowRoots", "imports", "processed"];
    message.frames.forEach(receivedFrame => {
      let knownFrame = windowData.frames.find(candidate => receivedFrame.windowId == candidate.windowId);
      if (!knownFrame) {
        knownFrame = { windowId: receivedFrame.windowId };
        windowData.frames.push(knownFrame);
      }
      // Data of a frame already flagged as processed is never overwritten.
      if (!knownFrame.processed) {
        copiedProperties.forEach(propertyName => {
          knownFrame[propertyName] = receivedFrame[propertyName];
        });
      }
    });
    const allFramesProcessed = windowData.frames.every(frameData => frameData.processed);
    if (!allFramesProcessed) {
      return;
    }
    const getDepth = id => id.split(WINDOW_ID_SEPARATOR).length;
    // Array.prototype.sort() sorts in place: deepest frames come first.
    windowData.frames.sort((frame1, frame2) => getDepth(frame2.windowId) - getDepth(frame1.windowId));
    if (!windowData.resolve) {
      return;
    }
    if (windowData.requestedFrameId) {
      windowData.frames
        .filter(frameData => frameData.windowId == windowData.requestedFrameId)
        .forEach(frameData => {
          frameData.requestedFrame = true;
        });
    }
    windowData.resolve(windowData.frames);
  }
  /**
   * Processes the frames found in a document: the message-based pass always runs,
   * the direct-access pass runs only when at least one frame element was found.
   *
   * @param {Document} doc - document whose frames are processed.
   * @param {Object} options - options forwarded to the frames.
   * @param {string} parentWindowId - window id of `doc`.
   * @param {string} sessionId - identifier of the session.
   */
  function processFrames(doc, options, parentWindowId, sessionId) {
    const frameElements = getFrames(doc);
    processFramesAsync(doc, frameElements, options, parentWindowId, sessionId);
    if (frameElements.length > 0) {
      processFramesSync(doc, frameElements, options, parentWindowId, sessionId);
    }
  }
  /**
   * Message-based pass: tags every frame element with its window id, announces the
   * frames with an init response, then posts an init request to each frame and arms a
   * timeout reporting the frame as processed after TIMEOUT_INIT_REQUEST_MESSAGE ms.
   * A session holding only `requestTimeouts` is created when none exists for `sessionId`.
   *
   * @param {Document} doc - document containing the frame elements.
   * @param {Element[]} frameElements - frame elements of `doc`.
   * @param {Object} options - options sent with the init request.
   * @param {string} parentWindowId - window id of `doc`.
   * @param {string} sessionId - identifier of the session.
   */
  function processFramesAsync(doc, frameElements, options, parentWindowId, sessionId) {
    let session = sessions.get(sessionId);
    if (!session) {
      session = { requestTimeouts: {} };
      sessions.set(sessionId, session);
    }
    const requestTimeouts = session.requestTimeouts;
    const windowIds = frameElements.map((frameElement, frameIndex) => parentWindowId + WINDOW_ID_SEPARATOR + frameIndex);
    frameElements.forEach((frameElement, frameIndex) => {
      frameElement.setAttribute(helper.WIN_ID_ATTRIBUTE_NAME, windowIds[frameIndex]);
    });
    const frames = windowIds.map(windowId => ({ windowId }));
    sendInitResponse({ frames, sessionId, requestedFrameId: doc.documentElement.dataset.requestedFrameId && parentWindowId });
    frameElements.forEach((frameElement, frameIndex) => {
      const windowId = windowIds[frameIndex];
      try {
        sendMessage(frameElement.contentWindow, { method: INIT_REQUEST_MESSAGE, windowId, sessionId, options });
      } catch (error) {
        // ignored
      }
      const onRequestTimeout = () => sendInitResponse({ frames: [{ windowId, processed: true }], sessionId });
      requestTimeouts[windowId] = globalThis.setTimeout(onRequestTimeout, TIMEOUT_INIT_REQUEST_MESSAGE);
    });
    delete doc.documentElement.dataset.requestedFrameId;
  }
  /**
   * Direct-access pass: for every frame whose `contentDocument` can be read, stops the
   * frame window, cancels its request timeout, processes its own frames recursively and
   * collects its data. The collected frames are then sent in a single init response.
   *
   * @param {Document} doc - document containing the frame elements.
   * @param {Element[]} frameElements - frame elements of `doc`.
   * @param {Object} options - options forwarded to the nested frames.
   * @param {string} parentWindowId - window id of `doc`.
   * @param {string} sessionId - identifier of the session.
   */
  function processFramesSync(doc, frameElements, options, parentWindowId, sessionId) {
    // Returns the document of the frame, or undefined when reading it throws.
    const readContentDocument = frameElement => {
      try {
        return frameElement.contentDocument;
      } catch (error) {
        // ignored
        return undefined;
      }
    };
    const frames = [];
    for (const [frameIndex, frameElement] of frameElements.entries()) {
      const windowId = parentWindowId + WINDOW_ID_SEPARATOR + frameIndex;
      const frameDoc = readContentDocument(frameElement);
      if (!frameDoc) {
        continue;
      }
      try {
        const frameWindow = frameElement.contentWindow;
        frameWindow.stop();
        clearFrameTimeout("requestTimeouts", sessionId, windowId);
        processFrames(frameDoc, options, windowId, sessionId);
        frames.push(getFrameData(frameDoc, frameWindow, windowId, options));
      } catch (error) {
        // The frame is reported as processed, without data.
        frames.push({ windowId, processed: true });
      }
    }
    sendInitResponse({ frames, sessionId, requestedFrameId: doc.documentElement.dataset.requestedFrameId && parentWindowId });
    delete doc.documentElement.dataset.requestedFrameId;
  }
  /**
   * Cancels and forgets the timeout registered for a window in a session, if any.
   *
   * @param {string} type - name of the session property holding the timeouts
   * (callers in this file pass "requestTimeouts" or "responseTimeouts").
   * @param {string} sessionId - identifier of the session.
   * @param {string} windowId - window id the timeout was registered for.
   */
  function clearFrameTimeout(type, sessionId, windowId) {
    const session = sessions.get(sessionId);
    const timeouts = session && session[type];
    if (!timeouts) {
      return;
    }
    const timeout = timeouts[windowId];
    if (!timeout) {
      return;
    }
    globalThis.clearTimeout(timeout);
    delete timeouts[windowId];
  }
  /**
   * Arms the timeout that reports a window as processed when its response is not
   * received within TIMEOUT_INIT_RESPONSE_MESSAGE ms. Does nothing when the session
   * is unknown or has no `responseTimeouts`.
   *
   * @param {string} sessionId - identifier of the session.
   * @param {string} windowId - window id the response is expected from.
   */
  function createFrameResponseTimeout(sessionId, windowId) {
    const session = sessions.get(sessionId);
    if (!session || !session.responseTimeouts) {
      return;
    }
    const onResponseTimeout = () => sendInitResponse({ frames: [{ windowId: windowId, processed: true }], sessionId: sessionId });
    session.responseTimeouts[windowId] = globalThis.setTimeout(onResponseTimeout, TIMEOUT_INIT_RESPONSE_MESSAGE);
  }
  /**
   * Removes the window id attribute from the given frame elements and posts a cleanup
   * request to each of them, then recurses into the frames whose document can be read.
   *
   * @param {Element[]} frameElements - frame elements to clean up.
   * @param {string} parentWindowId - window id of the document owning the frame elements.
   * @param {string} sessionId - identifier of the session.
   */
  function cleanupFrames(frameElements, parentWindowId, sessionId) {
    const getChildWindowId = frameIndex => parentWindowId + WINDOW_ID_SEPARATOR + frameIndex;
    // First pass: every frame element is untagged and notified.
    for (const [frameIndex, frameElement] of frameElements.entries()) {
      const windowId = getChildWindowId(frameIndex);
      frameElement.removeAttribute(helper.WIN_ID_ATTRIBUTE_NAME);
      try {
        sendMessage(frameElement.contentWindow, { method: CLEANUP_REQUEST_MESSAGE, windowId, sessionId });
      } catch (error) {
        // ignored
      }
    }
    // Second pass: nested frames of the documents that can be read directly.
    for (const [frameIndex, frameElement] of frameElements.entries()) {
      const windowId = getChildWindowId(frameIndex);
      let frameDoc;
      try {
        frameDoc = frameElement.contentDocument;
      } catch (error) {
        // ignored
      }
      if (!frameDoc) {
        continue;
      }
      try {
        cleanupFrames(getFrames(frameDoc), windowId, sessionId);
      } catch (error) {
        // ignored
      }
    }
  }
  /**
   * Delivers an init response to the top window: `top.singlefile.processors.frameTree.initResponse`
   * is called directly and, if that throws, the message is sent with sendMessage()
   * using a message channel.
   *
   * @param {Object} message - response to deliver; its `method` property is set here.
   */
  function sendInitResponse(message) {
    message.method = INIT_RESPONSE_MESSAGE;
    try {
      const { frameTree } = top.singlefile.processors;
      frameTree.initResponse(message);
    } catch (error) {
      sendMessage(top, message, true);
    }
  }
  /**
   * Sends a message to a window.
   *
   * When the target is the top window and `browser.runtime.sendMessage` is available,
   * the message goes through it. Otherwise postMessage() is used: either the whole
   * message is posted as a prefixed JSON string, or (with `useChannel`) only `method`
   * and `sessionId` are posted along with a MessagePort through which the full message
   * is then sent.
   *
   * @param {Window} targetWindow - window receiving the message.
   * @param {Object} message - message to send.
   * @param {boolean} [useChannel] - whether to send the message through a MessageChannel.
   */
  function sendMessage(targetWindow, message, useChannel) {
    const useRuntime = targetWindow == top && browser && browser.runtime && browser.runtime.sendMessage;
    if (useRuntime) {
      browser.runtime.sendMessage(message);
      return;
    }
    if (!useChannel) {
      targetWindow.postMessage(MESSAGE_PREFIX + JSON.stringify(message), TARGET_ORIGIN);
      return;
    }
    const channel = new MessageChannel();
    const header = { method: message.method, sessionId: message.sessionId };
    targetWindow.postMessage(MESSAGE_PREFIX + JSON.stringify(header), TARGET_ORIGIN, [channel.port2]);
    channel.port1.postMessage(message);
  }
  /**
   * Builds the data describing a frame: the document is pre-processed, serialized and
   * post-processed with the helper, and the result is flagged as processed.
   *
   * @param {Document} document - document of the frame.
   * @param {Window} globalThis - window of the frame.
   * @param {string} windowId - window id of the frame.
   * @param {Object} options - options passed to helper.preProcessDoc().
   * @returns {Object} frame data (windowId, content, baseURI without fragment, title,
   * resources returned by the pre-processing, processed: true).
   */
  function getFrameData(document, globalThis, windowId, options) {
    const docData = helper.preProcessDoc(document, globalThis, options);
    const content = helper.serialize(document);
    helper.postProcessDoc(document, docData.markedElements);
    // Everything from the first "#" on is dropped.
    const [baseURI] = document.baseURI.split("#");
    const title = document.title;
    const { canvases, fonts, stylesheets, images, posters, usedFonts, shadowRoots, imports } = docData;
    return {
      windowId, content, baseURI, title,
      canvases, fonts, stylesheets, images, posters, usedFonts, shadowRoots, imports,
      processed: true
    };
  }
  /**
   * Lists the frame elements of a document: those matched by FRAMES_CSS_SELECTOR in the
   * document itself, followed by those matched in the shadow root (as returned by
   * helper.getShadowRoot()) of each element matched by ALL_ELEMENTS_CSS_SELECTOR.
   *
   * @param {Document} document - document to search.
   * @returns {Element[]} the frame elements found.
   */
  function getFrames(document) {
    const frames = Array.from(document.querySelectorAll(FRAMES_CSS_SELECTOR));
    for (const element of Array.from(document.querySelectorAll(ALL_ELEMENTS_CSS_SELECTOR))) {
      const shadowRoot = helper.getShadowRoot(element);
      if (!shadowRoot) {
        continue;
      }
      frames.push(...shadowRoot.querySelectorAll(FRAMES_CSS_SELECTOR));
    }
    return frames;
  }
  384. }