frame-tree.js 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316
  1. /*
  2. * Copyright 2010-2019 Gildas Lormeau
  3. * contact : gildas.lormeau <at> gmail.com
  4. *
  5. * This file is part of SingleFile.
  6. *
  7. * SingleFile is free software: you can redistribute it and/or modify
  8. * it under the terms of the GNU Lesser General Public License as published by
  9. * the Free Software Foundation, either version 3 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * SingleFile is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public License
  18. * along with SingleFile. If not, see <http://www.gnu.org/licenses/>.
  19. */
  20. /* global window, top, document, addEventListener, docHelper, timeout, MessageChannel, superFetch, fetch, TextDecoder, DOMParser, lazyLoader, setTimeout */
  21. this.frameTree = this.frameTree || (() => {
  // --- Messaging protocol -------------------------------------------------
  // Marker prepended to every string posted through window.postMessage by this module.
  const MESSAGE_PREFIX = "__frameTree__::";
  // Values carried by the "method" property of the exchanged messages.
  const INIT_REQUEST_MESSAGE = "initRequest";
  const INIT_RESPONSE_MESSAGE = "initResponse";
  const CLEANUP_REQUEST_MESSAGE = "cleanupRequest";
  // Target origin passed to postMessage.
  const TARGET_ORIGIN = "*";
  // Delay (in ms) given to setTimeout before a frame is handled without its own response.
  const TIMEOUT_INIT_REQUEST_MESSAGE = 750;

  // --- Frame discovery ----------------------------------------------------
  // Elements considered as embedded documents.
  const FRAMES_CSS_SELECTOR = "iframe, frame, object[type=\"text/html\"][data]";
  // Only frame URLs matching this pattern are fetched.
  const PREFIX_VALID_FRAME_URL = /^https?:\/\//;

  // --- Window identifiers -------------------------------------------------
  // Identifiers are built as "<parent id><separator><frame index>", starting from the top window id.
  const TOP_WINDOW_ID = "0";
  const WINDOW_ID_SEPARATOR = ".";
  const TOP_WINDOW = window == top;

  // Session id => { frames, resolve? }
  const sessions = new Map();
  // Known immediately in the top window; assigned from the init/cleanup request elsewhere.
  let windowId = TOP_WINDOW ? TOP_WINDOW_ID : undefined;
  // Capturing "message" listener handling the strings posted by sendMessage().
  // Only string payloads starting with MESSAGE_PREFIX are considered; everything else is left untouched.
  // Any script able to reach this window can post such a string, so the payload is validated
  // before being used instead of letting the listener throw.
  addEventListener("message", event => {
    if (typeof event.data != "string" || !event.data.startsWith(MESSAGE_PREFIX)) {
      return;
    }
    let message;
    try {
      message = JSON.parse(event.data.substring(MESSAGE_PREFIX.length));
    } catch (error) {
      // Prefixed but not valid JSON: not one of our messages, let it propagate normally.
      return;
    }
    if (!message || typeof message != "object") {
      // e.g. the payload "null": nothing to dispatch.
      return;
    }
    if (!TOP_WINDOW && message.method == INIT_REQUEST_MESSAGE) {
      // A parent window asks this frame to report its content.
      window.stop();
      initRequest(message);
      if (message.options.loadDeferredImages && window.lazyLoader) {
        lazyLoader.process(message.options);
      }
    } else if (message.method == CLEANUP_REQUEST_MESSAGE) {
      cleanupRequest(message);
    } else if (message.method == INIT_RESPONSE_MESSAGE) {
      // The actual response is delivered through the transferred MessageChannel port
      // (see sendMessage with useChannel). Ignore the notification when no port came with it.
      const port = event.ports && event.ports[0];
      if (port) {
        port.onmessage = portEvent => initResponse(portEvent.data);
      }
    }
    event.preventDefault();
    event.stopPropagation();
  }, true);
  57. return {
  58. getAsync: async options => {
  59. const sessionId = options.sessionId || 0;
  60. options = JSON.parse(JSON.stringify(options));
  61. return new Promise(resolve => {
  62. sessions.set(sessionId, { frames: [], resolve });
  63. initRequest({ windowId, sessionId, options });
  64. });
  65. },
  66. getSync: options => {
  67. const sessionId = options.sessionId || 0;
  68. options = JSON.parse(JSON.stringify(options));
  69. sessions.set(sessionId, { frames: [] });
  70. initRequest({ windowId, sessionId, options });
  71. return sessions.get(sessionId).frames;
  72. },
  73. cleanup: options => {
  74. const sessionId = options.sessionId || 0;
  75. options = JSON.parse(JSON.stringify(options));
  76. cleanupRequest({ windowId, sessionId, options });
  77. },
  78. initResponse,
  79. TIMEOUT_INIT_REQUEST_MESSAGE
  80. };
  /**
   * Handles an init request for the current window.
   * In a non-top window, the window id is taken from the request and the data of the
   * current document is sent back; in every case the frames of the document are processed.
   *
   * @param {{windowId: string, sessionId: *, options: Object}} message - the init request
   */
  function initRequest(message) {
    const { sessionId, options } = message;
    // The frame elements are collected before the document gets (pre/post) processed.
    const frameElements = document.querySelectorAll(FRAMES_CSS_SELECTOR);
    if (!TOP_WINDOW) {
      windowId = message.windowId;
      const ownFrameData = getFrameData(document, window, windowId, options);
      sendInitResponse({ framesData: [ownFrameData], sessionId });
    }
    processFrames(frameElements, options, windowId, sessionId);
  }
  /**
   * Handles a cleanup request for the current window: adopts the window id of the
   * request (non-top windows only) and cleans up the frames of the document.
   *
   * @param {{windowId: string, sessionId: *, options: Object}} message - the cleanup request
   */
  function cleanupRequest(message) {
    const { sessionId, options } = message;
    const frameElements = document.querySelectorAll(FRAMES_CSS_SELECTOR);
    if (!TOP_WINDOW) {
      windowId = message.windowId;
    }
    cleanupFrames(frameElements, options, windowId, sessionId);
  }
  /**
   * Merges the frames data of an init response into the matching session.
   * A frame entry is created the first time its window id is seen and is only
   * overwritten while it is not flagged as processed. Once no unprocessed entry
   * remains, the frames are sorted (deepest window ids first) and, when the session
   * has a resolve callback, handed to it before the session is removed.
   *
   * @param {{sessionId: *, framesData: Object[]}} message - the init response
   */
  function initResponse(message) {
    const session = sessions.get(message.sessionId);
    if (!session) {
      return;
    }
    // Properties copied from the received data, in this order.
    const copiedProperties = [
      "content", "baseURI", "title", "stylesheetContents", "imageData", "postersData",
      "canvasData", "fontsData", "usedFonts", "shadowRootContents", "processed"
    ];
    message.framesData.forEach(receivedData => {
      let knownFrame = session.frames.find(candidate => receivedData.windowId == candidate.windowId);
      if (!knownFrame) {
        knownFrame = { windowId: receivedData.windowId };
        session.frames.push(knownFrame);
      }
      if (knownFrame.processed) {
        // The first processed data received for a frame wins.
        return;
      }
      copiedProperties.forEach(propertyName => {
        knownFrame[propertyName] = receivedData[propertyName];
      });
    });
    const allProcessed = session.frames.every(frame => frame.processed);
    if (!allProcessed) {
      return;
    }
    const depth = id => id.split(WINDOW_ID_SEPARATOR).length;
    session.frames = session.frames.sort((frameA, frameB) => depth(frameB.windowId) - depth(frameA.windowId));
    if (session.resolve) {
      session.resolve(session.frames);
      sessions.delete(message.sessionId);
    }
  }
  /**
   * Processes frame elements: the message/fetch based processing is always started,
   * the direct (contentDocument) processing only when there is at least one element.
   *
   * @param {NodeList|Element[]} frameElements - frame elements of the parent document
   * @param {Object} options - options forwarded to the frames
   * @param {string} parentWindowId - window id of the document owning the elements
   * @param {*} sessionId - id of the session collecting the data
   */
  function processFrames(frameElements, options, parentWindowId, sessionId) {
    const forwardedArguments = [frameElements, options, parentWindowId, sessionId];
    processFramesAsync(...forwardedArguments);
    const hasFrames = Boolean(frameElements.length);
    if (hasFrames) {
      processFramesSync(...forwardedArguments);
    }
  }
  /**
   * Tags each frame element with its window id, posts it an init request and schedules
   * a fallback running after TIMEOUT_INIT_REQUEST_MESSAGE ms: the frame URL is fetched
   * (http/https only) and the data of the parsed document is reported, then the frame is
   * flagged as processed. The list of the expected window ids is reported immediately.
   *
   * Every response carries the session id; without it initResponse() cannot find the
   * session and drops the data. The fallback also always ends up flagging the frame as
   * processed, even when fetching or serializing fails, so that the session can complete.
   *
   * @param {NodeList|Element[]} frameElements - frame elements of the parent document
   * @param {Object} options - options forwarded to the frames
   * @param {string} parentWindowId - window id of the document owning the elements
   * @param {*} sessionId - id of the session collecting the data
   */
  function processFramesAsync(frameElements, options, parentWindowId, sessionId) {
    const framesData = [];
    frameElements.forEach((frameElement, frameIndex) => {
      const frameWindowId = parentWindowId + WINDOW_ID_SEPARATOR + frameIndex;
      frameElement.setAttribute(docHelper.WIN_ID_ATTRIBUTE_NAME, frameWindowId);
      framesData.push({ windowId: frameWindowId });
      try {
        sendMessage(frameElement.contentWindow, { method: INIT_REQUEST_MESSAGE, windowId: frameWindowId, sessionId, options });
      } catch (error) {
        /* ignored: the fallback below handles frames that cannot be reached */
      }
      setTimeout(async () => {
        let fetchedFrameData;
        try {
          if (frameElement.src && frameElement.src.match(PREFIX_VALID_FRAME_URL)) {
            // NOTE(review): nested frames of the fetched document are tagged relative to
            // parentWindowId rather than frameWindowId - confirm this is intended.
            const frameDoc = await getFrameDoc(frameElement.src, parentWindowId);
            if (frameDoc) {
              fetchedFrameData = getFrameData(frameDoc, null, frameWindowId, options);
            }
          }
        } catch (error) {
          // Fetching/serializing failed: fall through and flag the frame as processed.
          fetchedFrameData = undefined;
        }
        if (fetchedFrameData) {
          sendInitResponse({ framesData: [fetchedFrameData], sessionId });
          timeout.set(() => sendInitResponse({ framesData: [{ windowId: frameWindowId, processed: true }], sessionId }));
        } else {
          sendInitResponse({ framesData: [{ windowId: frameWindowId, processed: true }], sessionId });
        }
      }, TIMEOUT_INIT_REQUEST_MESSAGE);
    });
    sendInitResponse({ framesData, sessionId });
  }
  /**
   * Processes the frames whose document is directly readable through contentDocument:
   * loading is stopped, nested frames are processed recursively and the frame data is
   * collected. A frame failing during these steps is reported as processed without data;
   * frames without a readable document are skipped. The collected data is reported at once.
   *
   * @param {NodeList|Element[]} frameElements - frame elements of the parent document
   * @param {Object} options - options forwarded to the frames
   * @param {string} parentWindowId - window id of the document owning the elements
   * @param {*} sessionId - id of the session collecting the data
   */
  function processFramesSync(frameElements, options, parentWindowId, sessionId) {
    // Returns the document of the frame, or undefined when reading it throws.
    const readContentDocument = frameElement => {
      try {
        return frameElement.contentDocument;
      } catch (error) {
        return undefined;
      }
    };
    const collectedData = [];
    frameElements.forEach((frameElement, frameIndex) => {
      const frameWindowId = parentWindowId + WINDOW_ID_SEPARATOR + frameIndex;
      const frameDoc = readContentDocument(frameElement);
      if (!frameDoc) {
        return;
      }
      try {
        frameElement.contentWindow.stop();
        const nestedFrameElements = frameDoc.querySelectorAll(FRAMES_CSS_SELECTOR);
        processFrames(nestedFrameElements, options, frameWindowId, sessionId);
        collectedData.push(getFrameData(frameDoc, frameElement.contentWindow, frameWindowId, options));
      } catch (error) {
        collectedData.push({ windowId: frameWindowId, processed: true });
      }
    });
    sendInitResponse({ framesData: collectedData, sessionId });
  }
  /**
   * Removes the window id attribute of each frame element and posts a cleanup request
   * to its window, then recurses into the frames whose document is directly readable.
   *
   * @param {NodeList|Element[]} frameElements - frame elements of the parent document
   * @param {Object} options - options forwarded to the frames
   * @param {string} parentWindowId - window id of the document owning the elements
   * @param {*} sessionId - id of the session
   */
  function cleanupFrames(frameElements, options, parentWindowId, sessionId) {
    const childWindowId = frameIndex => parentWindowId + WINDOW_ID_SEPARATOR + frameIndex;

    // First pass: untag every frame and notify its window.
    frameElements.forEach((frameElement, frameIndex) => {
      const windowId = childWindowId(frameIndex);
      frameElement.removeAttribute(docHelper.WIN_ID_ATTRIBUTE_NAME);
      try {
        sendMessage(frameElement.contentWindow, { method: CLEANUP_REQUEST_MESSAGE, windowId, sessionId, options });
      } catch (error) {
        /* ignored */
      }
    });

    // Second pass: clean up the nested frames of the readable documents.
    frameElements.forEach((frameElement, frameIndex) => {
      let nestedDoc;
      try {
        nestedDoc = frameElement.contentDocument;
      } catch (error) {
        // ignored
      }
      if (!nestedDoc) {
        return;
      }
      try {
        cleanupFrames(nestedDoc.querySelectorAll(FRAMES_CSS_SELECTOR), options, childWindowId(frameIndex), sessionId);
      } catch (error) {
        // ignored
      }
    });
  }
  /**
   * Delivers an init response to the top window: directly through its frameTree
   * object when that call succeeds, through a message channel otherwise.
   * The message is flagged with the init response method in place.
   *
   * @param {Object} message - the response to deliver
   */
  function sendInitResponse(message) {
    Object.assign(message, { method: INIT_RESPONSE_MESSAGE });
    try {
      const topFrameTree = top.frameTree;
      topFrameTree.initResponse(message);
    } catch (error) {
      // The direct call failed (whatever the reason): fall back to postMessage.
      const useChannel = true;
      sendMessage(top, message, useChannel);
    }
  }
  /**
   * Posts a message to a window.
   * Without a channel, the whole message is serialized into the posted string.
   * With a channel, only the method is serialized and a MessageChannel port is
   * transferred along; the message itself is then posted through the channel.
   *
   * @param {Window} targetWindow - the window receiving the message
   * @param {Object} message - the message to send
   * @param {boolean} [useChannel] - whether the message goes through a MessageChannel
   */
  function sendMessage(targetWindow, message, useChannel) {
    if (!useChannel) {
      targetWindow.postMessage(MESSAGE_PREFIX + JSON.stringify(message), TARGET_ORIGIN);
      return;
    }
    const { port1: localPort, port2: remotePort } = new MessageChannel();
    const notification = MESSAGE_PREFIX + JSON.stringify({ method: message.method });
    targetWindow.postMessage(notification, TARGET_ORIGIN, [remotePort]);
    localPort.postMessage(message);
  }
  /**
   * Fetches a frame URL and parses the response into a document whose frame elements
   * are tagged with window ids built from parentWindowId.
   *
   * The URL is fetched with superFetch.fetch when available, with fetch otherwise; on a
   * status >= 400 a second attempt is made with superFetch.hostFetch when it exists.
   * superFetch is an optional global and is never dereferenced without checking it.
   *
   * @param {string} frameUrl - URL of the frame document
   * @param {string} parentWindowId - prefix of the window ids set on the nested frame elements
   * @returns {Promise<Document|undefined>} the parsed document, or undefined when the
   *   content could not be fetched, decoded or parsed
   */
  async function getFrameDoc(frameUrl, parentWindowId) {
    const fetchHelper = typeof superFetch !== "undefined" ? superFetch : null;
    let frameContent;
    try {
      frameContent = await ((fetchHelper && fetchHelper.fetch) || fetch)(frameUrl);
    } catch (error) {
      /* ignored */
    }
    if (frameContent && frameContent.status >= 400 && fetchHelper && fetchHelper.hostFetch) {
      try {
        frameContent = await fetchHelper.hostFetch(frameUrl);
      } catch (error) {
        /* ignored: the first response is kept */
      }
    }
    if (!frameContent) {
      return;
    }
    const contentType = frameContent.headers && frameContent.headers.get("content-type");
    const { mimeType, charset } = parseFrameContentType(contentType);
    let doc;
    try {
      const buffer = await frameContent.arrayBuffer();
      const content = decodeFrameContent(buffer, charset);
      doc = new DOMParser().parseFromString(content, mimeType);
    } catch (error) {
      /* ignored: e.g. MIME type not supported by DOMParser */
    }
    if (doc) {
      const frameElements = doc.documentElement.querySelectorAll(FRAMES_CSS_SELECTOR);
      frameElements.forEach((frameElement, frameIndex) => {
        const windowId = parentWindowId + WINDOW_ID_SEPARATOR + frameIndex;
        frameElement.setAttribute(docHelper.WIN_ID_ATTRIBUTE_NAME, windowId);
      });
      return doc;
    }
  }

  // Extracts the MIME type (defaulting to "text/html") and the charset, looked up in every
  // parameter, from a Content-Type header value that may be missing.
  function parseFrameContentType(contentType) {
    const charsetPrefix = "charset=";
    let mimeType = "text/html";
    let charset;
    if (contentType) {
      const parts = contentType.toLowerCase().split(";").map(part => part.trim());
      if (parts[0].includes("/")) {
        mimeType = parts[0];
      }
      const charsetParameter = parts.slice(1).find(part => part.startsWith(charsetPrefix));
      if (charsetParameter) {
        charset = docHelper.removeQuotes(charsetParameter.substring(charsetPrefix.length));
      }
    }
    return { mimeType, charset };
  }

  // Decodes a buffer with the given charset, falling back to the TextDecoder default
  // (UTF-8) when the charset label is missing or not supported.
  function decodeFrameContent(buffer, charset) {
    let decoder;
    try {
      decoder = new TextDecoder(charset);
    } catch (error) {
      decoder = new TextDecoder();
    }
    return decoder.decode(buffer);
  }
  /**
   * Builds the data describing a frame: the document is pre-processed, serialized and
   * post-processed with docHelper, then its base URI (without fragment), its title and
   * the data returned by the pre-processing are gathered. The result is flagged as processed.
   *
   * @param {Document} document - the document of the frame
   * @param {Window|null} window - the window of the frame, passed to docHelper.preProcessDoc
   * @param {string} windowId - the window id of the frame
   * @param {Object} options - options passed to docHelper
   * @returns {Object} the frame data
   */
  function getFrameData(document, window, windowId, options) {
    // Properties taken from the pre-processing result, in this order.
    const docDataProperties = [
      "stylesheetContents", "imageData", "postersData", "canvasData",
      "fontsData", "usedFonts", "shadowRootContents"
    ];
    const docData = docHelper.preProcessDoc(document, window, options);
    const content = docHelper.serialize(document);
    docHelper.postProcessDoc(document, options);
    const [baseURI] = document.baseURI.split("#");
    const frameData = { windowId, content, baseURI, title: document.title };
    for (const propertyName of docDataProperties) {
      frameData[propertyName] = docData[propertyName];
    }
    frameData.processed = true;
    return frameData;
  }
  299. })();