frame-tree.js 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279
/*
 * Copyright 2018 Gildas Lormeau
 * contact : gildas.lormeau <at> gmail.com
 *
 * This file is part of SingleFile.
 *
 * SingleFile is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * SingleFile is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with SingleFile. If not, see <http://www.gnu.org/licenses/>.
 */
/* global window, top, document, addEventListener, docHelper, timeout, MessageChannel, superFetch, fetch, TextDecoder, DOMParser */
  21. this.frameTree = this.frameTree || (() => {
  // --- Messaging protocol -------------------------------------------------
  // Prefix identifying string window messages that belong to this module.
  const MESSAGE_PREFIX = "__frameTree__::";
  // Values of the `method` field carried by the protocol messages.
  const INIT_REQUEST_MESSAGE = "initRequest";
  const INIT_RESPONSE_MESSAGE = "initResponse";
  // Target origin passed to postMessage().
  const TARGET_ORIGIN = "*";
  // Delay passed to timeout.set() before falling back to fetching a frame
  // (presumably milliseconds -- confirm against the `timeout` helper).
  const TIMEOUT_INIT_REQUEST_MESSAGE = 500;

  // --- Frame discovery ----------------------------------------------------
  // Elements handled as nested frames.
  const FRAMES_CSS_SELECTOR = "iframe, frame, object[type=\"text/html\"][data]";
  // Only frame URLs starting with http:// or https:// are fetched as a fallback.
  const PREFIX_VALID_FRAME_URL = /^https?:\/\//;

  // --- Content-type parsing -----------------------------------------------
  const REGEXP_SIMPLE_QUOTES_STRING = /^'(.*?)'$/;
  const REGEXP_DOUBLE_QUOTES_STRING = /^"(.*?)"$/;

  // --- Window identifiers -------------------------------------------------
  // The top window is "0"; a child id is its parent id + "." + frame index.
  const TOP_WINDOW_ID = "0";
  const WINDOW_ID_SEPARATOR = ".";
  const TOP_WINDOW = window == top;

  // Pending sessions keyed by session id; each entry holds the collected
  // `frames` and, for asynchronous requests, a `resolve` callback.
  const sessions = new Map();
  // Known immediately in the top window; assigned from the init request
  // message in every other window.
  let windowId = TOP_WINDOW ? TOP_WINDOW_ID : undefined;
  // Dispatches frame-tree protocol messages received through postMessage().
  // Any script in any window can post a string starting with MESSAGE_PREFIX,
  // so the payload is treated as untrusted: malformed JSON, a null payload
  // or a response without a transferred port is ignored instead of throwing
  // inside the event handler.
  addEventListener("message", event => {
    if (typeof event.data !== "string" || !event.data.startsWith(MESSAGE_PREFIX)) {
      return;
    }
    let message;
    try {
      message = JSON.parse(event.data.substring(MESSAGE_PREFIX.length));
    } catch (error) {
      // not a valid frame-tree message: ignored
      return;
    }
    if (!message) {
      return;
    }
    if (!TOP_WINDOW && message.method === INIT_REQUEST_MESSAGE) {
      // A parent window asks this frame to report its content: stop the
      // loading of the frame, then answer and process the nested frames.
      window.stop();
      initRequest(message);
    } else if (message.method === INIT_RESPONSE_MESSAGE) {
      // The actual response is delivered through the transferred port
      // (see the channel branch of sendMessage).
      const port = event.ports && event.ports[0];
      if (port) {
        port.onmessage = portEvent => initResponse(portEvent.data);
      }
    }
  }, false);
  51. return {
  52. getAsync: async options => {
  53. const sessionId = options.sessionId;
  54. options = JSON.parse(JSON.stringify(options));
  55. return new Promise(resolve => {
  56. sessions.set(sessionId, { frames: [], resolve });
  57. initRequest({ windowId, sessionId, options });
  58. });
  59. },
  60. getSync: options => {
  61. const sessionId = options.sessionId;
  62. options = JSON.parse(JSON.stringify(options));
  63. sessions.set(sessionId, { frames: [] });
  64. initRequest({ windowId, sessionId, options });
  65. return sessions.get(sessionId).frames;
  66. },
  67. initResponse
  68. };
  /**
   * Handles an init request in the current window: outside the top window it
   * adopts the window id carried by the message and reports the data of the
   * current document, then it processes the frames of the document.
   *
   * @param {Object} message - request carrying `windowId`, `sessionId` and `options`
   */
  function initRequest(message) {
    const { sessionId, options } = message;
    // The frame elements are collected before the document gets serialized.
    const frameElements = document.querySelectorAll(FRAMES_CSS_SELECTOR);
    if (!TOP_WINDOW) {
      windowId = message.windowId;
      const ownFrameData = getFrameData(document, window, windowId, options);
      sendInitResponse({ framesData: [ownFrameData], sessionId });
    }
    processFrames(frameElements, options, windowId, sessionId);
  }
  /**
   * Merges the frames data of a response into the matching session and, once
   * no unprocessed frame remains, closes the session: the frames are sorted
   * from the deepest window id to the shallowest and handed to the `resolve`
   * callback of the session when there is one.
   *
   * @param {Object} message - response carrying `sessionId` and `framesData`
   */
  function initResponse(message) {
    const session = sessions.get(message.sessionId);
    if (!session) {
      // Unknown or already completed session.
      return;
    }
    const copiedProperties = [
      "content",
      "baseURI",
      "title",
      "stylesheetContents",
      "imageData",
      "postersData",
      "canvasData",
      "fontsData",
      "usedFonts",
      "processed",
      "timeout"
    ];
    for (const receivedData of message.framesData) {
      let knownData = session.frames.find(candidate => receivedData.windowId == candidate.windowId);
      if (!knownData) {
        knownData = { windowId: receivedData.windowId };
        session.frames.push(knownData);
      }
      // Data of a frame already flagged as processed is never overwritten.
      if (!knownData.processed) {
        for (const propertyName of copiedProperties) {
          knownData[propertyName] = receivedData[propertyName];
        }
      }
    }
    const hasPendingFrame = session.frames.some(candidate => !candidate.processed);
    if (hasPendingFrame) {
      return;
    }
    sessions.delete(message.sessionId);
    // Depth of a window = number of segments of its id.
    const depthOf = id => id.split(WINDOW_ID_SEPARATOR).length;
    session.frames.sort((frame1, frame2) => depthOf(frame2.windowId) - depthOf(frame1.windowId));
    if (session.resolve) {
      session.resolve(session.frames);
    }
  }
  /**
   * Processes a list of frame elements: the asynchronous pass always runs
   * (it also reports an empty list), the synchronous pass runs only when
   * there is at least one frame element.
   *
   * @param {NodeList} frameElements - frame elements of a document
   * @param {Object} options - session options
   * @param {string} parentWindowId - id of the window owning the frame elements
   * @param {*} sessionId - id of the session
   */
  function processFrames(frameElements, options, parentWindowId, sessionId) {
    processFramesAsync(frameElements, options, parentWindowId, sessionId);
    if (!frameElements.length) {
      return;
    }
    processFramesSync(frameElements, options, parentWindowId, sessionId);
  }
  /**
   * Registers every frame element of a document in the session and, for the
   * frames whose document is not directly accessible, asks the frame to
   * report itself through a window message. A fallback scheduled with
   * timeout.set() then fetches the frame URL; the fetched data (or a timeout
   * marker when nothing could be fetched) is reported so that the session
   * can always complete.
   *
   * @param {NodeList|Array} frameElements - frame elements of a document
   * @param {Object} options - session options (`options.sessionId` is used to name the window id attribute)
   * @param {string} parentWindowId - id of the window owning the frame elements
   * @param {*} sessionId - id of the session the responses belong to
   */
  function processFramesAsync(frameElements, options, parentWindowId, sessionId) {
    const framesData = [];
    frameElements.forEach((frameElement, frameIndex) => {
      const windowId = parentWindowId + WINDOW_ID_SEPARATOR + frameIndex;
      frameElement.setAttribute(docHelper.windowIdAttributeName(options.sessionId), windowId);
      framesData.push({ windowId });
      let contentDocument;
      try {
        contentDocument = frameElement.contentDocument;
      } catch (error) {
        // ignored
      }
      if (contentDocument) {
        // Accessible documents are handled by processFramesSync.
        return;
      }
      try {
        sendMessage(frameElement.contentWindow, { method: INIT_REQUEST_MESSAGE, windowId, sessionId, options });
      } catch (error) {
        /* ignored */
      }
      timeout.set(async () => {
        let fetchedFrameData;
        try {
          if (frameElement.src && PREFIX_VALID_FRAME_URL.test(frameElement.src)) {
            const frameDoc = await getFrameDoc(frameElement.src, parentWindowId, options);
            if (frameDoc) {
              fetchedFrameData = getFrameData(frameDoc, null, windowId, options);
            }
          }
        } catch (error) {
          /* ignored: the frame is reported as timed out below */
        }
        // The session id is mandatory: initResponse() drops messages it
        // cannot match to a session. Data of a frame that already answered
        // by itself is not overwritten by initResponse().
        sendInitResponse({
          framesData: [fetchedFrameData || { windowId, processed: true, timeout: true }],
          sessionId
        });
      }, TIMEOUT_INIT_REQUEST_MESSAGE);
    });
    sendInitResponse({ framesData, sessionId });
  }
  /**
   * Processes the frame elements whose document is directly accessible: the
   * loading of the frame is stopped, its nested frames are processed first,
   * then its own data is collected. A frame whose processing throws is
   * reported as processed without data.
   *
   * @param {NodeList} frameElements - frame elements of a document
   * @param {Object} options - session options
   * @param {string} parentWindowId - id of the window owning the frame elements
   * @param {*} sessionId - id of the session
   */
  function processFramesSync(frameElements, options, parentWindowId, sessionId) {
    const collectedData = [];
    for (let frameIndex = 0; frameIndex < frameElements.length; frameIndex++) {
      const frameElement = frameElements[frameIndex];
      const frameWindowId = parentWindowId + WINDOW_ID_SEPARATOR + frameIndex;
      const frameDoc = frameElement.contentDocument;
      if (!frameDoc) {
        // Left to the message/fetch based processing.
        continue;
      }
      try {
        const frameWindow = frameElement.contentWindow;
        frameWindow.stop();
        const nestedFrameElements = frameDoc.querySelectorAll(FRAMES_CSS_SELECTOR);
        processFrames(nestedFrameElements, options, frameWindowId, sessionId);
        collectedData.push(getFrameData(frameDoc, frameElement.contentWindow, frameWindowId, options));
      } catch (error) {
        collectedData.push({ windowId: frameWindowId, processed: true });
      }
    }
    sendInitResponse({ framesData: collectedData, sessionId });
  }
  /**
   * Delivers an init response to the top window: directly through
   * `top.frameTree.initResponse` when that call succeeds, otherwise through
   * a window message using a message channel.
   *
   * @param {Object} message - response to deliver; its `method` property is set here
   */
  function sendInitResponse(message) {
    Object.assign(message, { method: INIT_RESPONSE_MESSAGE });
    try {
      const topFrameTree = top.frameTree;
      topFrameTree.initResponse(message);
    } catch (error) {
      // Direct access failed: fall back to messaging.
      sendMessage(top, message, true);
    }
  }
  /**
   * Posts a frame-tree message to a window.
   *
   * @param {Window} targetWindow - window receiving the message
   * @param {Object} message - message to send
   * @param {boolean} [useChannel] - when truthy, only the method name is posted as a prefixed
   *   JSON string and the message itself is sent through the port of a new MessageChannel
   */
  function sendMessage(targetWindow, message, useChannel) {
    if (!useChannel) {
      const serializedMessage = MESSAGE_PREFIX + JSON.stringify(message);
      targetWindow.postMessage(serializedMessage, TARGET_ORIGIN);
      return;
    }
    const { port1, port2 } = new MessageChannel();
    const announcement = MESSAGE_PREFIX + JSON.stringify({ method: message.method });
    targetWindow.postMessage(announcement, TARGET_ORIGIN, [port2]);
    port1.postMessage(message);
  }
  /**
   * Fetches the content of a frame URL and parses it into a document. Every
   * frame element found in the parsed document gets a window id attribute.
   *
   * The content is fetched with `superFetch.fetch` when available, with
   * `fetch` otherwise; on a response status >= 400, `superFetch.hostFetch`
   * is tried when available. The MIME type defaults to "text/html" when the
   * content-type header is missing or malformed, and an unknown charset
   * label falls back to the default encoding of TextDecoder.
   *
   * @param {string} frameUrl - URL of the frame
   * @param {string} parentWindowId - window id used as prefix for the ids of the nested frame elements
   * @param {Object} options - session options (`options.sessionId` is used to name the window id attribute)
   * @returns {Promise<Document|undefined>} the parsed document, or undefined when the content
   *   could not be fetched, decoded or parsed
   */
  async function getFrameDoc(frameUrl, parentWindowId, options) {
    // `superFetch` is an optional global: never dereference it unguarded.
    const hasSuperFetch = typeof superFetch !== "undefined" && Boolean(superFetch);
    let frameContent;
    try {
      frameContent = await ((hasSuperFetch && superFetch.fetch) || fetch)(frameUrl);
    } catch (error) {
      /* ignored */
    }
    if (frameContent && frameContent.status >= 400 && hasSuperFetch && superFetch.hostFetch) {
      try {
        frameContent = await superFetch.hostFetch(frameUrl);
      } catch (error) {
        /* ignored */
      }
    }
    if (!frameContent) {
      return;
    }
    const contentType = frameContent.headers && frameContent.headers.get("content-type");
    let charSet;
    let mimeType = "text/html";
    if (contentType) {
      const [mimeTypeValue, ...parameters] = contentType.toLowerCase().split(";");
      const trimmedMimeType = mimeTypeValue.trim();
      if (trimmedMimeType.indexOf("/") > 0) {
        mimeType = trimmedMimeType;
      }
      // The charset is not necessarily the first parameter of the header.
      for (const parameter of parameters) {
        const matchCharSet = parameter.trim().match(/^charset=(.*)/);
        if (matchCharSet) {
          charSet = removeQuotes(matchCharSet[1]);
          break;
        }
      }
    }
    let doc;
    try {
      const buffer = await frameContent.arrayBuffer();
      let decoder;
      try {
        decoder = new TextDecoder(charSet);
      } catch (error) {
        // Unknown or empty charset label: use the default encoding.
        decoder = new TextDecoder();
      }
      const content = decoder.decode(buffer);
      doc = new DOMParser().parseFromString(content, mimeType);
    } catch (error) {
      /* ignored */
    }
    if (!doc) {
      return;
    }
    // NOTE(review): the ids below are built from parentWindowId, i.e. like
    // the ids of the siblings of the fetched frame -- confirm whether the id
    // of the fetched frame itself was intended as prefix.
    const frameElements = doc.documentElement.querySelectorAll(FRAMES_CSS_SELECTOR);
    frameElements.forEach((frameElement, frameIndex) => {
      const windowId = parentWindowId + WINDOW_ID_SEPARATOR + frameIndex;
      frameElement.setAttribute(docHelper.windowIdAttributeName(options.sessionId), windowId);
    });
    return doc;
  }
  /**
   * Lower-cases and trims a string, then strips one pair of enclosing single
   * quotes or, failing that, one pair of enclosing double quotes.
   *
   * @param {string} string - value to clean up
   * @returns {string} the lower-cased, unquoted and trimmed value
   */
  function removeQuotes(string) {
    const normalized = string.toLowerCase().trim();
    const quotesPattern = REGEXP_SIMPLE_QUOTES_STRING.test(normalized)
      ? REGEXP_SIMPLE_QUOTES_STRING
      : REGEXP_DOUBLE_QUOTES_STRING;
    return normalized.replace(quotesPattern, "$1").trim();
  }
  /**
   * Builds the data describing a frame: the document is pre-processed,
   * serialized and post-processed through docHelper, then the result is
   * packed with the resources data returned by the pre-processing.
   *
   * @param {Document} document - document of the frame
   * @param {Window|null} window - window of the frame, passed to docHelper.preProcessDoc
   * @param {string} windowId - id of the frame window
   * @param {Object} options - session options
   * @returns {Object} frame data flagged as processed
   */
  function getFrameData(document, window, windowId, options) {
    const preProcessingData = docHelper.preProcessDoc(document, window, options);
    const serializedContent = docHelper.serialize(document);
    docHelper.postProcessDoc(document, options);
    // Base URI without its fragment.
    const [baseURI] = document.baseURI.split("#");
    const title = document.title;
    const {
      stylesheetContents,
      imageData,
      postersData,
      canvasData,
      fontsData,
      usedFonts
    } = preProcessingData;
    return {
      windowId,
      content: serializedContent,
      baseURI,
      title,
      stylesheetContents,
      imageData,
      postersData,
      canvasData,
      fontsData,
      usedFonts,
      processed: true
    };
  }
  263. })();