1
0

jsdom.js 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175
  1. /*
  2. * Copyright 2010-2020 Gildas Lormeau
  3. * contact : gildas.lormeau <at> gmail.com
  4. *
  5. * This file is part of SingleFile.
  6. *
  7. * The code in this file is free software: you can redistribute it and/or
  8. * modify it under the terms of the GNU Affero General Public License
  9. * (GNU AGPL) as published by the Free Software Foundation, either version 3
  10. * of the License, or (at your option) any later version.
  11. *
  12. * The code in this file is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero
  15. * General Public License for more details.
  16. *
  17. * As additional permission under GNU AGPL version 3 section 7, you may
  18. * distribute UNMODIFIED VERSIONS OF THIS file without the copy of the GNU
  19. * AGPL normally required by section 4, provided you include this license
  20. * notice and a URL through which recipients can access the Corresponding
  21. * Source.
  22. */
  23. /* global require, exports, Buffer, setTimeout */
  24. const crypto = require("crypto");
  25. const jsdom = require("jsdom");
  26. const { JSDOM, VirtualConsole } = jsdom;
  27. const iconv = require("iconv-lite");
  28. exports.initialize = async () => { };
  29. exports.getPageData = async options => {
  30. let win;
  31. try {
  32. const dom = await JSDOM.fromURL(options.url, await getBrowserOptions(options));
  33. win = dom.window;
  34. return await getPageData(win, options);
  35. } finally {
  36. if (win) {
  37. win.close();
  38. }
  39. }
  40. };
  41. exports.closeBrowser = () => { };
  42. async function getPageData(win, options) {
  43. const doc = win.document;
  44. const scripts = await require("./common/scripts.js").get(options);
  45. win.TextDecoder = class {
  46. constructor(utfLabel) {
  47. this.utfLabel = utfLabel;
  48. }
  49. decode(buffer) {
  50. return iconv.decode(Buffer.from(buffer), this.utfLabel);
  51. }
  52. };
  53. win.crypto = {
  54. subtle: {
  55. digest: async function digestText(algo, text) {
  56. const hash = crypto.createHash(algo.replace("-", "").toLowerCase());
  57. hash.update(text, "utf-8");
  58. return hash.digest();
  59. }
  60. }
  61. };
  62. win.Element.prototype.getBoundingClientRect = undefined;
  63. win.getComputedStyle = () => { };
  64. win.eval(scripts);
  65. if (win.document.readyState == "loading" || win.document.readyState == "interactive") {
  66. await new Promise(resolve => win.onload = resolve);
  67. }
  68. executeFrameScripts(doc, scripts);
  69. options.removeHiddenElements = false;
  70. options.loadDeferredImages = false;
  71. if (options.browserWaitDelay) {
  72. await new Promise(resolve => setTimeout(resolve, options.browserWaitDelay));
  73. }
  74. const pageData = await win.singlefile.getPageData(options, { fetch: url => fetchResource(url, options) }, doc, win);
  75. if (options.includeInfobar) {
  76. await win.common.ui.content.infobar.includeScript(pageData);
  77. }
  78. return pageData;
  79. async function fetchResource(resourceURL) {
  80. return new Promise((resolve, reject) => {
  81. const xhrRequest = new win.XMLHttpRequest();
  82. xhrRequest.withCredentials = true;
  83. xhrRequest.responseType = "arraybuffer";
  84. xhrRequest.onerror = event => reject(new Error(event.detail));
  85. xhrRequest.onreadystatechange = () => {
  86. if (xhrRequest.readyState == win.XMLHttpRequest.DONE) {
  87. resolve({
  88. arrayBuffer: async () => new Uint8Array(xhrRequest.response).buffer,
  89. headers: {
  90. get: headerName => xhrRequest.getResponseHeader(headerName)
  91. },
  92. status: xhrRequest.status
  93. });
  94. }
  95. };
  96. xhrRequest.open("GET", resourceURL, true);
  97. xhrRequest.send();
  98. });
  99. }
  100. }
  /**
   * Builds the option object passed to `JSDOM.fromURL`.
   *
   * The result always contains a fresh `VirtualConsole`, the user agent,
   * `pretendToBeVisual: true`, `runScripts: "outside-only"` and a resource
   * loader that merges `options.httpHeaders` into every request. A
   * `beforeParse` hook setting the window size is added when both
   * `browserWidth` and `browserHeight` are given, and a cookie jar is added
   * when `browserCookies` is a non-empty array.
   *
   * @param {object} options - capture options; read here: `url`, `userAgent`,
   *   `httpHeaders`, `browserWidth`, `browserHeight`, `browserCookies`
   * @returns {Promise<object>} the jsdom options
   * @throws rejects with the error reported by `cookieJar.setCookie` if a cookie cannot be stored
   */
  async function getBrowserOptions(options) {
    class ResourceLoader extends jsdom.ResourceLoader {
      _getRequestOptions(fetchOptions) {
        const requestOptions = super._getRequestOptions(fetchOptions);
        if (options.httpHeaders) {
          // Tolerate a base implementation that returns no `headers` object.
          requestOptions.headers = Object.assign(requestOptions.headers || {}, options.httpHeaders);
        }
        return requestOptions;
      }
    }

    const jsdomOptions = {
      virtualConsole: new VirtualConsole(),
      userAgent: options.userAgent,
      pretendToBeVisual: true,
      runScripts: "outside-only",
      resources: new ResourceLoader({ userAgent: options.userAgent })
    };

    if (options.browserWidth && options.browserHeight) {
      jsdomOptions.beforeParse = function (window) {
        window.outerWidth = window.innerWidth = options.browserWidth;
        window.outerHeight = window.innerHeight = options.browserHeight;
      };
    }

    const { browserCookies } = options;
    if (browserCookies && browserCookies.length) {
      const cookieJar = new jsdom.CookieJar();
      jsdomOptions.cookieJar = cookieJar;
      await Promise.all(browserCookies.map(cookie => {
        const attributes = [`${cookie.name}=${cookie.value}`];
        if (cookie.path) {
          attributes.push(`path=${cookie.path}`);
        }
        if (cookie.domain) {
          attributes.push(`domain=${cookie.domain}`);
        }
        if (cookie.expires) {
          // NOTE(review): `expires` is emitted as a `max-age` value; confirm that
          // callers provide a lifetime in seconds rather than an absolute date.
          attributes.push(`max-age=${cookie.expires}`);
        }
        if (cookie.secure) {
          attributes.push("secure");
        }
        if (cookie.sameSite) {
          // The value comes from the cookie itself (the previous code read
          // `options.sameSite`, which produced "samesite=undefined").
          attributes.push(`samesite=${cookie.sameSite}`);
        }
        const cookieString = attributes.join(";");
        const cookieOptions = { http: Boolean(cookie.httpOnly) };
        // `setCookie` is callback based: adapt it to a promise.
        return new Promise((resolve, reject) => {
          cookieJar.setCookie(cookieString, options.url, cookieOptions, error => error ? reject(error) : resolve());
        });
      }));
    }
    return jsdomOptions;
  }
  /**
   * Evaluates `scripts` in every `iframe`/`frame` found in `doc`, then
   * recurses into each frame's own document. Before the evaluation, the
   * frame's `Element.prototype.getBoundingClientRect` is set to `undefined`.
   * Any error raised while handling a frame is swallowed, so the remaining
   * frames are still processed.
   *
   * @param {object} doc - document whose frames are visited
   * @param {string} scripts - source code evaluated in each frame window
   * @returns {undefined}
   */
  function executeFrameScripts(doc, scripts) {
    for (const frame of doc.querySelectorAll("iframe, frame")) {
      try {
        const frameWindow = frame.contentWindow;
        frameWindow.Element.prototype.getBoundingClientRect = undefined;
        frameWindow.eval(scripts);
        executeFrameScripts(frame.contentDocument, scripts);
      } catch (error) {
        // ignored
      }
    }
  }