jsdom.js 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172
  1. /*
  2. * Copyright 2010-2020 Gildas Lormeau
  3. * contact : gildas.lormeau <at> gmail.com
  4. *
  5. * This file is part of SingleFile.
  6. *
  7. * The code in this file is free software: you can redistribute it and/or
  8. * modify it under the terms of the GNU Affero General Public License
  9. * (GNU AGPL) as published by the Free Software Foundation, either version 3
  10. * of the License, or (at your option) any later version.
  11. *
  12. * The code in this file is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero
  15. * General Public License for more details.
  16. *
  17. * As additional permission under GNU AGPL version 3 section 7, you may
  18. * distribute UNMODIFIED VERSIONS OF THIS file without the copy of the GNU
  19. * AGPL normally required by section 4, provided you include this license
  20. * notice and a URL through which recipients can access the Corresponding
  21. * Source.
  22. */
  23. /* global require, exports, Buffer */
  24. const crypto = require("crypto");
  25. const jsdom = require("jsdom");
  26. const { JSDOM, VirtualConsole } = jsdom;
  27. const iconv = require("iconv-lite");
  28. exports.initialize = async () => { };
  29. exports.getPageData = async options => {
  30. let win;
  31. try {
  32. const dom = await JSDOM.fromURL(options.url, await getBrowserOptions(options));
  33. win = dom.window;
  34. return await getPageData(win, options);
  35. } finally {
  36. if (win) {
  37. win.close();
  38. }
  39. }
  40. };
  41. exports.closeBrowser = () => { };
  42. async function getPageData(win, options) {
  43. const doc = win.document;
  44. const scripts = await require("./common/scripts.js").get(options);
  45. win.TextDecoder = class {
  46. constructor(utfLabel) {
  47. this.utfLabel = utfLabel;
  48. }
  49. decode(buffer) {
  50. return iconv.decode(Buffer.from(buffer), this.utfLabel);
  51. }
  52. };
  53. win.crypto = {
  54. subtle: {
  55. digest: async function digestText(algo, text) {
  56. const hash = crypto.createHash(algo.replace("-", "").toLowerCase());
  57. hash.update(text, "utf-8");
  58. return hash.digest();
  59. }
  60. }
  61. };
  62. win.Element.prototype.getBoundingClientRect = undefined;
  63. win.getComputedStyle = () => { };
  64. win.eval(scripts);
  65. if (win.document.readyState == "loading" || win.document.readyState == "interactive") {
  66. await new Promise(resolve => win.onload = resolve);
  67. }
  68. executeFrameScripts(doc, scripts);
  69. options.removeHiddenElements = false;
  70. options.loadDeferredImages = false;
  71. const pageData = await win.singlefile.lib.getPageData(options, { fetch: url => fetchResource(url, options) }, doc, win);
  72. if (options.includeInfobar) {
  73. await win.singlefile.common.ui.content.infobar.includeScript(pageData);
  74. }
  75. return pageData;
  76. async function fetchResource(resourceURL) {
  77. return new Promise((resolve, reject) => {
  78. const xhrRequest = new win.XMLHttpRequest();
  79. xhrRequest.withCredentials = true;
  80. xhrRequest.responseType = "arraybuffer";
  81. xhrRequest.onerror = event => reject(new Error(event.detail));
  82. xhrRequest.onreadystatechange = () => {
  83. if (xhrRequest.readyState == win.XMLHttpRequest.DONE) {
  84. resolve({
  85. arrayBuffer: async () => new Uint8Array(xhrRequest.response).buffer,
  86. headers: {
  87. get: headerName => xhrRequest.getResponseHeader(headerName)
  88. },
  89. status: xhrRequest.status
  90. });
  91. }
  92. };
  93. xhrRequest.open("GET", resourceURL, true);
  94. xhrRequest.send();
  95. });
  96. }
  97. }
  /**
   * Builds the options object handed to `JSDOM.fromURL`.
   *
   * - Requests go through a ResourceLoader subclass that merges
   *   `options.httpHeaders` into the request headers.
   * - `options.browserWidth` / `options.browserHeight`, when both set, are
   *   applied to the window's inner/outer size before parsing.
   * - `options.browserCookies`, when non-empty, are stored in a new CookieJar
   *   against `options.url`.
   *
   * @param {Object} options - capture options (`url`, `userAgent`, `httpHeaders`,
   *   `browserWidth`, `browserHeight`, `browserCookies`).
   * @returns {Promise<Object>} the jsdom options.
   * @throws rejects if the cookie jar refuses one of the cookies.
   */
  async function getBrowserOptions(options) {
    class ResourceLoader extends jsdom.ResourceLoader {
      // Overrides jsdom's request-option hook to inject the custom headers.
      _getRequestOptions(fetchOptions) {
        const requestOptions = super._getRequestOptions(fetchOptions);
        if (options.httpHeaders) {
          requestOptions.headers = Object.assign(requestOptions.headers, options.httpHeaders);
        }
        return requestOptions;
      }
    }
    const resourceLoader = new ResourceLoader({
      userAgent: options.userAgent
    });
    const jsdomOptions = {
      virtualConsole: new VirtualConsole(),
      userAgent: options.userAgent,
      pretendToBeVisual: true,
      runScripts: "outside-only",
      resources: resourceLoader
    };
    if (options.browserWidth && options.browserHeight) {
      jsdomOptions.beforeParse = function (window) {
        window.outerWidth = window.innerWidth = options.browserWidth;
        window.outerHeight = window.innerHeight = options.browserHeight;
      };
    }
    if (options.browserCookies && options.browserCookies.length) {
      const cookieJar = new jsdom.CookieJar();
      jsdomOptions.cookieJar = cookieJar;
      await Promise.all(options.browserCookies.map(cookie => {
        const cookieString = serializeCookie(cookie);
        const cookieOptions = {
          http: Boolean(cookie.httpOnly)
        };
        // Adapt the callback-style setCookie to a promise.
        return new Promise((resolve, reject) => cookieJar.setCookie(cookieString, options.url, cookieOptions, error => error ? reject(error) : resolve()));
      }));
    }
    return jsdomOptions;

    /**
     * Serializes one cookie description into a Set-Cookie style string.
     *
     * @param {Object} cookie - cookie with `name`, `value` and optional `path`,
     *   `domain`, `expires`, `secure`, `sameSite`.
     * @returns {string} e.g. "a=1;path=/;secure;samesite=Lax".
     */
    function serializeCookie(cookie) {
      const parts = [cookie.name + "=" + cookie.value];
      if (cookie.path) {
        parts.push("path=" + cookie.path);
      }
      if (cookie.domain) {
        parts.push("domain=" + cookie.domain);
      }
      if (cookie.expires) {
        // NOTE(review): `expires` is emitted as max-age (a number of seconds);
        // if callers pass an absolute timestamp this needs converting -- confirm.
        parts.push("max-age=" + cookie.expires);
      }
      if (cookie.secure) {
        parts.push("secure");
      }
      if (cookie.sameSite) {
        // Use the cookie's own value; reading it from `options` yielded
        // "samesite=undefined".
        parts.push("samesite=" + cookie.sameSite);
      }
      return parts.join(";");
    }
  }
  /**
   * Evaluates `scripts` in every `iframe`/`frame` of `doc`, then recurses into
   * each frame's own document so nested frames are covered as well.
   *
   * This is best effort: any error raised while handling a frame is ignored
   * and the next frame is processed.
   *
   * @param {Object} doc - document whose frames are processed.
   * @param {string} scripts - script source evaluated in each frame window.
   */
  function executeFrameScripts(doc, scripts) {
    for (const frameElement of doc.querySelectorAll("iframe, frame")) {
      try {
        const frameWindow = frameElement.contentWindow;
        // Same patch as the one applied to the top-level window.
        frameWindow.Element.prototype.getBoundingClientRect = undefined;
        frameWindow.eval(scripts);
        executeFrameScripts(frameElement.contentDocument, scripts);
      } catch (error) {
        // ignored
      }
    }
  }