// single-file-node.js (2.7 KB)
  1. /*
  2. * Copyright 2018 Gildas Lormeau
  3. * contact : gildas.lormeau <at> gmail.com
  4. *
  5. * This file is part of SingleFile.
  6. *
  7. * SingleFile is free software: you can redistribute it and/or modify
  8. * it under the terms of the GNU Lesser General Public License as published by
  9. * the Free Software Foundation, either version 3 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * SingleFile is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public License
  18. * along with SingleFile. If not, see <http://www.gnu.org/licenses/>.
  19. */
  20. const SingleFileCore = require("./single-file-core");
  21. // --------
  22. // Download
  23. // --------
  24. const USER_AGENT = "Mozilla/5.0 (compatible; SingleFile Bot/1.0)";
  25. const request = require("request-promise-native");
  26. const dataUri = require("strong-data-uri");
  27. const iconv = require("iconv-lite");
  28. const http = require("http");
  29. const https = require("https");
  // Apply the same `maxSockets` cap (5) to both the HTTP and the HTTPS global agents.
  for (const globalAgent of [http.globalAgent, https.globalAgent]) {
    globalAgent.maxSockets = 5;
  }
  /**
   * Resource downloader handed to the SingleFile core.
   *
   * Every failure (network error, encoding error) is converted into an empty
   * placeholder instead of an exception, so a single broken resource cannot
   * abort the processing of a whole page.
   */
  class Download {
    /**
     * Fetches a resource with a GET request and returns its content.
     *
     * @param {string} resourceURL - URL of the resource to fetch.
     * @param {boolean} asDataURI - when truthy, the body is returned encoded as a
     *   data URI; otherwise it is returned as decoded text.
     * @returns {Promise<string>} the data URI or the decoded text. On failure the
     *   result is `"data:base64,"` (data URI mode) or `""` (text mode).
     */
    static async getContent(resourceURL, asDataURI) {
      const requestOptions = {
        method: "GET",
        uri: resourceURL,
        resolveWithFullResponse: true,
        // `encoding: null` makes the body a raw Buffer, so the charset can be
        // applied here rather than by the HTTP client.
        encoding: null,
        headers: {
          "User-Agent": USER_AGENT
        }
      };
      let response;
      try {
        response = await request(requestOptions);
      } catch (e) {
        // Deliberate best effort: an unreachable resource yields an empty placeholder.
        return asDataURI ? "data:base64," : "";
      }
      // The header may be absent: never dereference it without checking first.
      const contentType = response.headers["content-type"];
      if (asDataURI) {
        try {
          return dataUri.encode(response.body, contentType);
        } catch (e) {
          return "data:base64,";
        }
      }
      // Capture only the charset value: stop at the next parameter separator and
      // ignore optional surrounding quotes (e.g. `charset="utf-8"; boundary=x`).
      const matchCharset = typeof contentType === "string"
        ? contentType.match(/;\s*charset\s*=\s*"?([^";]+)/i)
        : null;
      const charset = matchCharset ? matchCharset[1].trim() : "";
      if (charset) {
        try {
          return iconv.decode(response.body, charset);
        } catch (e) {
          // Unknown or unsupported charset: fall back to UTF-8 below.
        }
      }
      return response.body.toString("utf8");
    }
  }
  69. // ---
  70. // URL
  71. // ---
  72. const url = require("url");
  /**
   * Minimal URL holder: exposes the absolute form of a possibly relative
   * reference through its `href` property.
   */
  class URL {
    /**
     * @param {string} resourceUrl - absolute or relative reference to resolve.
     * @param {string} baseURI - base URI the reference is resolved against.
     */
    constructor(resourceUrl, baseURI) {
      const absoluteHref = url.resolve(baseURI, resourceUrl);
      this.href = absoluteHref;
    }
  }
  78. // ---
  79. // DOM
  80. // ---
  81. const jsdom = require("jsdom");
  /**
   * Factory producing the DOM facade consumed by the SingleFile core, backed by jsdom.
   */
  class DOM {
    /**
     * Parses a page with jsdom and exposes the pieces needed to process it.
     *
     * @param {string} pageContent - markup passed to the JSDOM constructor.
     * @param {string} url - value passed as the `url` option of the JSDOM constructor.
     * @returns {{DOMParser: Function, document: Object, serialize: Function}}
     *   the window's `DOMParser`, its `document`, and a function returning
     *   `dom.serialize()`.
     */
    static create(pageContent, url) {
      const { JSDOM, VirtualConsole } = jsdom;
      const options = {
        url,
        virtualConsole: new VirtualConsole(),
        userAgent: USER_AGENT
      };
      const dom = new JSDOM(pageContent, options);
      const { DOMParser, document } = dom.window;
      return {
        DOMParser,
        document,
        serialize: () => dom.serialize()
      };
    }
  }
  // Export the result of calling SingleFileCore with the Download, DOM and URL classes defined in this file.
  92. module.exports = SingleFileCore(Download, DOM, URL);