Просмотр исходного кода

integration of Readability-readerable.js

Former-commit-id: 7c259b8a027ff7aa6b1b38910e3938d240423a3f
Gildas 6 лет назад
Родитель
Commit
acc4228bf4

+ 99 - 0
extension/lib/readability/Readability-readerable.js

@@ -0,0 +1,99 @@
+/* eslint-env es6:false */
+/* globals exports */
+/*
+ * Copyright (c) 2010 Arc90 Inc
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * This code is heavily based on Arc90's readability.js (1.7.1) script
+ * available at: http://code.google.com/p/arc90labs-readability
+ */
+
+var REGEXPS = {
+    // NOTE: These two regular expressions are duplicated in
+    // Readability.js. Please keep both copies in sync.
+    unlikelyCandidates: /-ad-|ai2html|banner|breadcrumbs|combx|comment|community|cover-wrap|disqus|extra|footer|gdpr|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager|popup|yom-remote/i,
+    okMaybeItsACandidate: /and|article|body|column|content|main|shadow/i,
+  };
+  
+  function isNodeVisible(node) {
+    // Have to null-check node.style to deal with SVG and MathML nodes.
+    return (!node.style || node.style.display != "none") && !node.hasAttribute("hidden")
+      && (!node.hasAttribute("aria-hidden") || node.getAttribute("aria-hidden") != "true");
+  }
+  
+  /**
+   * Decides whether or not the document is reader-able without parsing the whole thing.
+   *
+   * @return boolean Whether or not we suspect Readability.parse() will succeed at returning an article object.
+   */
+  function isProbablyReaderable(doc, isVisible) {
+    if (!isVisible) {
+      isVisible = isNodeVisible;
+    }
+  
+    var nodes = doc.querySelectorAll("p, pre");
+  
+    // Get <div> nodes which have <br> node(s) and append them into the `nodes` variable.
+    // Some articles' DOM structures might look like
+    // <div>
+    //   Sentences<br>
+    //   <br>
+    //   Sentences<br>
+    // </div>
+    var brNodes = doc.querySelectorAll("div > br");
+    if (brNodes.length) {
+      var set = new Set(nodes);
+      [].forEach.call(brNodes, function(node) {
+        set.add(node.parentNode);
+      });
+      nodes = Array.from(set);
+    }
+  
+    var score = 0;
+    // This is a little cheeky, we use the accumulator 'score' to decide what to return from
+    // this callback:
+    return [].some.call(nodes, function(node) {
+      if (!isVisible(node))
+        return false;
+  
+      var matchString = node.className + " " + node.id;
+      if (REGEXPS.unlikelyCandidates.test(matchString) &&
+          !REGEXPS.okMaybeItsACandidate.test(matchString)) {
+        return false;
+      }
+  
+      if (node.matches("li p")) {
+        return false;
+      }
+  
+      var textContentLength = node.textContent.trim().length;
+      if (textContentLength < 140) {
+        return false;
+      }
+  
+      score += Math.sqrt(textContentLength - 140);
+  
+      if (score > 20) {
+        return true;
+      }
+      return false;
+    });
+  }
+  
+  if (typeof exports === "object") {
+    exports.isProbablyReaderable = isProbablyReaderable;
+  }
+  

+ 3 - 0
extension/ui/bg/ui-editor.js

@@ -188,6 +188,9 @@ singlefile.extension.ui.bg.editor = (() => {
 			tabData.options.openEditor = false;
 			tabData.options.openEditor = false;
 			singlefile.extension.core.content.download.downloadPage(pageData, tabData.options);
 			singlefile.extension.core.content.download.downloadPage(pageData, tabData.options);
 		}
 		}
+		if (message.method == "disableFormatPage") {
+			formatPageButton.classList.remove("format-disabled");
+		}
 	};
 	};
 	window.onload = browser.runtime.sendMessage({ method: "editor.getTabData" });
 	window.onload = browser.runtime.sendMessage({ method: "editor.getTabData" });
 
 

+ 4 - 1
extension/ui/content/content-ui-editor-web.js

@@ -21,7 +21,7 @@
  *   Source.
  *   Source.
  */
  */
 
 
-/* global singlefile, window, document, fetch, DOMParser, getComputedStyle, setTimeout, clearTimeout, NodeFilter, Readability */
+/* global singlefile, window, document, fetch, DOMParser, getComputedStyle, setTimeout, clearTimeout, NodeFilter, Readability, isProbablyReaderable */
 
 
 (async () => {
 (async () => {
 
 
@@ -833,6 +833,9 @@ table {
 			deserializeShadowRoots(document);
 			deserializeShadowRoots(document);
 			const iconElement = document.querySelector("link[rel*=icon]");
 			const iconElement = document.querySelector("link[rel*=icon]");
 			window.parent.postMessage(JSON.stringify({ "method": "setMetadata", title: document.title, icon: iconElement && iconElement.href }), "*");
 			window.parent.postMessage(JSON.stringify({ "method": "setMetadata", title: document.title, icon: iconElement && iconElement.href }), "*");
+			if (!isProbablyReaderable(document)) {
+				window.parent.postMessage(JSON.stringify({ "method": "disableFormatPage" }), "*");
+			}
 			document.querySelectorAll(NOTE_TAGNAME).forEach(containerElement => attachNoteListeners(containerElement, true));
 			document.querySelectorAll(NOTE_TAGNAME).forEach(containerElement => attachNoteListeners(containerElement, true));
 			document.documentElement.appendChild(getStyleElement(HIGHLIGHTS_WEB_STYLESHEET));
 			document.documentElement.appendChild(getStyleElement(HIGHLIGHTS_WEB_STYLESHEET));
 			maskPageElement = getMaskElement(PAGE_MASK_CLASS, PAGE_MASK_CONTAINER_CLASS);
 			maskPageElement = getMaskElement(PAGE_MASK_CLASS, PAGE_MASK_CONTAINER_CLASS);

+ 1 - 1
extension/ui/editor/editor.html

@@ -51,7 +51,7 @@
 		</div>
 		</div>
 	</div>
 	</div>
 	<iframe class="editor"
 	<iframe class="editor"
-		srcdoc="&lt;!DOCTYPE html&gt; &lt;body&gt;&lt;script src=/extension/ui/content/content-ui-editor-web.js&gt;&lt;/script&gt;&lt;script src=/lib/single-file/index.js&gt;&lt;/script&gt;&lt;script src=/lib/single-file/modules/html-serializer.js&gt;&lt;/script&gt;&lt;script src=/extension/lib/readability/Readability.js&gt;&lt;/script&gt;&lt;/body&gt;"
+		srcdoc="&lt;!DOCTYPE html&gt; &lt;body&gt;&lt;script src=/extension/ui/content/content-ui-editor-web.js&gt;&lt;/script&gt;&lt;script src=/lib/single-file/index.js&gt;&lt;/script&gt;&lt;script src=/lib/single-file/modules/html-serializer.js&gt;&lt;/script&gt;&lt;script src=/extension/lib/readability/Readability.js&gt;&lt;/script&gt;&lt;/script&gt;&lt;script src=/extension/lib/readability/Readability-readerable.js&gt;&lt;/script&gt;&lt;/body&gt;"
 		sandbox="allow-scripts"></iframe>
 		sandbox="allow-scripts"></iframe>
 	<script type="text/javascript"
 	<script type="text/javascript"
 		src="/extension/lib/single-file/browser-polyfill/chrome-browser-polyfill.js"></script>
 		src="/extension/lib/single-file/browser-polyfill/chrome-browser-polyfill.js"></script>

+ 1 - 0
manifest.json

@@ -145,6 +145,7 @@
 		"lib/single-file/processors/hooks/content/content-hooks-frames-web.js",
 		"lib/single-file/processors/hooks/content/content-hooks-frames-web.js",
 		"common/ui/content/content-infobar-web.js",
 		"common/ui/content/content-infobar-web.js",
 		"extension/lib/readability/Readability.js",
 		"extension/lib/readability/Readability.js",
+		"extension/lib/readability/Readability-readerable.js",
 		"extension/ui/content/content-ui-editor-web.js",
 		"extension/ui/content/content-ui-editor-web.js",
 		"extension/ui/editor/editor-note-web.css",
 		"extension/ui/editor/editor-note-web.css",
 		"extension/ui/editor/editor-mask-web.css",
 		"extension/ui/editor/editor-mask-web.css",