filter.rs 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406
  1. //! Filtering and sorting the list of files before displaying them.
  2. use std::cmp::Ordering;
  3. use std::iter::FromIterator;
  4. use std::os::unix::fs::MetadataExt;
  5. use std::path::Path;
  6. use glob;
  7. use natord;
  8. use fs::File;
  9. use fs::DotFilter;
  10. /// The **file filter** processes a list of files before displaying them to
  11. /// the user, by removing files they don’t want to see, and putting the list
  12. /// in the desired order.
  13. ///
  14. /// Usually a user does not want to see *every* file in the list. The most
  15. /// common case is to remove files starting with `.`, which are designated
  16. /// as ‘hidden’ files.
  17. ///
  18. /// The special files `.` and `..` files are not actually filtered out, but
  19. /// need to be inserted into the list, in a special case.
  20. ///
  21. /// The filter also governs sorting the list. After being filtered, pairs of
  22. /// files are compared and sorted based on the result, with the sort field
  23. /// performing the comparison.
  24. #[derive(PartialEq, Debug, Clone)]
  25. pub struct FileFilter {
  26. /// Whether directories should be listed first, and other types of file
  27. /// second. Some users prefer it like this.
  28. pub list_dirs_first: bool,
  29. /// The metadata field to sort by.
  30. pub sort_field: SortField,
  31. /// Whether to reverse the sorting order. This would sort the largest
  32. /// files first, or files starting with Z, or the most-recently-changed
  33. /// ones, depending on the sort field.
  34. pub reverse: bool,
  35. /// Whether to only show directories.
  36. pub only_dirs: bool,
  37. /// Which invisible “dot” files to include when listing a directory.
  38. ///
  39. /// Files starting with a single “.” are used to determine “system” or
  40. /// “configuration” files that should not be displayed in a regular
  41. /// directory listing, and the directory entries “.” and “..” are
  42. /// considered extra-special.
  43. ///
  44. /// This came about more or less by a complete historical accident,
  45. /// when the original `ls` tried to hide `.` and `..`:
  46. /// https://plus.google.com/+RobPikeTheHuman/posts/R58WgWwN9jp
  47. ///
  48. /// When one typed ls, however, these files appeared, so either Ken or
  49. /// Dennis added a simple test to the program. It was in assembler then,
  50. /// but the code in question was equivalent to something like this:
  51. /// if (name[0] == '.') continue;
  52. /// This statement was a little shorter than what it should have been,
  53. /// which is:
  54. /// if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0) continue;
  55. /// but hey, it was easy.
  56. ///
  57. /// Two things resulted.
  58. ///
  59. /// First, a bad precedent was set. A lot of other lazy programmers
  60. /// introduced bugs by making the same simplification. Actual files
  61. /// beginning with periods are often skipped when they should be counted.
  62. ///
  63. /// Second, and much worse, the idea of a "hidden" or "dot" file was
  64. /// created. As a consequence, more lazy programmers started dropping
  65. /// files into everyone's home directory. I don't have all that much
  66. /// stuff installed on the machine I'm using to type this, but my home
  67. /// directory has about a hundred dot files and I don't even know what
  68. /// most of them are or whether they're still needed. Every file name
  69. /// evaluation that goes through my home directory is slowed down by
  70. /// this accumulated sludge.
  71. pub dot_filter: DotFilter,
  72. /// Glob patterns to ignore. Any file name that matches *any* of these
  73. /// patterns won’t be displayed in the list.
  74. pub ignore_patterns: IgnorePatterns,
  75. /// Whether to ignore Git-ignored patterns.
  76. /// This is implemented completely separately from the actual Git
  77. /// repository scanning — a `.gitignore` file will still be scanned even
  78. /// if there’s no `.git` folder present.
  79. pub git_ignore: GitIgnore,
  80. }
  81. impl FileFilter {
  82. /// Remove every file in the given vector that does *not* pass the
  83. /// filter predicate for files found inside a directory.
  84. pub fn filter_child_files(&self, files: &mut Vec<File>) {
  85. files.retain(|f| !self.ignore_patterns.is_ignored(&f.name));
  86. if self.only_dirs {
  87. files.retain(|f| f.is_directory());
  88. }
  89. }
  90. /// Remove every file in the given vector that does *not* pass the
  91. /// filter predicate for file names specified on the command-line.
  92. ///
  93. /// The rules are different for these types of files than the other
  94. /// type because the ignore rules can be used with globbing. For
  95. /// example, running `exa -I='*.tmp' .vimrc` shouldn’t filter out the
  96. /// dotfile, because it’s been directly specified. But running
  97. /// `exa -I='*.ogg' music/*` should filter out the ogg files obtained
  98. /// from the glob, even though the globbing is done by the shell!
  99. pub fn filter_argument_files(&self, files: &mut Vec<File>) {
  100. files.retain(|f| !self.ignore_patterns.is_ignored(&f.name));
  101. }
  102. /// Sort the files in the given vector based on the sort field option.
  103. pub fn sort_files<'a, F>(&self, files: &mut Vec<F>)
  104. where F: AsRef<File<'a>> {
  105. files.sort_by(|a, b| self.sort_field.compare_files(a.as_ref(), b.as_ref()));
  106. if self.reverse {
  107. files.reverse();
  108. }
  109. if self.list_dirs_first {
  110. // This relies on the fact that `sort_by` is *stable*: it will keep
  111. // adjacent elements next to each other.
  112. files.sort_by(|a, b| b.as_ref().is_directory().cmp(&a.as_ref().is_directory()));
  113. }
  114. }
  115. }
  116. /// User-supplied field to sort by.
  117. #[derive(PartialEq, Debug, Copy, Clone)]
  118. pub enum SortField {
  119. /// Don’t apply any sorting. This is usually used as an optimisation in
  120. /// scripts, where the order doesn’t matter.
  121. Unsorted,
  122. /// The file name. This is the default sorting.
  123. Name(SortCase),
  124. /// The file’s extension, with extensionless files being listed first.
  125. Extension(SortCase),
  126. /// The file’s size, in bytes.
  127. Size,
  128. /// The file’s inode, which usually corresponds to the order in which
  129. /// files were created on the filesystem, more or less.
  130. FileInode,
  131. /// The time the file was modified (the “mtime”).
  132. ///
  133. /// As this is stored as a Unix timestamp, rather than a local time
  134. /// instance, the time zone does not matter and will only be used to
  135. /// display the timestamps, not compare them.
  136. ModifiedDate,
  137. /// The time the file was accessed (the “atime”).
  138. ///
  139. /// Oddly enough, this field rarely holds the *actual* accessed time.
  140. /// Recording a read time means writing to the file each time it’s read
  141. /// slows the whole operation down, so many systems will only update the
  142. /// timestamp in certain circumstances. This has become common enough that
  143. /// it’s now expected behaviour!
  144. /// http://unix.stackexchange.com/a/8842
  145. AccessedDate,
  146. /// The time the file was changed or created (the “ctime”).
  147. ///
  148. /// Contrary to the name, this field is used to mark the time when a
  149. /// file’s metadata changed -- its permissions, owners, or link count.
  150. ///
  151. /// In original Unix, this was, however, meant as creation time.
  152. /// https://www.bell-labs.com/usr/dmr/www/cacm.html
  153. CreatedDate,
  154. /// The type of the file: directories, links, pipes, regular, files, etc.
  155. ///
  156. /// Files are ordered according to the `PartialOrd` implementation of
  157. /// `fs::fields::Type`, so changing that will change this.
  158. FileType,
  159. /// The “age” of the file, which is the time it was modified sorted
  160. /// backwards. The reverse of the `ModifiedDate` ordering!
  161. ///
  162. /// It turns out that listing the most-recently-modified files first is a
  163. /// common-enough use case that it deserves its own variant. This would be
  164. /// implemented by just using the modified date and setting the reverse
  165. /// flag, but this would make reversing *that* output not work, which is
  166. /// bad, even though that’s kind of nonsensical. So it’s its own variant
  167. /// that can be reversed like usual.
  168. ModifiedAge,
  169. /// The file's name, however if the name of the file begins with `.`
  170. /// ignore the leading `.` and then sort as Name
  171. NameMixHidden(SortCase),
  172. }
  /// Whether a field is sorted case-sensitively or case-insensitively, which
  /// in turn decides which of the `natord` functions gets used.
  ///
  /// It is easy to forget which is which: does a case-sensitive sort put the
  /// capital letters first because it takes the case of the letters into
  /// account, or does it intermingle them with the lowercase letters because
  /// it takes the difference between the two cases into account? To avoid
  /// the question entirely, the two variants are named after the effect they
  /// have on the output.
  #[derive(Clone, Copy, Debug, PartialEq)]
  pub enum SortCase {

      /// Case-sensitive sorting with uppercase first, so ‘A’ comes before ‘a’.
      ABCabc,

      /// Case-insensitive sorting, where ‘A’ is treated as equal to ‘a’.
      AaBbCc,
  }
  190. impl SortField {
  191. /// Compares two files to determine the order they should be listed in,
  192. /// depending on the search field.
  193. ///
  194. /// The `natord` crate is used here to provide a more *natural* sorting
  195. /// order than just sorting character-by-character. This splits filenames
  196. /// into groups between letters and numbers, and then sorts those blocks
  197. /// together, so `file10` will sort after `file9`, instead of before it
  198. /// because of the `1`.
  199. pub fn compare_files(self, a: &File, b: &File) -> Ordering {
  200. use self::SortCase::{ABCabc, AaBbCc};
  201. match self {
  202. SortField::Unsorted => Ordering::Equal,
  203. SortField::Name(ABCabc) => natord::compare(&a.name, &b.name),
  204. SortField::Name(AaBbCc) => natord::compare_ignore_case(&a.name, &b.name),
  205. SortField::Size => a.metadata.len().cmp(&b.metadata.len()),
  206. SortField::FileInode => a.metadata.ino().cmp(&b.metadata.ino()),
  207. SortField::ModifiedDate => a.modified_time().cmp(&b.modified_time()),
  208. SortField::AccessedDate => a.accessed_time().cmp(&b.accessed_time()),
  209. SortField::CreatedDate => a.created_time().cmp(&b.created_time()),
  210. SortField::ModifiedAge => b.modified_time().cmp(&a.modified_time()), // flip b and a
  211. SortField::FileType => match a.type_char().cmp(&b.type_char()) { // todo: this recomputes
  212. Ordering::Equal => natord::compare(&*a.name, &*b.name),
  213. order => order,
  214. },
  215. SortField::Extension(ABCabc) => match a.ext.cmp(&b.ext) {
  216. Ordering::Equal => natord::compare(&*a.name, &*b.name),
  217. order => order,
  218. },
  219. SortField::Extension(AaBbCc) => match a.ext.cmp(&b.ext) {
  220. Ordering::Equal => natord::compare_ignore_case(&*a.name, &*b.name),
  221. order => order,
  222. },
  223. SortField::NameMixHidden(ABCabc) => natord::compare(
  224. SortField::strip_dot(&a.name),
  225. SortField::strip_dot(&b.name)
  226. ),
  227. SortField::NameMixHidden(AaBbCc) => natord::compare_ignore_case(
  228. SortField::strip_dot(&a.name),
  229. SortField::strip_dot(&b.name)
  230. )
  231. }
  232. }
  233. fn strip_dot(n: &str) -> &str {
  234. if n.starts_with('.') {
  235. &n[1..]
  236. } else {
  237. n
  238. }
  239. }
  240. }
  241. /// The **ignore patterns** are a list of globs that are tested against
  242. /// each filename, and if any of them match, that file isn’t displayed.
  243. /// This lets a user hide, say, text files by ignoring `*.txt`.
  244. #[derive(PartialEq, Default, Debug, Clone)]
  245. pub struct IgnorePatterns {
  246. patterns: Vec<glob::Pattern>,
  247. }
  248. impl FromIterator<glob::Pattern> for IgnorePatterns {
  249. fn from_iter<I: IntoIterator<Item = glob::Pattern>>(iter: I) -> Self {
  250. IgnorePatterns { patterns: iter.into_iter().collect() }
  251. }
  252. }
  253. impl IgnorePatterns {
  254. /// Create a new list from the input glob strings, turning the inputs that
  255. /// are valid glob patterns into an IgnorePatterns. The inputs that don’t
  256. /// parse correctly are returned separately.
  257. pub fn parse_from_iter<'a, I: IntoIterator<Item = &'a str>>(iter: I) -> (Self, Vec<glob::PatternError>) {
  258. let iter = iter.into_iter();
  259. // Almost all glob patterns are valid, so it’s worth pre-allocating
  260. // the vector with enough space for all of them.
  261. let mut patterns = match iter.size_hint() {
  262. (_, Some(count)) => Vec::with_capacity(count),
  263. _ => Vec::new(),
  264. };
  265. // Similarly, assume there won’t be any errors.
  266. let mut errors = Vec::new();
  267. for input in iter {
  268. match glob::Pattern::new(input) {
  269. Ok(pat) => patterns.push(pat),
  270. Err(e) => errors.push(e),
  271. }
  272. }
  273. (IgnorePatterns { patterns }, errors)
  274. }
  275. /// Create a new empty set of patterns that matches nothing.
  276. pub fn empty() -> IgnorePatterns {
  277. IgnorePatterns { patterns: Vec::new() }
  278. }
  279. /// Test whether the given file should be hidden from the results.
  280. fn is_ignored(&self, file: &str) -> bool {
  281. self.patterns.iter().any(|p| p.matches(file))
  282. }
  283. /// Test whether the given file should be hidden from the results.
  284. pub fn is_ignored_path(&self, file: &Path) -> bool {
  285. self.patterns.iter().any(|p| p.matches_path(file))
  286. }
  287. // TODO(ogham): The fact that `is_ignored_path` is pub while `is_ignored`
  288. // isn’t probably means it’s in the wrong place
  289. }
  /// Whether files that are mentioned in `.gitignore` files should be ignored
  /// or displayed.
  #[derive(Clone, Copy, Debug, PartialEq)]
  pub enum GitIgnore {

      /// Ignore the files that Git would ignore. This means checking for a
      /// `.gitignore` file, possibly recursively up the filesystem tree.
      CheckAndIgnore,

      /// Display files even if Git would ignore them.
      Off,
  }
  299. // This is not fully baked yet. The `ignore` crate lists a lot more files that
  300. // we aren’t checking:
  301. //
  302. // > By default, all ignore files found are respected. This includes .ignore,
  303. // > .gitignore, .git/info/exclude and even your global gitignore globs,
  304. // > usually found in $XDG_CONFIG_HOME/git/ignore.
  /// Tests for matching file names against `IgnorePatterns`.
  #[cfg(test)]
  mod test_ignores {
      use super::*;

      /// An empty pattern list must not hide any file.
      #[test]
      fn empty_matches_nothing() {
          let patterns = IgnorePatterns::empty();

          assert!(!patterns.is_ignored("nothing"));
          assert!(!patterns.is_ignored("test.mp3"));
      }

      /// A wildcard glob hides only the names it matches.
      #[test]
      fn ignores_a_glob() {
          let (patterns, errors) = IgnorePatterns::parse_from_iter(vec![ "*.mp3" ]);
          assert!(errors.is_empty());

          assert!(!patterns.is_ignored("nothing"));
          assert!(patterns.is_ignored("test.mp3"));
      }

      /// A glob without wildcards hides exactly that file name.
      #[test]
      fn ignores_an_exact_filename() {
          let (patterns, errors) = IgnorePatterns::parse_from_iter(vec![ "nothing" ]);
          assert!(errors.is_empty());

          assert!(patterns.is_ignored("nothing"));
          assert!(!patterns.is_ignored("test.mp3"));
      }

      /// With several patterns, matching any one of them is enough.
      #[test]
      fn ignores_both() {
          let (patterns, errors) = IgnorePatterns::parse_from_iter(vec![ "nothing", "*.mp3" ]);
          assert!(errors.is_empty());

          assert!(patterns.is_ignored("nothing"));
          assert!(patterns.is_ignored("test.mp3"));
      }
  }