filter.rs 13 KB


  1. //! Filtering and sorting the list of files before displaying them.
  2. use std::cmp::Ordering;
  3. use std::iter::FromIterator;
  4. #[cfg(unix)]
  5. use std::os::unix::fs::MetadataExt;
  6. use crate::fs::DotFilter;
  7. use crate::fs::File;
  8. /// The **file filter** processes a list of files before displaying them to
  9. /// the user, by removing files they don’t want to see, and putting the list
  10. /// in the desired order.
  11. ///
  12. /// Usually a user does not want to see *every* file in the list. The most
  13. /// common case is to remove files starting with `.`, which are designated
  14. /// as ‘hidden’ files.
  15. ///
  16. /// The special files `.` and `..` files are not actually filtered out, but
  17. /// need to be inserted into the list, in a special case.
  18. ///
  19. /// The filter also governs sorting the list. After being filtered, pairs of
  20. /// files are compared and sorted based on the result, with the sort field
  21. /// performing the comparison.
  22. #[derive(PartialEq, Debug, Clone)]
  23. pub struct FileFilter {
  24. /// Whether directories should be listed first, and other types of file
  25. /// second. Some users prefer it like this.
  26. pub list_dirs_first: bool,
  27. /// The metadata field to sort by.
  28. pub sort_field: SortField,
  29. /// Whether to reverse the sorting order. This would sort the largest
  30. /// files first, or files starting with Z, or the most-recently-changed
  31. /// ones, depending on the sort field.
  32. pub reverse: bool,
  33. /// Whether to only show directories.
  34. pub only_dirs: bool,
  35. /// Which invisible “dot” files to include when listing a directory.
  36. ///
  37. /// Files starting with a single “.” are used to determine “system” or
  38. /// “configuration” files that should not be displayed in a regular
  39. /// directory listing, and the directory entries “.” and “..” are
  40. /// considered extra-special.
  41. ///
  42. /// This came about more or less by a complete historical accident,
  43. /// when the original `ls` tried to hide `.` and `..`:
  44. ///
  45. /// [Linux History: How Dot Files Became Hidden Files](https://linux-audit.com/linux-history-how-dot-files-became-hidden-files/)
  46. pub dot_filter: DotFilter,
  47. /// Glob patterns to ignore. Any file name that matches *any* of these
  48. /// patterns won’t be displayed in the list.
  49. pub ignore_patterns: IgnorePatterns,
  50. /// Whether to ignore Git-ignored patterns.
  51. pub git_ignore: GitIgnore,
  52. }
  53. impl FileFilter {
  54. /// Remove every file in the given vector that does *not* pass the
  55. /// filter predicate for files found inside a directory.
  56. pub fn filter_child_files(&self, files: &mut Vec<File<'_>>) {
  57. files.retain(|f| ! self.ignore_patterns.is_ignored(&f.name));
  58. if self.only_dirs {
  59. files.retain(File::is_directory);
  60. }
  61. }
  62. /// Remove every file in the given vector that does *not* pass the
  63. /// filter predicate for file names specified on the command-line.
  64. ///
  65. /// The rules are different for these types of files than the other
  66. /// type because the ignore rules can be used with globbing. For
  67. /// example, running `exa -I='*.tmp' .vimrc` shouldn’t filter out the
  68. /// dotfile, because it’s been directly specified. But running
  69. /// `exa -I='*.ogg' music/*` should filter out the ogg files obtained
  70. /// from the glob, even though the globbing is done by the shell!
  71. pub fn filter_argument_files(&self, files: &mut Vec<File<'_>>) {
  72. files.retain(|f| {
  73. ! self.ignore_patterns.is_ignored(&f.name)
  74. });
  75. }
  76. /// Sort the files in the given vector based on the sort field option.
  77. pub fn sort_files<'a, F>(&self, files: &mut Vec<F>)
  78. where F: AsRef<File<'a>>
  79. {
  80. files.sort_by(|a, b| {
  81. self.sort_field.compare_files(a.as_ref(), b.as_ref())
  82. });
  83. if self.reverse {
  84. files.reverse();
  85. }
  86. if self.list_dirs_first {
  87. // This relies on the fact that `sort_by` is *stable*: it will keep
  88. // adjacent elements next to each other.
  89. files.sort_by(|a, b| {
  90. b.as_ref().points_to_directory()
  91. .cmp(&a.as_ref().points_to_directory())
  92. });
  93. }
  94. }
  95. }
  96. /// User-supplied field to sort by.
  97. #[derive(PartialEq, Debug, Copy, Clone)]
  98. pub enum SortField {
  99. /// Don’t apply any sorting. This is usually used as an optimisation in
  100. /// scripts, where the order doesn’t matter.
  101. Unsorted,
  102. /// The file name. This is the default sorting.
  103. Name(SortCase),
  104. /// The file’s extension, with extensionless files being listed first.
  105. Extension(SortCase),
  106. /// The file’s size, in bytes.
  107. Size,
  108. /// The file’s inode, which usually corresponds to the order in which
  109. /// files were created on the filesystem, more or less.
  110. #[cfg(unix)]
  111. FileInode,
  112. /// The time the file was modified (the “mtime”).
  113. ///
  114. /// As this is stored as a Unix timestamp, rather than a local time
  115. /// instance, the time zone does not matter and will only be used to
  116. /// display the timestamps, not compare them.
  117. ModifiedDate,
  118. /// The time the file was accessed (the “atime”).
  119. ///
  120. /// Oddly enough, this field rarely holds the *actual* accessed time.
  121. /// Recording a read time means writing to the file each time it’s read
  122. /// slows the whole operation down, so many systems will only update the
  123. /// timestamp in certain circumstances. This has become common enough that
  124. /// it’s now expected behaviour!
  125. /// <https://unix.stackexchange.com/a/8842>
  126. AccessedDate,
  127. /// The time the file was changed (the “ctime”).
  128. ///
  129. /// This field is used to mark the time when a file’s metadata
  130. /// changed — its permissions, owners, or link count.
  131. ///
  132. /// In original Unix, this was, however, meant as creation time.
  133. /// <https://www.bell-labs.com/usr/dmr/www/cacm.html>
  134. ChangedDate,
  135. /// The time the file was created (the “btime” or “birthtime”).
  136. CreatedDate,
  137. /// The type of the file: directories, links, pipes, regular, files, etc.
  138. ///
  139. /// Files are ordered according to the `PartialOrd` implementation of
  140. /// `fs::fields::Type`, so changing that will change this.
  141. FileType,
  142. /// The “age” of the file, which is the time it was modified sorted
  143. /// backwards. The reverse of the `ModifiedDate` ordering!
  144. ///
  145. /// It turns out that listing the most-recently-modified files first is a
  146. /// common-enough use case that it deserves its own variant. This would be
  147. /// implemented by just using the modified date and setting the reverse
  148. /// flag, but this would make reversing *that* output not work, which is
  149. /// bad, even though that’s kind of nonsensical. So it’s its own variant
  150. /// that can be reversed like usual.
  151. ModifiedAge,
  152. /// The file's name, however if the name of the file begins with `.`
  153. /// ignore the leading `.` and then sort as Name
  154. NameMixHidden(SortCase),
  155. }
  /// Whether a field should be sorted case-sensitively or case-insensitively.
  /// This determines which of the `natord` functions to use.
  ///
  /// The variants are named after the effect they have, rather than
  /// “sensitive” and “insensitive”, because it is too easy to forget which
  /// is which: does a case-sensitive sort put capital letters first because
  /// it takes the case of the letters into account, or does it intermingle
  /// them with lowercase letters because it takes the difference between the
  /// two cases into account?
  #[derive(Debug, Clone, Copy, PartialEq)]
  pub enum SortCase {

      /// Sort files case-sensitively with uppercase first, with ‘A’ coming
      /// before ‘a’.
      ABCabc,

      /// Sort files case-insensitively, with ‘A’ being equal to ‘a’.
      AaBbCc,
  }
  173. impl SortField {
  174. /// Compares two files to determine the order they should be listed in,
  175. /// depending on the search field.
  176. ///
  177. /// The `natord` crate is used here to provide a more *natural* sorting
  178. /// order than just sorting character-by-character. This splits filenames
  179. /// into groups between letters and numbers, and then sorts those blocks
  180. /// together, so `file10` will sort after `file9`, instead of before it
  181. /// because of the `1`.
  182. pub fn compare_files(self, a: &File<'_>, b: &File<'_>) -> Ordering {
  183. use self::SortCase::{ABCabc, AaBbCc};
  184. match self {
  185. Self::Unsorted => Ordering::Equal,
  186. Self::Name(ABCabc) => natord::compare(&a.name, &b.name),
  187. Self::Name(AaBbCc) => natord::compare_ignore_case(&a.name, &b.name),
  188. Self::Size => a.metadata.len().cmp(&b.metadata.len()),
  189. #[cfg(unix)]
  190. Self::FileInode => a.metadata.ino().cmp(&b.metadata.ino()),
  191. Self::ModifiedDate => a.modified_time().cmp(&b.modified_time()),
  192. Self::AccessedDate => a.accessed_time().cmp(&b.accessed_time()),
  193. Self::ChangedDate => a.changed_time().cmp(&b.changed_time()),
  194. Self::CreatedDate => a.created_time().cmp(&b.created_time()),
  195. Self::ModifiedAge => b.modified_time().cmp(&a.modified_time()), // flip b and a
  196. Self::FileType => match a.type_char().cmp(&b.type_char()) { // todo: this recomputes
  197. Ordering::Equal => natord::compare(&*a.name, &*b.name),
  198. order => order,
  199. },
  200. Self::Extension(ABCabc) => match a.ext.cmp(&b.ext) {
  201. Ordering::Equal => natord::compare(&*a.name, &*b.name),
  202. order => order,
  203. },
  204. Self::Extension(AaBbCc) => match a.ext.cmp(&b.ext) {
  205. Ordering::Equal => natord::compare_ignore_case(&*a.name, &*b.name),
  206. order => order,
  207. },
  208. Self::NameMixHidden(ABCabc) => natord::compare(
  209. Self::strip_dot(&a.name),
  210. Self::strip_dot(&b.name)
  211. ),
  212. Self::NameMixHidden(AaBbCc) => natord::compare_ignore_case(
  213. Self::strip_dot(&a.name),
  214. Self::strip_dot(&b.name)
  215. )
  216. }
  217. }
  218. fn strip_dot(n: &str) -> &str {
  219. match n.strip_prefix('.') {
  220. Some(s) => s,
  221. None => n,
  222. }
  223. }
  224. }
  225. /// The **ignore patterns** are a list of globs that are tested against
  226. /// each filename, and if any of them match, that file isn’t displayed.
  227. /// This lets a user hide, say, text files by ignoring `*.txt`.
  228. #[derive(PartialEq, Default, Debug, Clone)]
  229. pub struct IgnorePatterns {
  230. patterns: Vec<glob::Pattern>,
  231. }
  232. impl FromIterator<glob::Pattern> for IgnorePatterns {
  233. fn from_iter<I>(iter: I) -> Self
  234. where I: IntoIterator<Item = glob::Pattern>
  235. {
  236. let patterns = iter.into_iter().collect();
  237. Self { patterns }
  238. }
  239. }
  240. impl IgnorePatterns {
  241. /// Create a new list from the input glob strings, turning the inputs that
  242. /// are valid glob patterns into an `IgnorePatterns`. The inputs that
  243. /// don’t parse correctly are returned separately.
  244. pub fn parse_from_iter<'a, I: IntoIterator<Item = &'a str>>(iter: I) -> (Self, Vec<glob::PatternError>) {
  245. let iter = iter.into_iter();
  246. // Almost all glob patterns are valid, so it’s worth pre-allocating
  247. // the vector with enough space for all of them.
  248. let mut patterns = match iter.size_hint() {
  249. (_, Some(count)) => Vec::with_capacity(count),
  250. _ => Vec::new(),
  251. };
  252. // Similarly, assume there won’t be any errors.
  253. let mut errors = Vec::new();
  254. for input in iter {
  255. match glob::Pattern::new(input) {
  256. Ok(pat) => patterns.push(pat),
  257. Err(e) => errors.push(e),
  258. }
  259. }
  260. (Self { patterns }, errors)
  261. }
  262. /// Create a new empty set of patterns that matches nothing.
  263. pub fn empty() -> Self {
  264. Self { patterns: Vec::new() }
  265. }
  266. /// Test whether the given file should be hidden from the results.
  267. fn is_ignored(&self, file: &str) -> bool {
  268. self.patterns.iter().any(|p| p.matches(file))
  269. }
  270. }
  /// Whether files that Git would ignore are hidden or displayed.
  #[derive(Debug, Clone, Copy, PartialEq)]
  pub enum GitIgnore {

      /// Ignore files that Git would ignore.
      CheckAndIgnore,

      /// Display files, even if Git would ignore them.
      Off,
  }
  #[cfg(test)]
  mod test_ignores {
      use super::*;

      /// Parses the given globs, checking that every one of them is valid.
      fn parse(globs: &[&'static str]) -> IgnorePatterns {
          let (pats, fails) = IgnorePatterns::parse_from_iter(globs.iter().copied());
          assert!(fails.is_empty());
          pats
      }

      #[test]
      fn empty_matches_nothing() {
          let pats = IgnorePatterns::empty();
          assert!(!pats.is_ignored("nothing"));
          assert!(!pats.is_ignored("test.mp3"));
      }

      #[test]
      fn ignores_a_glob() {
          let pats = parse(&["*.mp3"]);
          assert!(!pats.is_ignored("nothing"));
          assert!(pats.is_ignored("test.mp3"));
      }

      #[test]
      fn ignores_an_exact_filename() {
          let pats = parse(&["nothing"]);
          assert!(pats.is_ignored("nothing"));
          assert!(!pats.is_ignored("test.mp3"));
      }

      #[test]
      fn ignores_both() {
          let pats = parse(&["nothing", "*.mp3"]);
          assert!(pats.is_ignored("nothing"));
          assert!(pats.is_ignored("test.mp3"));
      }
  }