filter.rs 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405
  1. //! Filtering and sorting the list of files before displaying them.
  2. use std::cmp::Ordering;
  3. use std::iter::FromIterator;
  4. #[cfg(unix)]
  5. use std::os::unix::fs::MetadataExt;
  6. use crate::fs::DotFilter;
  7. use crate::fs::File;
/// Flags used to manage the **file filter** process.
///
/// A [`FileFilter`] stores any number of these in its `flags` vector; a flag
/// counts as set when it is present in that vector.
#[derive(PartialEq, Eq, Debug, Clone)]
pub enum FileFilterFlags {
    /// Whether to reverse the sorting order. This would sort the largest
    /// files first, or files starting with Z, or the most-recently-changed
    /// ones, depending on the sort field.
    Reverse,

    /// Whether to only show directories.
    OnlyDirs,

    /// Whether to only show files.
    OnlyFiles,

    /// Whether to ignore symlinks.
    NoSymlinks,

    /// Whether to explicitly show symlinks.
    ShowSymlinks,
}
/// The **file filter** processes a list of files before displaying them to
/// the user, by removing files they don’t want to see, and putting the list
/// in the desired order.
///
/// Usually a user does not want to see *every* file in the list. The most
/// common case is to remove files starting with `.`, which are designated
/// as ‘hidden’ files.
///
/// The special entries `.` and `..` are not actually filtered out, but
/// need to be inserted into the list, in a special case.
///
/// The filter also governs sorting the list. After being filtered, pairs of
/// files are compared and sorted based on the result, with the sort field
/// performing the comparison.
#[derive(PartialEq, Eq, Debug, Clone)]
pub struct FileFilter {
    /// Whether directories should be listed first, and other types of file
    /// second. Some users prefer it like this.
    pub list_dirs_first: bool,

    /// The metadata field to sort by.
    pub sort_field: SortField,

    /// Flags that the file filtering process follows. A flag is in effect
    /// when it is contained in this vector.
    pub flags: Vec<FileFilterFlags>,

    /// Which invisible “dot” files to include when listing a directory.
    ///
    /// Files starting with a single “.” are used to determine “system” or
    /// “configuration” files that should not be displayed in a regular
    /// directory listing, and the directory entries “.” and “..” are
    /// considered extra-special.
    ///
    /// This came about more or less by a complete historical accident,
    /// when the original `ls` tried to hide `.` and `..`:
    ///
    /// [Linux History: How Dot Files Became Hidden Files](https://linux-audit.com/linux-history-how-dot-files-became-hidden-files/)
    pub dot_filter: DotFilter,

    /// Glob patterns to ignore. Any file name that matches *any* of these
    /// patterns won’t be displayed in the list.
    pub ignore_patterns: IgnorePatterns,

    /// Whether to ignore Git-ignored patterns.
    pub git_ignore: GitIgnore,

    /// Whether to ignore symlinks.
    ///
    /// NOTE(review): nothing in this file reads this field;
    /// `filter_child_files` checks `FileFilterFlags::NoSymlinks` in `flags`
    /// instead. Confirm whether it is still used elsewhere.
    pub no_symlinks: bool,

    /// Whether to explicitly show symlinks.
    ///
    /// NOTE(review): nothing in this file reads this field;
    /// `filter_child_files` checks `FileFilterFlags::ShowSymlinks` in `flags`
    /// instead. Confirm whether it is still used elsewhere.
    pub show_symlinks: bool,
}
  69. impl FileFilter {
  70. /// Remove every file in the given vector that does *not* pass the
  71. /// filter predicate for files found inside a directory.
  72. #[rustfmt::skip]
  73. pub fn filter_child_files(&self, is_recurse: bool, files: &mut Vec<File<'_>>) {
  74. use FileFilterFlags::{NoSymlinks, OnlyDirs, OnlyFiles, ShowSymlinks};
  75. files.retain(|f| !self.ignore_patterns.is_ignored(&f.name));
  76. files.retain(|f| {
  77. match (
  78. self.flags.contains(&OnlyDirs),
  79. self.flags.contains(&OnlyFiles),
  80. self.flags.contains(&NoSymlinks),
  81. self.flags.contains(&ShowSymlinks),
  82. ) {
  83. (true, false, false, false) => f.is_directory(),
  84. (true, false, true, false) => f.is_directory(),
  85. (true, false, false, true) => f.is_directory() || f.points_to_directory(),
  86. (false, true, false, false) => if is_recurse { true } else {f.is_file() },
  87. (false, true, false, true) => if is_recurse { true } else { f.is_file() || f.is_link() && !f.points_to_directory()
  88. },
  89. (false, false, true, false) => !f.is_link(),
  90. _ => true,
  91. }
  92. });
  93. }
  94. /// Remove every file in the given vector that does *not* pass the
  95. /// filter predicate for file names specified on the command-line.
  96. ///
  97. /// The rules are different for these types of files than the other
  98. /// type because the ignore rules can be used with globbing. For
  99. /// example, running `exa -I='*. tmp' .vimrc` shouldn’t filter out the
  100. /// dotfile, because it’s been directly specified. But running
  101. /// `exa -I='*.ogg' music/*` should filter out the ogg files obtained
  102. /// from the glob, even though the globbing is done by the shell!
  103. pub fn filter_argument_files(&self, files: &mut Vec<File<'_>>) {
  104. files.retain(|f| !self.ignore_patterns.is_ignored(&f.name));
  105. }
  106. /// Sort the files in the given vector based on the sort field option.
  107. pub fn sort_files<'a, F>(&self, files: &mut [F])
  108. where
  109. F: AsRef<File<'a>>,
  110. {
  111. files.sort_by(|a, b| self.sort_field.compare_files(a.as_ref(), b.as_ref()));
  112. if self.flags.contains(&FileFilterFlags::Reverse) {
  113. files.reverse();
  114. }
  115. if self.list_dirs_first {
  116. // This relies on the fact that `sort_by` is *stable*: it will keep
  117. // adjacent elements next to each other.
  118. files.sort_by(|a, b| {
  119. b.as_ref()
  120. .points_to_directory()
  121. .cmp(&a.as_ref().points_to_directory())
  122. });
  123. }
  124. }
  125. }
/// User-supplied field to sort by.
///
/// The comparison performed for each variant lives in
/// `SortField::compare_files`.
#[derive(PartialEq, Eq, Debug, Copy, Clone)]
pub enum SortField {
    /// Don’t apply any sorting. This is usually used as an optimisation in
    /// scripts, where the order doesn’t matter.
    Unsorted,

    /// The file name. This is the default sorting.
    Name(SortCase),

    /// The file’s extension, with extensionless files being listed first.
    /// Files with equal extensions are then ordered by name.
    Extension(SortCase),

    /// The file’s size, in bytes.
    Size,

    /// The file’s inode, which usually corresponds to the order in which
    /// files were created on the filesystem, more or less.
    #[cfg(unix)]
    FileInode,

    /// The time the file was modified (the “mtime”).
    ///
    /// As this is stored as a Unix timestamp, rather than a local time
    /// instance, the time zone does not matter and will only be used to
    /// display the timestamps, not compare them.
    ModifiedDate,

    /// The time the file was accessed (the “atime”).
    ///
    /// Oddly enough, this field rarely holds the *actual* accessed time.
    /// Recording a read time means writing to the file each time it’s read,
    /// which slows the whole operation down, so many systems will only
    /// update the timestamp in certain circumstances. This has become common
    /// enough that it’s now expected behaviour!
    /// <https://unix.stackexchange.com/a/8842>
    AccessedDate,

    /// The time the file was changed (the “ctime”).
    ///
    /// This field is used to mark the time when a file’s metadata
    /// changed — its permissions, owners, or link count.
    ///
    /// In original Unix, this was, however, meant as creation time.
    /// <https://www.bell-labs.com/usr/dmr/www/cacm.html>
    ChangedDate,

    /// The time the file was created (the “btime” or “birthtime”).
    CreatedDate,

    /// The type of the file: directories, links, pipes, regular files, etc.
    ///
    /// `compare_files` orders by the value returned from `File::type_char`,
    /// breaking ties with a case-sensitive natural comparison of the names.
    ///
    /// NOTE(review): this presumably follows the ordering implementation of
    /// `fs::fields::Type`, so changing that would change this — confirm.
    FileType,

    /// The “age” of the file, which is the time it was modified sorted
    /// backwards. The reverse of the `ModifiedDate` ordering!
    ///
    /// It turns out that listing the most-recently-modified files first is a
    /// common-enough use case that it deserves its own variant. This would be
    /// implemented by just using the modified date and setting the reverse
    /// flag, but this would make reversing *that* output not work, which is
    /// bad, even though that’s kind of nonsensical. So it’s its own variant
    /// that can be reversed like usual.
    ModifiedAge,

    /// The file’s name, except that if the name begins with `.`, that one
    /// leading `.` is ignored before sorting as `Name` would.
    NameMixHidden(SortCase),
}
/// Whether a field should be sorted case-sensitively or case-insensitively.
/// This determines which of the `natord` functions to use.
///
/// I kept on forgetting which one was sensitive and which one was
/// insensitive. Would a case-sensitive sort put capital letters first because
/// it takes the case of the letters into account, or intermingle them with
/// lowercase letters because it takes the difference between the two cases
/// into account? I gave up and just named these two variants after the
/// effects they have.
#[derive(PartialEq, Eq, Debug, Copy, Clone)]
pub enum SortCase {
    /// Sort files case-sensitively with uppercase first, with ‘A’ coming
    /// before ‘a’. Compared with `natord::compare`.
    ABCabc,

    /// Sort files case-insensitively, with ‘A’ being equal to ‘a’.
    /// Compared with `natord::compare_ignore_case`.
    AaBbCc,
}
  203. impl SortField {
  204. /// Compares two files to determine the order they should be listed in,
  205. /// depending on the search field.
  206. ///
  207. /// The `natord` crate is used here to provide a more *natural* sorting
  208. /// order than just sorting character-by-character. This splits filenames
  209. /// into groups between letters and numbers, and then sorts those blocks
  210. /// together, so `file10` will sort after `file9`, instead of before it
  211. /// because of the `1`.
  212. pub fn compare_files(self, a: &File<'_>, b: &File<'_>) -> Ordering {
  213. use self::SortCase::{ABCabc, AaBbCc};
  214. #[rustfmt::skip]
  215. return match self {
  216. Self::Unsorted => Ordering::Equal,
  217. Self::Name(ABCabc) => natord::compare(&a.name, &b.name),
  218. Self::Name(AaBbCc) => natord::compare_ignore_case(&a.name, &b.name),
  219. Self::Size => a.length().cmp(&b.length()),
  220. #[cfg(unix)]
  221. Self::FileInode => a.metadata.ino().cmp(&b.metadata.ino()),
  222. Self::ModifiedDate => a.modified_time().cmp(&b.modified_time()),
  223. Self::AccessedDate => a.accessed_time().cmp(&b.accessed_time()),
  224. Self::ChangedDate => a.changed_time().cmp(&b.changed_time()),
  225. Self::CreatedDate => a.created_time().cmp(&b.created_time()),
  226. Self::ModifiedAge => b.modified_time().cmp(&a.modified_time()), // flip b and a
  227. Self::FileType => match a.type_char().cmp(&b.type_char()) { // todo: this recomputes
  228. Ordering::Equal => natord::compare(&a.name, &b.name),
  229. order => order,
  230. },
  231. Self::Extension(ABCabc) => match a.ext.cmp(&b.ext) {
  232. Ordering::Equal => natord::compare(&a.name, &b.name),
  233. order => order,
  234. },
  235. Self::Extension(AaBbCc) => match a.ext.cmp(&b.ext) {
  236. Ordering::Equal => natord::compare_ignore_case(&a.name, &b.name),
  237. order => order,
  238. },
  239. Self::NameMixHidden(ABCabc) => natord::compare(
  240. Self::strip_dot(&a.name),
  241. Self::strip_dot(&b.name)
  242. ),
  243. Self::NameMixHidden(AaBbCc) => natord::compare_ignore_case(
  244. Self::strip_dot(&a.name),
  245. Self::strip_dot(&b.name)
  246. ),
  247. };
  248. }
  249. fn strip_dot(n: &str) -> &str {
  250. match n.strip_prefix('.') {
  251. Some(s) => s,
  252. None => n,
  253. }
  254. }
  255. }
/// The **ignore patterns** are a list of globs that are tested against
/// each filename, and if any of them match, that file isn’t displayed.
/// This lets a user hide, say, text files by ignoring `*.txt`.
///
/// The derived `Default` is the empty list, which matches nothing.
#[derive(PartialEq, Eq, Default, Debug, Clone)]
pub struct IgnorePatterns {
    /// The compiled globs; a file name is ignored if any one of them matches.
    patterns: Vec<glob::Pattern>,
}
  263. impl FromIterator<glob::Pattern> for IgnorePatterns {
  264. fn from_iter<I>(iter: I) -> Self
  265. where
  266. I: IntoIterator<Item = glob::Pattern>,
  267. {
  268. let patterns = iter.into_iter().collect();
  269. Self { patterns }
  270. }
  271. }
  272. impl IgnorePatterns {
  273. /// Create a new list from the input glob strings, turning the inputs that
  274. /// are valid glob patterns into an `IgnorePatterns`. The inputs that
  275. /// don’t parse correctly are returned separately.
  276. pub fn parse_from_iter<'a, I: IntoIterator<Item = &'a str>>(
  277. iter: I,
  278. ) -> (Self, Vec<glob::PatternError>) {
  279. let iter = iter.into_iter();
  280. // Almost all glob patterns are valid, so it’s worth pre-allocating
  281. // the vector with enough space for all of them.
  282. let mut patterns = match iter.size_hint() {
  283. (_, Some(count)) => Vec::with_capacity(count),
  284. _ => Vec::new(),
  285. };
  286. // Similarly, assume there won’t be any errors.
  287. let mut errors = Vec::new();
  288. for input in iter {
  289. match glob::Pattern::new(input) {
  290. Ok(pat) => patterns.push(pat),
  291. Err(e) => errors.push(e),
  292. }
  293. }
  294. (Self { patterns }, errors)
  295. }
  296. /// Create a new empty set of patterns that matches nothing.
  297. pub fn empty() -> Self {
  298. Self {
  299. patterns: Vec::new(),
  300. }
  301. }
  302. /// Test whether the given file should be hidden from the results.
  303. fn is_ignored(&self, file: &str) -> bool {
  304. self.patterns.iter().any(|p| p.matches(file))
  305. }
  306. }
/// Whether to ignore or display files that Git would ignore.
///
/// This type only records the choice; the Git lookup itself is not
/// performed anywhere in this file.
#[derive(PartialEq, Eq, Debug, Copy, Clone)]
pub enum GitIgnore {
    /// Ignore files that Git would ignore.
    CheckAndIgnore,

    /// Display files, even if Git would ignore them.
    Off,
}
  315. #[cfg(test)]
  316. mod test_ignores {
  317. use super::*;
  318. #[test]
  319. fn empty_matches_nothing() {
  320. let pats = IgnorePatterns::empty();
  321. assert!(!pats.is_ignored("nothing"));
  322. assert!(!pats.is_ignored("test.mp3"));
  323. }
  324. #[test]
  325. fn ignores_a_glob() {
  326. let (pats, fails) = IgnorePatterns::parse_from_iter(vec!["*.mp3"]);
  327. assert!(fails.is_empty());
  328. assert!(!pats.is_ignored("nothing"));
  329. assert!(pats.is_ignored("test.mp3"));
  330. }
  331. #[test]
  332. fn ignores_an_exact_filename() {
  333. let (pats, fails) = IgnorePatterns::parse_from_iter(vec!["nothing"]);
  334. assert!(fails.is_empty());
  335. assert!(pats.is_ignored("nothing"));
  336. assert!(!pats.is_ignored("test.mp3"));
  337. }
  338. #[test]
  339. fn ignores_both() {
  340. let (pats, fails) = IgnorePatterns::parse_from_iter(vec!["nothing", "*.mp3"]);
  341. assert!(fails.is_empty());
  342. assert!(pats.is_ignored("nothing"));
  343. assert!(pats.is_ignored("test.mp3"));
  344. }
  345. }