git.rs 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416
  1. //! Getting the Git status of files and directories.
  2. use std::env;
  3. use std::ffi::OsStr;
  4. #[cfg(target_family = "unix")]
  5. use std::os::unix::ffi::OsStrExt;
  6. use std::path::{Path, PathBuf};
  7. use std::sync::Mutex;
  8. use log::*;
  9. use crate::fs::fields as f;
  10. /// A **Git cache** is assembled based on the user’s input arguments.
  11. ///
  12. /// This uses vectors to avoid the overhead of hashing: it’s not worth it when the
  13. /// expected number of Git repositories per exa invocation is 0 or 1...
  14. pub struct GitCache {
  15. /// A list of discovered Git repositories and their paths.
  16. repos: Vec<GitRepo>,
  17. /// Paths that we’ve confirmed do not have Git repositories underneath them.
  18. misses: Vec<PathBuf>,
  19. }
  20. impl GitCache {
  21. pub fn has_anything_for(&self, index: &Path) -> bool {
  22. self.repos.iter().any(|e| e.has_path(index))
  23. }
  24. pub fn get(&self, index: &Path, prefix_lookup: bool) -> f::Git {
  25. self.repos.iter()
  26. .find(|repo| repo.has_path(index))
  27. .map(|repo| repo.search(index, prefix_lookup))
  28. .unwrap_or_default()
  29. }
  30. }
  31. use std::iter::FromIterator;
  32. impl FromIterator<PathBuf> for GitCache {
  33. fn from_iter<I>(iter: I) -> Self
  34. where I: IntoIterator<Item=PathBuf>
  35. {
  36. let iter = iter.into_iter();
  37. let mut git = Self {
  38. repos: Vec::with_capacity(iter.size_hint().0),
  39. misses: Vec::new(),
  40. };
  41. if let Ok(path) = env::var("GIT_DIR") {
  42. // These flags are consistent with how `git` uses GIT_DIR:
  43. let flags = git2::RepositoryOpenFlags::NO_SEARCH | git2::RepositoryOpenFlags::NO_DOTGIT;
  44. match GitRepo::discover(path.into(), flags) {
  45. Ok(repo) => {
  46. debug!("Opened GIT_DIR repo");
  47. git.repos.push(repo);
  48. }
  49. Err(miss) => {
  50. git.misses.push(miss);
  51. }
  52. }
  53. }
  54. for path in iter {
  55. if git.misses.contains(&path) {
  56. debug!("Skipping {:?} because it already came back Gitless", path);
  57. }
  58. else if git.repos.iter().any(|e| e.has_path(&path)) {
  59. debug!("Skipping {:?} because we already queried it", path);
  60. }
  61. else {
  62. let flags = git2::RepositoryOpenFlags::FROM_ENV;
  63. match GitRepo::discover(path, flags) {
  64. Ok(r) => {
  65. if let Some(r2) = git.repos.iter_mut().find(|e| e.has_workdir(&r.workdir)) {
  66. debug!("Adding to existing repo (workdir matches with {:?})", r2.workdir);
  67. r2.extra_paths.push(r.original_path);
  68. continue;
  69. }
  70. debug!("Discovered new Git repo");
  71. git.repos.push(r);
  72. }
  73. Err(miss) => {
  74. git.misses.push(miss);
  75. }
  76. }
  77. }
  78. }
  79. git
  80. }
  81. }
  82. /// A **Git repository** is one we’ve discovered somewhere on the filesystem.
  83. pub struct GitRepo {
  84. /// The queryable contents of the repository: either a `git2` repo, or the
  85. /// cached results from when we queried it last time.
  86. contents: Mutex<GitContents>,
  87. /// The working directory of this repository.
  88. /// This is used to check whether two repositories are the same.
  89. workdir: PathBuf,
  90. /// The path that was originally checked to discover this repository.
  91. /// This is as important as the extra_paths (it gets checked first), but
  92. /// is separate to avoid having to deal with a non-empty Vec.
  93. original_path: PathBuf,
  94. /// Any other paths that were checked only to result in this same
  95. /// repository.
  96. extra_paths: Vec<PathBuf>,
  97. }
  98. /// A repository’s queried state.
  99. enum GitContents {
  100. /// All the interesting Git stuff goes through this.
  101. Before {
  102. repo: git2::Repository,
  103. },
  104. /// Temporary value used in `repo_to_statuses` so we can move the
  105. /// repository out of the `Before` variant.
  106. Processing,
  107. /// The data we’ve extracted from the repository, but only after we’ve
  108. /// actually done so.
  109. After {
  110. statuses: Git,
  111. },
  112. }
  113. impl GitRepo {
  114. /// Searches through this repository for a path (to a file or directory,
  115. /// depending on the prefix-lookup flag) and returns its Git status.
  116. ///
  117. /// Actually querying the `git2` repository for the mapping of paths to
  118. /// Git statuses is only done once, and gets cached so we don’t need to
  119. /// re-query the entire repository the times after that.
  120. ///
  121. /// The temporary `Processing` enum variant is used after the `git2`
  122. /// repository is moved out, but before the results have been moved in!
  123. /// See <https://stackoverflow.com/q/45985827/3484614>
  124. fn search(&self, index: &Path, prefix_lookup: bool) -> f::Git {
  125. use std::mem::replace;
  126. let mut contents = self.contents.lock().unwrap();
  127. if let GitContents::After { ref statuses } = *contents {
  128. debug!("Git repo {:?} has been found in cache", &self.workdir);
  129. return statuses.status(index, prefix_lookup);
  130. }
  131. debug!("Querying Git repo {:?} for the first time", &self.workdir);
  132. let repo = replace(&mut *contents, GitContents::Processing).inner_repo();
  133. let statuses = repo_to_statuses(&repo, &self.workdir);
  134. let result = statuses.status(index, prefix_lookup);
  135. let _processing = replace(&mut *contents, GitContents::After { statuses });
  136. result
  137. }
  138. /// Whether this repository has the given working directory.
  139. fn has_workdir(&self, path: &Path) -> bool {
  140. self.workdir == path
  141. }
  142. /// Whether this repository cares about the given path at all.
  143. fn has_path(&self, path: &Path) -> bool {
  144. path.starts_with(&self.original_path) || self.extra_paths.iter().any(|e| path.starts_with(e))
  145. }
  146. /// Open a Git repository. Depending on the flags, the path is either
  147. /// the repository's "gitdir" (or a "gitlink" to the gitdir), or the
  148. /// path is the start of a rootwards search for the repository.
  149. fn discover(path: PathBuf, flags: git2::RepositoryOpenFlags) -> Result<Self, PathBuf> {
  150. info!("Opening Git repository for {:?} ({:?})", path, flags);
  151. let unused: [&OsStr; 0] = [];
  152. let repo = match git2::Repository::open_ext(&path, flags, unused) {
  153. Ok(r) => r,
  154. Err(e) => {
  155. error!("Error opening Git repository for {path:?}: {e:?}");
  156. return Err(path);
  157. }
  158. };
  159. if let Some(workdir) = repo.workdir() {
  160. let workdir = workdir.to_path_buf();
  161. let contents = Mutex::new(GitContents::Before { repo });
  162. Ok(Self { contents, workdir, original_path: path, extra_paths: Vec::new() })
  163. }
  164. else {
  165. warn!("Repository has no workdir?");
  166. Err(path)
  167. }
  168. }
  169. }
  170. impl GitContents {
  171. /// Assumes that the repository hasn’t been queried, and extracts it
  172. /// (consuming the value) if it has. This is needed because the entire
  173. /// enum variant gets replaced when a repo is queried (see above).
  174. fn inner_repo(self) -> git2::Repository {
  175. if let Self::Before { repo } = self {
  176. repo
  177. }
  178. else {
  179. unreachable!("Tried to extract a non-Repository")
  180. }
  181. }
  182. }
  183. /// Iterates through a repository’s statuses, consuming it and returning the
  184. /// mapping of files to their Git status.
  185. /// We will have already used the working directory at this point, so it gets
  186. /// passed in rather than deriving it from the `Repository` again.
  187. fn repo_to_statuses(repo: &git2::Repository, workdir: &Path) -> Git {
  188. let mut statuses = Vec::new();
  189. info!("Getting Git statuses for repo with workdir {:?}", workdir);
  190. match repo.statuses(None) {
  191. Ok(es) => {
  192. for e in es.iter() {
  193. #[cfg(target_family = "unix")]
  194. let path = workdir.join(Path::new(OsStr::from_bytes(e.path_bytes())));
  195. // TODO: handle non Unix systems better:
  196. // https://github.com/ogham/exa/issues/698
  197. #[cfg(not(target_family = "unix"))]
  198. let path = workdir.join(Path::new(e.path().unwrap()));
  199. let elem = (path, e.status());
  200. statuses.push(elem);
  201. }
  202. // We manually add the `.git` at the root of the repo as ignored, since it is in practice.
  203. // Also we want to avoid `eza --tree --all --git-ignore` to display files inside `.git`.
  204. statuses.push((workdir.join(".git"), git2::Status::IGNORED));
  205. }
  206. Err(e) => {
  207. error!("Error looking up Git statuses: {:?}", e);
  208. }
  209. }
  210. Git { statuses }
  211. }
  212. // The `repo.statuses` call above takes a long time. exa debug output:
  213. //
  214. // 20.311276 INFO:exa::fs::feature::git: Getting Git statuses for repo with workdir "/vagrant/"
  215. // 20.799610 DEBUG:exa::output::table: Getting Git status for file "./Cargo.toml"
  216. //
  217. // Even inserting another logging line immediately afterwards doesn’t make it
  218. // look any faster.
  219. /// Container of Git statuses for all the files in this folder’s Git repository.
  220. struct Git {
  221. statuses: Vec<(PathBuf, git2::Status)>,
  222. }
  223. impl Git {
  224. /// Get either the file or directory status for the given path.
  225. /// “Prefix lookup” means that it should report an aggregate status of all
  226. /// paths starting with the given prefix (in other words, a directory).
  227. fn status(&self, index: &Path, prefix_lookup: bool) -> f::Git {
  228. if prefix_lookup { self.dir_status(index) }
  229. else { self.file_status(index) }
  230. }
  231. /// Get the user-facing status of a file.
  232. /// We check the statuses directly applying to a file, and for the ignored
  233. /// status we check if any of its parents directories is ignored by git.
  234. fn file_status(&self, file: &Path) -> f::Git {
  235. let path = reorient(file);
  236. let s = self.statuses.iter()
  237. .filter(|p| if p.1 == git2::Status::IGNORED {
  238. path.starts_with(&p.0)
  239. } else {
  240. p.0 == path
  241. })
  242. .fold(git2::Status::empty(), |a, b| a | b.1);
  243. let staged = index_status(s);
  244. let unstaged = working_tree_status(s);
  245. f::Git { staged, unstaged }
  246. }
  247. /// Get the combined, user-facing status of a directory.
  248. /// Statuses are aggregating (for example, a directory is considered
  249. /// modified if any file under it has the status modified), except for
  250. /// ignored status which applies to files under (for example, a directory
  251. /// is considered ignored if one of its parent directories is ignored).
  252. fn dir_status(&self, dir: &Path) -> f::Git {
  253. let path = reorient(dir);
  254. let s = self.statuses.iter()
  255. .filter(|p| if p.1 == git2::Status::IGNORED {
  256. path.starts_with(&p.0)
  257. } else {
  258. p.0.starts_with(&path)
  259. })
  260. .fold(git2::Status::empty(), |a, b| a | b.1);
  261. let staged = index_status(s);
  262. let unstaged = working_tree_status(s);
  263. f::Git { staged, unstaged }
  264. }
  265. }
  /// Turns `path` into an absolute path, resolved against the current
  /// directory.
  ///
  /// Comparisons only work on absolute paths: a repository knows a file as
  /// “/vagrant/README.md” (prefixed by its workdir), so asking it about
  /// “./README.md” would never match.
  ///
  /// If the current directory cannot be determined, "." is used as the base.
  /// If the joined path cannot be canonicalized, it is returned as joined.
  #[cfg(unix)]
  fn reorient(path: &Path) -> PathBuf {
      // TODO: I’m not 100% on this func tbh
      let base = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
      let absolute = base.join(path);

      match absolute.canonicalize() {
          Ok(resolved) => resolved,
          Err(_) => absolute,
      }
  }
  /// Turns `path` into a canonical path with the Windows verbatim prefix
  /// (`\\?\`) removed.
  ///
  /// Canonicalizing on Windows yields a `\\?\`-prefixed path, which has to be
  /// stripped for the result to be usable in path comparisons.
  ///
  /// If `path` cannot be canonicalized it is used as given. If the resulting
  /// path is not valid Unicode it is returned without stripping, rather than
  /// panicking.
  #[cfg(windows)]
  fn reorient(path: &Path) -> PathBuf {
      let unc_path = path.canonicalize().unwrap_or_else(|_| path.to_path_buf());

      // On Windows UNC path is returned. We need to strip the prefix for it to work.
      match unc_path.to_str() {
          Some(text) => PathBuf::from(text.trim_start_matches("\\\\?\\")),
          // The prefix cannot be stripped textually from a non-Unicode path;
          // hand the path back untouched instead of crashing the program.
          None => unc_path,
      }
  }
  287. /// The character to display if the file has been modified, but not staged.
  288. fn working_tree_status(status: git2::Status) -> f::GitStatus {
  289. match status {
  290. s if s.contains(git2::Status::WT_NEW) => f::GitStatus::New,
  291. s if s.contains(git2::Status::WT_MODIFIED) => f::GitStatus::Modified,
  292. s if s.contains(git2::Status::WT_DELETED) => f::GitStatus::Deleted,
  293. s if s.contains(git2::Status::WT_RENAMED) => f::GitStatus::Renamed,
  294. s if s.contains(git2::Status::WT_TYPECHANGE) => f::GitStatus::TypeChange,
  295. s if s.contains(git2::Status::IGNORED) => f::GitStatus::Ignored,
  296. s if s.contains(git2::Status::CONFLICTED) => f::GitStatus::Conflicted,
  297. _ => f::GitStatus::NotModified,
  298. }
  299. }
  300. /// The character to display if the file has been modified and the change
  301. /// has been staged.
  302. fn index_status(status: git2::Status) -> f::GitStatus {
  303. match status {
  304. s if s.contains(git2::Status::INDEX_NEW) => f::GitStatus::New,
  305. s if s.contains(git2::Status::INDEX_MODIFIED) => f::GitStatus::Modified,
  306. s if s.contains(git2::Status::INDEX_DELETED) => f::GitStatus::Deleted,
  307. s if s.contains(git2::Status::INDEX_RENAMED) => f::GitStatus::Renamed,
  308. s if s.contains(git2::Status::INDEX_TYPECHANGE) => f::GitStatus::TypeChange,
  309. _ => f::GitStatus::NotModified,
  310. }
  311. }
  312. fn current_branch(repo: &git2::Repository) -> Option<String>{
  313. let head = match repo.head() {
  314. Ok(head) => Some(head),
  315. Err(ref e) if e.code() == git2::ErrorCode::UnbornBranch || e.code() == git2::ErrorCode::NotFound => return None,
  316. Err(e) => {
  317. error!("Error looking up Git branch: {:?}", e);
  318. return None
  319. }
  320. };
  321. if let Some(h) = head{
  322. if let Some(s) = h.shorthand(){
  323. let branch_name = s.to_owned();
  324. if branch_name.len() > 10 {
  325. return Some(branch_name[..8].to_string()+"..");
  326. }
  327. return Some(branch_name);
  328. }
  329. }
  330. None
  331. }
  332. impl f::SubdirGitRepo{
  333. pub fn from_path(dir : &Path, status : bool) -> Self{
  334. let path = &reorient(dir);
  335. if let Ok(repo) = git2::Repository::open(path) {
  336. let branch = current_branch(&repo);
  337. if !status{
  338. return Self{ status: None, branch };
  339. }
  340. match repo.statuses(None) {
  341. Ok(es) => {
  342. if es.iter().any(|s| s.status() != git2::Status::IGNORED) {
  343. return Self { status: Some(f::SubdirGitRepoStatus::GitDirty), branch };
  344. }
  345. return Self { status: Some(f::SubdirGitRepoStatus::GitClean), branch };
  346. }
  347. Err(e) => {
  348. error!("Error looking up Git statuses: {e:?}");
  349. }
  350. }
  351. }
  352. f::SubdirGitRepo {
  353. status: if status { Some(f::SubdirGitRepoStatus::NoRepo) } else { None },
  354. branch: None,
  355. }
  356. }
  357. }