git.rs 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408
  1. //! Getting the Git status of files and directories.
  2. use std::ffi::OsStr;
  3. #[cfg(target_family = "unix")]
  4. use std::os::unix::ffi::OsStrExt;
  5. use std::path::{Path, PathBuf};
  6. use std::sync::Mutex;
  7. use log::*;
  8. use crate::fs::fields as f;
  9. /// A **Git cache** is assembled based on the user’s input arguments.
  10. ///
  11. /// This uses vectors to avoid the overhead of hashing: it’s not worth it when the
  12. /// expected number of Git repositories per exa invocation is 0 or 1...
  13. pub struct GitCache {
  14. /// A list of discovered Git repositories and their paths.
  15. repos: Vec<GitRepo>,
  16. /// Paths that we’ve confirmed do not have Git repositories underneath them.
  17. misses: Vec<PathBuf>,
  18. }
  19. impl GitCache {
  20. pub fn has_anything_for(&self, index: &Path) -> bool {
  21. self.repos.iter().any(|e| e.has_path(index))
  22. }
  23. pub fn get(&self, index: &Path, prefix_lookup: bool) -> f::Git {
  24. self.repos.iter()
  25. .find(|e| e.has_path(index))
  26. .map(|repo| repo.search(index, prefix_lookup))
  27. .unwrap_or_default()
  28. }
  29. }
  30. use std::iter::FromIterator;
  31. impl FromIterator<PathBuf> for GitCache {
  32. fn from_iter<I>(iter: I) -> Self
  33. where I: IntoIterator<Item=PathBuf>
  34. {
  35. let iter = iter.into_iter();
  36. let mut git = Self {
  37. repos: Vec::with_capacity(iter.size_hint().0),
  38. misses: Vec::new(),
  39. };
  40. for path in iter {
  41. if git.misses.contains(&path) {
  42. debug!("Skipping {:?} because it already came back Gitless", path);
  43. }
  44. else if git.repos.iter().any(|e| e.has_path(&path)) {
  45. debug!("Skipping {:?} because we already queried it", path);
  46. }
  47. else {
  48. match GitRepo::discover(path) {
  49. Ok(r) => {
  50. if let Some(r2) = git.repos.iter_mut().find(|e| e.has_workdir(&r.workdir)) {
  51. debug!("Adding to existing repo (workdir matches with {:?})", r2.workdir);
  52. r2.extra_paths.push(r.original_path);
  53. continue;
  54. }
  55. debug!("Discovered new Git repo");
  56. git.repos.push(r);
  57. }
  58. Err(miss) => {
  59. git.misses.push(miss)
  60. }
  61. }
  62. }
  63. }
  64. git
  65. }
  66. }
  67. /// A **Git repository** is one we’ve discovered somewhere on the filesystem.
  68. pub struct GitRepo {
  69. /// The queryable contents of the repository: either a `git2` repo, or the
  70. /// cached results from when we queried it last time.
  71. contents: Mutex<GitContents>,
  72. /// The working directory of this repository.
  73. /// This is used to check whether two repositories are the same.
  74. workdir: PathBuf,
  75. /// The path that was originally checked to discover this repository.
  76. /// This is as important as the extra_paths (it gets checked first), but
  77. /// is separate to avoid having to deal with a non-empty Vec.
  78. original_path: PathBuf,
  79. /// Any other paths that were checked only to result in this same
  80. /// repository.
  81. extra_paths: Vec<PathBuf>,
  82. }
  83. /// A repository’s queried state.
  84. enum GitContents {
  85. /// All the interesting Git stuff goes through this.
  86. Before {
  87. repo: git2::Repository,
  88. },
  89. /// Temporary value used in `repo_to_statuses` so we can move the
  90. /// repository out of the `Before` variant.
  91. Processing,
  92. /// The data we’ve extracted from the repository, but only after we’ve
  93. /// actually done so.
  94. After {
  95. statuses: Git,
  96. },
  97. }
  98. impl GitRepo {
  99. /// Searches through this repository for a path (to a file or directory,
  100. /// depending on the prefix-lookup flag) and returns its Git status.
  101. ///
  102. /// Actually querying the `git2` repository for the mapping of paths to
  103. /// Git statuses is only done once, and gets cached so we don’t need to
  104. /// re-query the entire repository the times after that.
  105. ///
  106. /// The temporary `Processing` enum variant is used after the `git2`
  107. /// repository is moved out, but before the results have been moved in!
  108. /// See <https://stackoverflow.com/q/45985827/3484614>
  109. fn search(&self, index: &Path, prefix_lookup: bool) -> f::Git {
  110. use std::mem::replace;
  111. let mut contents = self.contents.lock().unwrap();
  112. if let GitContents::After { ref statuses } = *contents {
  113. debug!("Git repo {:?} has been found in cache", &self.workdir);
  114. return statuses.status(index, prefix_lookup);
  115. }
  116. debug!("Querying Git repo {:?} for the first time", &self.workdir);
  117. let repo = replace(&mut *contents, GitContents::Processing).inner_repo();
  118. let statuses = repo_to_statuses(&repo, &self.workdir);
  119. let result = statuses.status(index, prefix_lookup);
  120. let _processing = replace(&mut *contents, GitContents::After { statuses });
  121. result
  122. }
  123. /// Whether this repository has the given working directory.
  124. fn has_workdir(&self, path: &Path) -> bool {
  125. self.workdir == path
  126. }
  127. /// Whether this repository cares about the given path at all.
  128. fn has_path(&self, path: &Path) -> bool {
  129. path.starts_with(&self.original_path) || self.extra_paths.iter().any(|e| path.starts_with(e))
  130. }
  131. /// Searches for a Git repository at any point above the given path.
  132. /// Returns the original buffer if none is found.
  133. fn discover(path: PathBuf) -> Result<Self, PathBuf> {
  134. info!("Searching for Git repository above {:?}", path);
  135. // Search with GIT_DIR env variable first if set
  136. let repo = match git2::Repository::open_from_env() {
  137. Ok(r) => r,
  138. Err(e) => {
  139. // anything other than NotFound implies GIT_DIR was set and we got actual error
  140. if e.code() != git2::ErrorCode::NotFound {
  141. error!("Error opening Git repo from env using GIT_DIR: {:?}", e);
  142. return Err(path);
  143. } else {
  144. // nothing found, search using discover
  145. match git2::Repository::discover(&path) {
  146. Ok(r) => r,
  147. Err(e) => {
  148. error!("Error discovering Git repositories: {:?}", e);
  149. return Err(path);
  150. }
  151. }
  152. }
  153. }
  154. };
  155. if let Some(workdir) = repo.workdir() {
  156. let workdir = workdir.to_path_buf();
  157. let contents = Mutex::new(GitContents::Before { repo });
  158. Ok(Self { contents, workdir, original_path: path, extra_paths: Vec::new() })
  159. }
  160. else {
  161. warn!("Repository has no workdir?");
  162. Err(path)
  163. }
  164. }
  165. }
  166. impl GitContents {
  167. /// Assumes that the repository hasn’t been queried, and extracts it
  168. /// (consuming the value) if it has. This is needed because the entire
  169. /// enum variant gets replaced when a repo is queried (see above).
  170. fn inner_repo(self) -> git2::Repository {
  171. if let Self::Before { repo } = self {
  172. repo
  173. }
  174. else {
  175. unreachable!("Tried to extract a non-Repository")
  176. }
  177. }
  178. }
  179. /// Iterates through a repository’s statuses, consuming it and returning the
  180. /// mapping of files to their Git status.
  181. /// We will have already used the working directory at this point, so it gets
  182. /// passed in rather than deriving it from the `Repository` again.
  183. fn repo_to_statuses(repo: &git2::Repository, workdir: &Path) -> Git {
  184. let mut statuses = Vec::new();
  185. info!("Getting Git statuses for repo with workdir {:?}", workdir);
  186. match repo.statuses(None) {
  187. Ok(es) => {
  188. for e in es.iter() {
  189. #[cfg(target_family = "unix")]
  190. let path = workdir.join(Path::new(OsStr::from_bytes(e.path_bytes())));
  191. // TODO: handle non Unix systems better:
  192. // https://github.com/ogham/exa/issues/698
  193. #[cfg(not(target_family = "unix"))]
  194. let path = workdir.join(Path::new(e.path().unwrap()));
  195. let elem = (path, e.status());
  196. statuses.push(elem);
  197. }
  198. }
  199. Err(e) => {
  200. error!("Error looking up Git statuses: {:?}", e);
  201. }
  202. }
  203. Git { statuses }
  204. }
  205. // The `repo.statuses` call above takes a long time. exa debug output:
  206. //
  207. // 20.311276 INFO:exa::fs::feature::git: Getting Git statuses for repo with workdir "/vagrant/"
  208. // 20.799610 DEBUG:exa::output::table: Getting Git status for file "./Cargo.toml"
  209. //
  210. // Even inserting another logging line immediately afterwards doesn’t make it
  211. // look any faster.
  212. /// Container of Git statuses for all the files in this folder’s Git repository.
  213. struct Git {
  214. statuses: Vec<(PathBuf, git2::Status)>,
  215. }
  216. impl Git {
  217. /// Get either the file or directory status for the given path.
  218. /// “Prefix lookup” means that it should report an aggregate status of all
  219. /// paths starting with the given prefix (in other words, a directory).
  220. fn status(&self, index: &Path, prefix_lookup: bool) -> f::Git {
  221. if prefix_lookup { self.dir_status(index) }
  222. else { self.file_status(index) }
  223. }
  224. /// Get the user-facing status of a file.
  225. /// We check the statuses directly applying to a file, and for the ignored
  226. /// status we check if any of its parents directories is ignored by git.
  227. fn file_status(&self, file: &Path) -> f::Git {
  228. let path = reorient(file);
  229. let s = self.statuses.iter()
  230. .filter(|p| if p.1 == git2::Status::IGNORED {
  231. path.starts_with(&p.0)
  232. } else {
  233. p.0 == path
  234. })
  235. .fold(git2::Status::empty(), |a, b| a | b.1);
  236. let staged = index_status(s);
  237. let unstaged = working_tree_status(s);
  238. f::Git { staged, unstaged }
  239. }
  240. /// Get the combined, user-facing status of a directory.
  241. /// Statuses are aggregating (for example, a directory is considered
  242. /// modified if any file under it has the status modified), except for
  243. /// ignored status which applies to files under (for example, a directory
  244. /// is considered ignored if one of its parent directories is ignored).
  245. fn dir_status(&self, dir: &Path) -> f::Git {
  246. let path = reorient(dir);
  247. let s = self.statuses.iter()
  248. .filter(|p| if p.1 == git2::Status::IGNORED {
  249. path.starts_with(&p.0)
  250. } else {
  251. p.0.starts_with(&path)
  252. })
  253. .fold(git2::Status::empty(), |a, b| a | b.1);
  254. let staged = index_status(s);
  255. let unstaged = working_tree_status(s);
  256. f::Git { staged, unstaged }
  257. }
  258. }
  /// Turns a path into an absolute one, relative to the current directory.
  ///
  /// Paths have to be absolute to be compared properly: a repository only
  /// knows "/vagrant/README.md" (prefixed by its workdir), so asking it
  /// about "./README.md" would never match.
  ///
  /// The path is joined onto the current directory (or onto "." when the
  /// current directory cannot be read) and then canonicalized; if that
  /// fails, the joined path is returned as it is.
  #[cfg(unix)]
  fn reorient(path: &Path) -> PathBuf {
      let base = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
      let joined = base.join(path);

      match joined.canonicalize() {
          Ok(resolved) => resolved,
          Err(_) => joined,
      }
  }
  /// Converts a path to an absolute path so it can be compared with the
  /// paths a repository reports.
  ///
  /// The path is canonicalized and the extended-length prefix (`\\?\`) that
  /// Windows canonicalization produces is stripped. If canonicalization
  /// fails (for example, the path does not exist), the path joined onto the
  /// current directory is returned instead of panicking. If the canonical
  /// path is not valid UTF-8, it is returned with its prefix intact.
  #[cfg(windows)]
  fn reorient(path: &Path) -> PathBuf {
      let canonical = match path.canonicalize() {
          Ok(canonical) => canonical,
          Err(_) => {
              return match std::env::current_dir() {
                  Ok(dir) => dir.join(path),
                  Err(_) => Path::new(".").join(path),
              };
          }
      };

      // Canonicalization returns an extended-length path; the prefix has to
      // be stripped for comparisons with repository paths to work.
      let stripped = canonical
          .to_str()
          .map(|text| PathBuf::from(text.trim_start_matches("\\\\?\\")));

      stripped.unwrap_or(canonical)
  }
  280. /// The character to display if the file has been modified, but not staged.
  281. fn working_tree_status(status: git2::Status) -> f::GitStatus {
  282. match status {
  283. s if s.contains(git2::Status::WT_NEW) => f::GitStatus::New,
  284. s if s.contains(git2::Status::WT_MODIFIED) => f::GitStatus::Modified,
  285. s if s.contains(git2::Status::WT_DELETED) => f::GitStatus::Deleted,
  286. s if s.contains(git2::Status::WT_RENAMED) => f::GitStatus::Renamed,
  287. s if s.contains(git2::Status::WT_TYPECHANGE) => f::GitStatus::TypeChange,
  288. s if s.contains(git2::Status::IGNORED) => f::GitStatus::Ignored,
  289. s if s.contains(git2::Status::CONFLICTED) => f::GitStatus::Conflicted,
  290. _ => f::GitStatus::NotModified,
  291. }
  292. }
  293. /// The character to display if the file has been modified and the change
  294. /// has been staged.
  295. fn index_status(status: git2::Status) -> f::GitStatus {
  296. match status {
  297. s if s.contains(git2::Status::INDEX_NEW) => f::GitStatus::New,
  298. s if s.contains(git2::Status::INDEX_MODIFIED) => f::GitStatus::Modified,
  299. s if s.contains(git2::Status::INDEX_DELETED) => f::GitStatus::Deleted,
  300. s if s.contains(git2::Status::INDEX_RENAMED) => f::GitStatus::Renamed,
  301. s if s.contains(git2::Status::INDEX_TYPECHANGE) => f::GitStatus::TypeChange,
  302. _ => f::GitStatus::NotModified,
  303. }
  304. }
  305. fn current_branch(repo: &git2::Repository) -> Option<String>{
  306. let head = match repo.head() {
  307. Ok(head) => Some(head),
  308. Err(ref e) if e.code() == git2::ErrorCode::UnbornBranch || e.code() == git2::ErrorCode::NotFound => return None,
  309. Err(e) => {
  310. error!("Error looking up Git branch: {:?}", e);
  311. return None
  312. }
  313. };
  314. if let Some(h) = head{
  315. if let Some(s) = h.shorthand(){
  316. let branch_name = s.to_owned();
  317. if branch_name.len() > 10 {
  318. return Some(branch_name[..8].to_string()+"..");
  319. }
  320. return Some(branch_name);
  321. }
  322. }
  323. None
  324. }
  325. impl f::SubdirGitRepo{
  326. pub fn from_path(dir : &Path, status : bool) -> Self{
  327. let path = &reorient(&dir);
  328. let g = git2::Repository::open(path);
  329. if let Ok(repo) = g{
  330. let branch = current_branch(&repo);
  331. if !status{
  332. return Self{status : f::SubdirGitRepoStatus::GitUnknown, branch};
  333. }
  334. match repo.statuses(None) {
  335. Ok(es) => {
  336. if es.iter().filter(|s| s.status() != git2::Status::IGNORED).any(|_| true){
  337. return Self{status : f::SubdirGitRepoStatus::GitDirty, branch};
  338. }
  339. return Self{status : f::SubdirGitRepoStatus::GitClean, branch};
  340. }
  341. Err(e) => {
  342. error!("Error looking up Git statuses: {:?}", e)
  343. }
  344. }
  345. }
  346. Self::default()
  347. }
  348. }