Преглед изворног кода

Properly handle invalid UTF-8 filenames

Ben S пре 11 година
родитељ
комит
df65886d6d
4 измењена фајла са 66 додавања и 53 брисања
  1. 46 35
      src/file.rs
  2. 4 2
      src/filetype.rs
  3. 3 3
      src/options.rs
  4. 13 13
      src/sort.rs

+ 46 - 35
src/file.rs

@@ -1,6 +1,7 @@
 use colours::{Plain, Style, Black, Red, Green, Yellow, Blue, Purple, Cyan, Fixed};
 use std::io::{fs, IoResult};
 use std::io;
+use std::str::{from_utf8, from_utf8_lossy};
 
 use column::{Column, Permissions, FileName, FileSize, User, Group, HardLinks, Inode, Blocks};
 use format::{format_metric_bytes, format_IEC_bytes};
@@ -17,9 +18,9 @@ use filetype::HasType;
 // the actual path.
 
 pub struct File<'a> {
-    pub name:  &'a str,
+    pub name:  String,
     pub dir:   &'a Dir<'a>,
-    pub ext:   Option<&'a str>,
+    pub ext:   Option<String>,
     pub path:  &'a Path,
     pub stat:  io::FileStat,
     pub parts: Vec<SortPart>,
@@ -27,10 +28,12 @@ pub struct File<'a> {
 
 impl<'a> File<'a> {
     pub fn from_path(path: &'a Path, parent: &'a Dir) -> IoResult<File<'a>> {
-        // Getting the string from a filename fails whenever it's not
-        // UTF-8 representable - just assume it is for now.
-        let filename: &str = path.filename_str().unwrap();
-
+        let v = path.filename().unwrap();  // fails if / or . or ..
+        let filename = match from_utf8(v) {
+            Some(name) => name.to_string(),
+            None => from_utf8_lossy(v).to_string(),
+        };
+        
         // Use lstat here instead of file.stat(), as it doesn't follow
         // symbolic links. Otherwise, the stat() call will fail if it
         // encounters a link that's target is non-existent.
@@ -39,26 +42,27 @@ impl<'a> File<'a> {
             path:  path,
             dir:   parent,
             stat:  stat,
-            name:  filename,
-            ext:   File::ext(filename),
-            parts: SortPart::split_into_parts(filename),
+            name:  filename.clone(),
+            ext:   File::ext(filename.clone()),
+            parts: SortPart::split_into_parts(filename.clone()),
         })
     }
 
-    fn ext(name: &'a str) -> Option<&'a str> {
+    fn ext(name: String) -> Option<String> {
         // The extension is the series of characters after a dot at
         // the end of a filename. This deliberately also counts
         // dotfiles - the ".git" folder has the extension "git".
         let re = regex!(r"\.([^.]+)$");
-        re.captures(name).map(|caps| caps.at(1))
+        re.captures(name.as_slice()).map(|caps| caps.at(1).to_string())
     }
 
     pub fn is_dotfile(&self) -> bool {
-        self.name.starts_with(".")
+        self.name.as_slice().starts_with(".")
     }
 
     pub fn is_tmpfile(&self) -> bool {
-        self.name.ends_with("~") || (self.name.starts_with("#") && self.name.ends_with("#"))
+        let name = self.name.as_slice();
+        name.ends_with("~") || (name.starts_with("#") && name.ends_with("#"))
     }
 
     // Highlight the compiled versions of files. Some of them, like .o,
@@ -68,22 +72,23 @@ impl<'a> File<'a> {
     // without a .coffee.
 
     pub fn get_source_files(&self) -> Vec<Path> {
-        match self.ext {
-            Some("class") => vec![self.path.with_extension("java")],  // Java
-            Some("elc") => vec![self.path.with_extension("el")],  // Emacs Lisp
-            Some("hi") => vec![self.path.with_extension("hs")],  // Haskell
-            Some("o") => vec![self.path.with_extension("c"), self.path.with_extension("cpp")],  // C, C++
-            Some("pyc") => vec![self.path.with_extension("py")],  // Python
-            Some("js") => vec![self.path.with_extension("coffee"), self.path.with_extension("ts")],  // CoffeeScript, TypeScript
-            Some("css") => vec![self.path.with_extension("sass"), self.path.with_extension("less")],  // SASS, Less
-
-            Some("aux") => vec![self.path.with_extension("tex")],  // TeX: auxiliary file
-            Some("bbl") => vec![self.path.with_extension("tex")],  // BibTeX bibliography file
-            Some("blg") => vec![self.path.with_extension("tex")],  // BibTeX log file
-            Some("lof") => vec![self.path.with_extension("tex")],  // list of figures
-            Some("log") => vec![self.path.with_extension("tex")],  // TeX log file
-            Some("lot") => vec![self.path.with_extension("tex")],  // list of tables
-            Some("toc") => vec![self.path.with_extension("tex")],  // table of contents
+        let ext = self.ext.clone().unwrap();
+        match ext.as_slice() {
+            "class" => vec![self.path.with_extension("java")],  // Java
+            "elc" => vec![self.path.with_extension("el")],  // Emacs Lisp
+            "hi" => vec![self.path.with_extension("hs")],  // Haskell
+            "o" => vec![self.path.with_extension("c"), self.path.with_extension("cpp")],  // C, C++
+            "pyc" => vec![self.path.with_extension("py")],  // Python
+            "js" => vec![self.path.with_extension("coffee"), self.path.with_extension("ts")],  // CoffeeScript, TypeScript
+            "css" => vec![self.path.with_extension("sass"), self.path.with_extension("less")],  // SASS, Less
+
+            "aux" => vec![self.path.with_extension("tex")],  // TeX: auxiliary file
+            "bbl" => vec![self.path.with_extension("tex")],  // BibTeX bibliography file
+            "blg" => vec![self.path.with_extension("tex")],  // BibTeX log file
+            "lof" => vec![self.path.with_extension("tex")],  // list of figures
+            "log" => vec![self.path.with_extension("tex")],  // TeX log file
+            "lot" => vec![self.path.with_extension("tex")],  // list of tables
+            "toc" => vec![self.path.with_extension("tex")],  // table of contents
 
             _ => vec![],
         }
@@ -133,7 +138,8 @@ impl<'a> File<'a> {
     }
 
     fn file_name(&self) -> String {
-        let displayed_name = self.file_colour().paint(self.name);
+        let name = self.name.as_slice();
+        let displayed_name = self.file_colour().paint(name);
         if self.stat.kind == io::TypeSymlink {
             match fs::readlink(self.path) {
                 Ok(path) => {
@@ -149,13 +155,18 @@ impl<'a> File<'a> {
     }
 
     fn target_file_name_and_arrow(&self, target_path: Path) -> String {
-        let filename = target_path.as_str().unwrap();
+        let v = target_path.filename().unwrap();
+        let filename = match from_utf8(v) {
+            Some(name) => name.to_string(),
+            None => from_utf8_lossy(v).to_string(),
+        };
+        
         let link_target = fs::stat(&target_path).map(|stat| File {
             path:  &target_path,
             dir:   self.dir,
             stat:  stat,
-            name:  filename,
-            ext:   File::ext(filename),
+            name:  filename.clone(),
+            ext:   File::ext(filename.clone()),
             parts: vec![],  // not needed
         });
 
@@ -166,8 +177,8 @@ impl<'a> File<'a> {
         // that reason anyway.
 
         match link_target {
-            Ok(file) => format!("{} {}", Fixed(244).paint("=>"), file.file_colour().paint(filename)),
-            Err(_)   => format!("{} {}", Red.paint("=>"), Red.underline().paint(filename)),
+            Ok(file) => format!("{} {}", Fixed(244).paint("=>"), file.file_colour().paint(filename.as_slice())),
+            Err(_)   => format!("{} {}", Red.paint("=>"), Red.underline().paint(filename.as_slice())),
         }
     }
 

+ 4 - 2
src/filetype.rs

@@ -73,6 +73,7 @@ pub trait HasType {
 
 impl<'a> HasType for File<'a> {
     fn get_type(&self) -> FileType {
+        let name = self.name.as_slice();
         if self.stat.kind == io::TypeDirectory {
             return Directory;
         }
@@ -85,11 +86,12 @@ impl<'a> HasType for File<'a> {
         else if self.stat.perm.contains(io::UserExecute) {
             return Executable;
         }
-        else if self.name.starts_with("README") || BUILD_TYPES.iter().any(|&s| s == self.name) {
+        else if name.starts_with("README") || BUILD_TYPES.iter().any(|&s| s == name) {
             return Immediate;
         }
         else if self.ext.is_some() {
-            let ext = self.ext.unwrap();
+            let e = self.ext.clone().unwrap();
+            let ext = e.as_slice();
             if IMAGE_TYPES.iter().any(|&s| s == ext) {
                 return Image;
             }

+ 3 - 3
src/options.rs

@@ -90,7 +90,7 @@ impl Options {
         if self.showInvisibles {
             true
         } else {
-            !f.name.starts_with(".")
+            !f.name.as_slice().starts_with(".")
         }
     }
 
@@ -103,8 +103,8 @@ impl Options {
             Name => files.sort_by(|a, b| a.parts.cmp(&b.parts)),
             Size => files.sort_by(|a, b| a.stat.size.cmp(&b.stat.size)),
             Extension => files.sort_by(|a, b| {
-                let exts = a.ext.map(|e| e.to_ascii_lower()).cmp(&b.ext.map(|e| e.to_ascii_lower()));
-                let names = a.name.to_ascii_lower().cmp(&b.name.to_ascii_lower());
+                let exts = a.ext.clone().map(|e| e.as_slice().to_ascii_lower()).cmp(&b.ext.clone().map(|e| e.as_slice().to_ascii_lower()));
+                let names = a.name.as_slice().to_ascii_lower().cmp(&b.name.as_slice().to_ascii_lower());
                 lexical_ordering(exts, names)
             }),
         }

+ 13 - 13
src/sort.rs

@@ -16,7 +16,7 @@ pub enum SortPart {
 }
 
 impl SortPart {
-    pub fn from_string(is_digit: bool, slice: &str) -> SortPart {
+    fn from_string(is_digit: bool, slice: &str) -> SortPart {
         if is_digit {
             // numbers too big for a u64 fall back into strings.
             match from_str::<u64>(slice) {
@@ -31,68 +31,68 @@ impl SortPart {
     // The logic here is taken from my question at
     // http://stackoverflow.com/q/23969191/3484614
 
-    pub fn split_into_parts(input: &str) -> Vec<SortPart> {
+    pub fn split_into_parts(input: String) -> Vec<SortPart> {
         let mut parts = vec![];
 
         if input.is_empty() {
             return parts
         }
 
-        let mut is_digit = input.char_at(0).is_digit();
+        let mut is_digit = input.as_slice().char_at(0).is_digit();
         let mut start = 0;
 
-        for (i, c) in input.char_indices() {
+        for (i, c) in input.as_slice().char_indices() {
             if is_digit != c.is_digit() {
-                parts.push(SortPart::from_string(is_digit, input.slice(start, i)));
+                parts.push(SortPart::from_string(is_digit, input.as_slice().slice(start, i)));
                 is_digit = !is_digit;
                 start = i;
             }
         }
 
-        parts.push(SortPart::from_string(is_digit, input.slice_from(start)));
+        parts.push(SortPart::from_string(is_digit, input.as_slice().slice_from(start)));
         parts
     }
 }
 
 #[test]
 fn test_numeric() {
-    let bits = SortPart::split_into_parts("123456789".as_slice());
+    let bits = SortPart::split_into_parts("123456789".to_string());
     assert!(bits == vec![ Numeric(123456789) ]);
 }
 
 
 #[test]
 fn test_stringular() {
-    let bits = SortPart::split_into_parts("toothpaste".as_slice());
+    let bits = SortPart::split_into_parts("toothpaste".to_string());
     assert!(bits == vec![ Stringular("toothpaste".to_string()) ]);
 }
 
 #[test]
 fn test_empty() {
-    let bits = SortPart::split_into_parts("".as_slice());
+    let bits = SortPart::split_into_parts("".to_string());
     assert!(bits == vec![]);
 }
 
 #[test]
 fn test_one() {
-    let bits = SortPart::split_into_parts("123abc123".as_slice());
+    let bits = SortPart::split_into_parts("123abc123".to_string());
     assert!(bits == vec![ Numeric(123), Stringular("abc".to_string()), Numeric(123) ]);
 }
 
 #[test]
 fn test_two() {
-    let bits = SortPart::split_into_parts("final version 3.pdf".as_slice());
+    let bits = SortPart::split_into_parts("final version 3.pdf".to_string());
     assert!(bits == vec![ Stringular("final version ".to_string()), Numeric(3), Stringular(".pdf".to_string()) ]);
 }
 
 #[test]
 fn test_huge_number() {
-    let bits = SortPart::split_into_parts("9999999999999999999999999999999999999999999999999999999".as_slice());
+    let bits = SortPart::split_into_parts("9999999999999999999999999999999999999999999999999999999".to_string());
     assert!(bits == vec![ Stringular("9999999999999999999999999999999999999999999999999999999".to_string()) ]);
 }
 
 #[test]
 fn test_case() {
-    let bits = SortPart::split_into_parts("123ABC123".as_slice());
+    let bits = SortPart::split_into_parts("123ABC123".to_string());
     assert!(bits == vec![ Numeric(123), Stringular("abc".to_string()), Numeric(123) ]);
 }