From 61c2f6ec5d08cd825e5a3421b60fac3f01ac4dc6 Mon Sep 17 00:00:00 2001 From: Michael Wright Date: Sun, 24 Dec 2017 20:11:06 +0200 Subject: [PATCH 1/3] Reimplement with zip and two DirWalkers The current implementation has the problem that every matching file is opened and read twice. This is an alternate implementation that only checks every pair of files once. --- Cargo.toml | 2 +- src/lib.rs | 79 +++++++++++++++++------------------------------------- 2 files changed, 25 insertions(+), 56 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index c02f349..911bf60 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,4 +10,4 @@ readme = "README.md" tags = ["directory", "diff", "fs"] [dependencies] -walkdir = "1.0.7" +walkdir = "2.0.1" diff --git a/src/lib.rs b/src/lib.rs index 95a8d84..78655a8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -7,7 +7,7 @@ //! //! ```no_run //! extern crate dir_diff; -//! +//! //! assert!(dir_diff::is_different("dir/a", "dir/b").unwrap()); //! ``` @@ -16,8 +16,9 @@ extern crate walkdir; use std::fs::File; use std::io::prelude::*; use std::path::Path; +use std::cmp::Ordering; -use walkdir::WalkDir; +use walkdir::{DirEntry, WalkDir}; /// The various errors that can happen when diffing two directories #[derive(Debug)] @@ -33,68 +34,36 @@ pub enum Error { /// /// ```no_run /// extern crate dir_diff; -/// +/// /// assert!(dir_diff::is_different("dir/a", "dir/b").unwrap()); /// ``` pub fn is_different, B: AsRef>(a_base: A, b_base: B) -> Result { - let a_base = a_base.as_ref(); - let b_base = b_base.as_ref(); + let mut a_walker = walk_dir(a_base); + let mut b_walker = walk_dir(b_base); - // We start by checking the count in the top-level directories. There's two - // reasons for this: first, if a is empty, but b is not, our for loop will not - // catch it; it will execute zero times and return true, and that's wrong! - // Second, if they're differnet, there's no reason to bother comparing the - // files; we already know they're different. + for (a, b) in (&mut a_walker).zip(&mut b_walker) { + let a = a?; + let b = b?; - let a_count = std::fs::read_dir(&a_base)?.count(); - let b_count = std::fs::read_dir(&b_base)?.count(); - - if a_count != b_count { - return Ok(true); + if a.file_type() != b.file_type() || a.file_name() != b.file_name() + || (a.file_type().is_file() && read_to_vec(a.path())? != read_to_vec(b.path())?) + { + return Ok(true); + } } - Ok(compare(a_base, b_base)? || compare(b_base, a_base)?) + Ok(!a_walker.next().is_none() || !b_walker.next().is_none()) } -fn compare(a_base: &Path, b_base: &Path) -> Result { - // next, we walk all of the entries in a and compare them to b - for entry in WalkDir::new(a_base) { - let entry = entry?; - let a = entry.path(); - - // calculate just the part of the path relative to a - let no_prefix = a.strip_prefix(a_base)?; - - // and then join that with b to get the path in b - let b = b_base.join(no_prefix); - - if a.is_dir() { - if b.is_dir() { - // can't compare the contents of directories, so just continue - continue; - } else { - // if one is a file and one is a directory, we have a difference! - return Ok(true) - } - } - - // file a is guaranteed to exist... - let a_text = read_to_vec(a)?; - - // but file b is not. If we have any kind of error when loading - // it up, that's a positive result, not an actual error. - let b_text = match read_to_vec(b) { - Ok(contents) => contents, - Err(_) => return Ok(true), - }; - - if a_text != b_text { - return Ok(true); - } - } +fn walk_dir>(path: P) -> std::iter::Skip { + WalkDir::new(path) + .sort_by(compare_by_file_name) + .into_iter() + .skip(1) +} - // if we made it here, we're good! - Ok(false) +fn compare_by_file_name(a: &DirEntry, b: &DirEntry) -> Ordering { + a.file_name().cmp(b.file_name()) } fn read_to_vec>(file: P) -> Result, std::io::Error> { @@ -122,4 +91,4 @@ impl From for Error { fn from(e: walkdir::Error) -> Error { Error::WalkDir(e) } -} \ No newline at end of file +} From 6d92069f9a6ff1669dd2be348c26ddd393e0a5e2 Mon Sep 17 00:00:00 2001 From: Michael Wright Date: Fri, 29 Dec 2017 07:45:30 +0200 Subject: [PATCH 2/3] Fix file depth bug --- src/lib.rs | 3 ++- tests/filedepth/asc/dir1/a/b.txt | 1 + tests/filedepth/asc/dir2/b.txt | 1 + tests/filedepth/desc/dir1/a.txt | 1 + tests/filedepth/desc/dir2/b/a.txt | 1 + tests/smoke.rs | 26 +++++++++++++++++++++++++- 6 files changed, 31 insertions(+), 2 deletions(-) create mode 100644 tests/filedepth/asc/dir1/a/b.txt create mode 100644 tests/filedepth/asc/dir2/b.txt create mode 100644 tests/filedepth/desc/dir1/a.txt create mode 100644 tests/filedepth/desc/dir2/b/a.txt diff --git a/src/lib.rs b/src/lib.rs index 78655a8..1cce8bd 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -45,7 +45,8 @@ pub fn is_different, B: AsRef>(a_base: A, b_base: B) -> Res let a = a?; let b = b?; - if a.file_type() != b.file_type() || a.file_name() != b.file_name() + if a.depth() != b.depth() || a.file_type() != b.file_type() + || a.file_name() != b.file_name() || (a.file_type().is_file() && read_to_vec(a.path())? != read_to_vec(b.path())?) { return Ok(true); diff --git a/tests/filedepth/asc/dir1/a/b.txt b/tests/filedepth/asc/dir1/a/b.txt new file mode 100644 index 0000000..9daeafb --- /dev/null +++ b/tests/filedepth/asc/dir1/a/b.txt @@ -0,0 +1 @@ +test diff --git a/tests/filedepth/asc/dir2/b.txt b/tests/filedepth/asc/dir2/b.txt new file mode 100644 index 0000000..9daeafb --- /dev/null +++ b/tests/filedepth/asc/dir2/b.txt @@ -0,0 +1 @@ +test diff --git a/tests/filedepth/desc/dir1/a.txt b/tests/filedepth/desc/dir1/a.txt new file mode 100644 index 0000000..9daeafb --- /dev/null +++ b/tests/filedepth/desc/dir1/a.txt @@ -0,0 +1 @@ +test diff --git a/tests/filedepth/desc/dir2/b/a.txt b/tests/filedepth/desc/dir2/b/a.txt new file mode 100644 index 0000000..9daeafb --- /dev/null +++ b/tests/filedepth/desc/dir2/b/a.txt @@ -0,0 +1 @@ +test diff --git a/tests/smoke.rs b/tests/smoke.rs index e003bbd..3415b6f 100644 --- a/tests/smoke.rs +++ b/tests/smoke.rs @@ -1,5 +1,9 @@ extern crate dir_diff; +use std::path::Path; +use std::fs::create_dir; +use std::io::ErrorKind; + #[test] fn easy_good() { assert!(!dir_diff::is_different("tests/easy/good/dir1", "tests/easy/good/dir2").unwrap()); @@ -33,4 +37,24 @@ fn oneempty() { #[test] fn reflexive() { assert!(dir_diff::is_different("tests/reflexive/dir1", "tests/reflexive/dir2").unwrap()); -} \ No newline at end of file +} + +fn ensure_dir>(path: P) -> Result<(), std::io::Error> { + match create_dir(path) { + Err(ref err) if err.kind() == ErrorKind::AlreadyExists => Ok(()), + other => other, + } +} + +#[test] +fn filedepth() { + ensure_dir("tests/filedepth/asc/dir2/a").unwrap(); + ensure_dir("tests/filedepth/desc/dir1/b").unwrap(); + + assert!( + dir_diff::is_different("tests/filedepth/asc/dir1", "tests/filedepth/asc/dir2").unwrap() + ); + assert!( + dir_diff::is_different("tests/filedepth/desc/dir1", "tests/filedepth/desc/dir2").unwrap() + ); +} From c8ab06d06b3366497200481cb5bee3c2da353360 Mon Sep 17 00:00:00 2001 From: Michael Wright Date: Fri, 29 Dec 2017 18:39:07 +0200 Subject: [PATCH 3/3] Added further tests when dirs differ --- tests/dirs_differ/dir1/dirA/test.txt | 1 + tests/dirs_differ/dir2/dirB/test.txt | 1 + tests/smoke.rs | 5 +++++ 3 files changed, 7 insertions(+) create mode 100644 tests/dirs_differ/dir1/dirA/test.txt create mode 100644 tests/dirs_differ/dir2/dirB/test.txt diff --git a/tests/dirs_differ/dir1/dirA/test.txt b/tests/dirs_differ/dir1/dirA/test.txt new file mode 100644 index 0000000..9daeafb --- /dev/null +++ b/tests/dirs_differ/dir1/dirA/test.txt @@ -0,0 +1 @@ +test diff --git a/tests/dirs_differ/dir2/dirB/test.txt b/tests/dirs_differ/dir2/dirB/test.txt new file mode 100644 index 0000000..9daeafb --- /dev/null +++ b/tests/dirs_differ/dir2/dirB/test.txt @@ -0,0 +1 @@ +test diff --git a/tests/smoke.rs b/tests/smoke.rs index 3415b6f..ce4d338 100644 --- a/tests/smoke.rs +++ b/tests/smoke.rs @@ -39,6 +39,11 @@ fn reflexive() { assert!(dir_diff::is_different("tests/reflexive/dir1", "tests/reflexive/dir2").unwrap()); } +#[test] +fn dirs_differ() { + assert!(dir_diff::is_different("tests/dirs_differ/dir1", "tests/dirs_differ/dir2").unwrap()); +} + fn ensure_dir>(path: P) -> Result<(), std::io::Error> { match create_dir(path) { Err(ref err) if err.kind() == ErrorKind::AlreadyExists => Ok(()),