Skip to content

Commit 9c4114e

Browse files
author
Michael Wright
committed
Reimplement with zip and two DirWalkers
The current implementation has the problem that every matching file is opened and read twice. This is an alternate implementation that only checks every pair of files once.
1 parent 1b2bfd0 commit 9c4114e

File tree

9 files changed

+62
-57
lines changed

9 files changed

+62
-57
lines changed

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,4 +10,4 @@ readme = "README.md"
1010
tags = ["directory", "diff", "fs"]
1111

1212
[dependencies]
13-
walkdir = "1.0.7"
13+
walkdir = "2.0.1"

src/lib.rs

Lines changed: 25 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
//!
88
//! ```no_run
99
//! extern crate dir_diff;
10-
//!
10+
//!
1111
//! assert!(dir_diff::is_different("dir/a", "dir/b").unwrap());
1212
//! ```
1313
@@ -16,8 +16,9 @@ extern crate walkdir;
1616
use std::fs::File;
1717
use std::io::prelude::*;
1818
use std::path::Path;
19+
use std::cmp::Ordering;
1920

20-
use walkdir::WalkDir;
21+
use walkdir::{DirEntry, WalkDir};
2122

2223
/// The various errors that can happen when diffing two directories
2324
#[derive(Debug)]
@@ -33,68 +34,37 @@ pub enum Error {
3334
///
3435
/// ```no_run
3536
/// extern crate dir_diff;
36-
///
37+
///
3738
/// assert!(dir_diff::is_different("dir/a", "dir/b").unwrap());
3839
/// ```
3940
pub fn is_different<A: AsRef<Path>, B: AsRef<Path>>(a_base: A, b_base: B) -> Result<bool, Error> {
40-
let a_base = a_base.as_ref();
41-
let b_base = b_base.as_ref();
41+
let mut a_walker = walk_dir(a_base);
42+
let mut b_walker = walk_dir(b_base);
4243

43-
// We start by checking the count in the top-level directories. There's two
44-
// reasons for this: first, if a is empty, but b is not, our for loop will not
45-
// catch it; it will execute zero times and return true, and that's wrong!
46-
// Second, if they're differnet, there's no reason to bother comparing the
47-
// files; we already know they're different.
44+
for (a, b) in (&mut a_walker).zip(&mut b_walker) {
45+
let a = a?;
46+
let b = b?;
4847

49-
let a_count = std::fs::read_dir(&a_base)?.count();
50-
let b_count = std::fs::read_dir(&b_base)?.count();
51-
52-
if a_count != b_count {
53-
return Ok(true);
48+
if a.depth() != b.depth() || a.file_type() != b.file_type()
49+
|| a.file_name() != b.file_name()
50+
|| (a.file_type().is_file() && read_to_vec(a.path())? != read_to_vec(b.path())?)
51+
{
52+
return Ok(true);
53+
}
5454
}
5555

56-
Ok(compare(a_base, b_base)? || compare(b_base, a_base)?)
56+
Ok(!a_walker.next().is_none() || !b_walker.next().is_none())
5757
}
5858

59-
fn compare(a_base: &Path, b_base: &Path) -> Result<bool, Error> {
60-
// next, we walk all of the entries in a and compare them to b
61-
for entry in WalkDir::new(a_base) {
62-
let entry = entry?;
63-
let a = entry.path();
64-
65-
// calculate just the part of the path relative to a
66-
let no_prefix = a.strip_prefix(a_base)?;
67-
68-
// and then join that with b to get the path in b
69-
let b = b_base.join(no_prefix);
70-
71-
if a.is_dir() {
72-
if b.is_dir() {
73-
// can't compare the contents of directories, so just continue
74-
continue;
75-
} else {
76-
// if one is a file and one is a directory, we have a difference!
77-
return Ok(true)
78-
}
79-
}
80-
81-
// file a is guaranteed to exist...
82-
let a_text = read_to_vec(a)?;
83-
84-
// but file b is not. If we have any kind of error when loading
85-
// it up, that's a positive result, not an actual error.
86-
let b_text = match read_to_vec(b) {
87-
Ok(contents) => contents,
88-
Err(_) => return Ok(true),
89-
};
90-
91-
if a_text != b_text {
92-
return Ok(true);
93-
}
94-
}
59+
fn walk_dir<P: AsRef<Path>>(path: P) -> std::iter::Skip<walkdir::IntoIter> {
60+
WalkDir::new(path)
61+
.sort_by(compare_by_file_name)
62+
.into_iter()
63+
.skip(1)
64+
}
9565

96-
// if we made it here, we're good!
97-
Ok(false)
66+
fn compare_by_file_name(a: &DirEntry, b: &DirEntry) -> Ordering {
67+
a.file_name().cmp(b.file_name())
9868
}
9969

10070
fn read_to_vec<P: AsRef<Path>>(file: P) -> Result<Vec<u8>, std::io::Error> {
@@ -122,4 +92,4 @@ impl From<walkdir::Error> for Error {
12292
fn from(e: walkdir::Error) -> Error {
12393
Error::WalkDir(e)
12494
}
125-
}
95+
}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
test
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
test

tests/filedepth/asc/dir1/a/b.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
test

tests/filedepth/asc/dir2/b.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
test

tests/filedepth/desc/dir1/a.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
test

tests/filedepth/desc/dir2/b/a.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
test

tests/smoke.rs

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
extern crate dir_diff;
22

3+
use std::path::Path;
4+
use std::fs::create_dir;
5+
use std::io::ErrorKind;
6+
37
#[test]
48
fn easy_good() {
59
assert!(!dir_diff::is_different("tests/easy/good/dir1", "tests/easy/good/dir2").unwrap());
@@ -33,4 +37,29 @@ fn oneempty() {
3337
#[test]
3438
fn reflexive() {
3539
assert!(dir_diff::is_different("tests/reflexive/dir1", "tests/reflexive/dir2").unwrap());
36-
}
40+
}
41+
42+
#[test]
43+
fn dirs_differ() {
44+
assert!(dir_diff::is_different("tests/dirs_differ/dir1", "tests/dirs_differ/dir2").unwrap());
45+
}
46+
47+
fn ensure_dir<P: AsRef<Path>>(path: P) -> Result<(), std::io::Error> {
48+
match create_dir(path) {
49+
Err(ref err) if err.kind() == ErrorKind::AlreadyExists => Ok(()),
50+
other => other,
51+
}
52+
}
53+
54+
#[test]
55+
fn filedepth() {
56+
ensure_dir("tests/filedepth/asc/dir2/a").unwrap();
57+
ensure_dir("tests/filedepth/desc/dir1/b").unwrap();
58+
59+
assert!(
60+
dir_diff::is_different("tests/filedepth/asc/dir1", "tests/filedepth/asc/dir2").unwrap()
61+
);
62+
assert!(
63+
dir_diff::is_different("tests/filedepth/desc/dir1", "tests/filedepth/desc/dir2").unwrap()
64+
);
65+
}

0 commit comments

Comments
 (0)