diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a57192a..7be99c1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -167,7 +167,7 @@ jobs: grcov . --output-type lcov --output-path "${COVERAGE_REPORT_FILE}" --binary-path "${COVERAGE_REPORT_DIR}" --branch echo "report=${COVERAGE_REPORT_FILE}" >> $GITHUB_OUTPUT - name: Upload coverage results (to Codecov.io) - uses: codecov/codecov-action@v5 + uses: codecov/codecov-action@v6 with: token: ${{ secrets.CODECOV_TOKEN }} files: ${{ steps.coverage.outputs.report }} diff --git a/.github/workflows/codspeed.yml b/.github/workflows/codspeed.yml index b799ca0..cb699b7 100644 --- a/.github/workflows/codspeed.yml +++ b/.github/workflows/codspeed.yml @@ -1,3 +1,5 @@ +# spell-checker:ignore backtest moonrepo + name: CodSpeed on: @@ -17,8 +19,19 @@ jobs: codspeed: name: Run benchmarks runs-on: ubuntu-latest + env: + CARGO_INCREMENTAL: 0 + strategy: + matrix: + type: [simulation, memory] + package: [ + uu_cmp, + uu_diff, + ] steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 + with: + persist-credentials: false - name: Setup rust toolchain, cache and cargo-codspeed binary uses: moonrepo/setup-rust@v1 @@ -27,11 +40,20 @@ jobs: cache-target: release bins: cargo-codspeed - - name: Build the benchmark target(s) - run: cargo codspeed build -m simulation + - name: Build benchmarks for ${{ matrix.package }} (${{ matrix.type }}) + shell: bash + run: | + echo "Building ${{ matrix.type }} benchmarks for ${{ matrix.package }}" + cargo codspeed build -m ${{ matrix.type }} -p ${{ matrix.package }} - - name: Run the benchmarks + - name: Run ${{ matrix.type }} benchmarks for ${{ matrix.package }} uses: CodSpeedHQ/action@v4 + env: + CODSPEED_LOG: debug with: - mode: simulation - run: cargo codspeed run + mode: ${{ matrix.type }} + run: | + echo "Running ${{ matrix.type }} benchmarks for ${{ matrix.package }}" + cargo codspeed run -p ${{ matrix.package }} > /dev/null + token: ${{ 
secrets.CODSPEED_TOKEN }} + diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 3c59af5..d9aa406 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -56,7 +56,7 @@ jobs: env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 with: persist-credentials: false submodules: recursive @@ -64,9 +64,9 @@ jobs: # we specify bash to get pipefail; it guards against the `curl` command # failing. otherwise `sh` won't catch that `curl` returned non-0 shell: bash - run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/axodotdev/cargo-dist/releases/download/v0.30.3/cargo-dist-installer.sh | sh" + run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/axodotdev/cargo-dist/releases/download/v0.31.0/cargo-dist-installer.sh | sh" - name: Cache dist - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v6 with: name: cargo-dist-cache path: ~/.cargo/bin/dist @@ -82,7 +82,7 @@ jobs: cat plan-dist-manifest.json echo "manifest=$(jq -c "." 
plan-dist-manifest.json)" >> "$GITHUB_OUTPUT" - name: "Upload dist-manifest.json" - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v6 with: name: artifacts-plan-dist-manifest path: plan-dist-manifest.json @@ -116,7 +116,7 @@ jobs: - name: enable windows longpaths run: | git config --global core.longpaths true - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 with: persist-credentials: false submodules: recursive @@ -131,7 +131,7 @@ jobs: run: ${{ matrix.install_dist.run }} # Get the dist-manifest - name: Fetch local artifacts - uses: actions/download-artifact@v4 + uses: actions/download-artifact@v7 with: pattern: artifacts-* path: target/distrib/ @@ -158,7 +158,7 @@ jobs: cp dist-manifest.json "$BUILD_MANIFEST_NAME" - name: "Upload artifacts" - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v6 with: name: artifacts-build-local-${{ join(matrix.targets, '_') }} path: | @@ -175,19 +175,19 @@ jobs: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} BUILD_MANIFEST_NAME: target/distrib/global-dist-manifest.json steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 with: persist-credentials: false submodules: recursive - name: Install cached dist - uses: actions/download-artifact@v4 + uses: actions/download-artifact@v7 with: name: cargo-dist-cache path: ~/.cargo/bin/ - run: chmod +x ~/.cargo/bin/dist # Get all the local artifacts for the global tasks to use (for e.g. 
checksums) - name: Fetch local artifacts - uses: actions/download-artifact@v4 + uses: actions/download-artifact@v7 with: pattern: artifacts-* path: target/distrib/ @@ -205,7 +205,7 @@ jobs: cp dist-manifest.json "$BUILD_MANIFEST_NAME" - name: "Upload artifacts" - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v6 with: name: artifacts-build-global path: | @@ -225,19 +225,19 @@ jobs: outputs: val: ${{ steps.host.outputs.manifest }} steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 with: persist-credentials: false submodules: recursive - name: Install cached dist - uses: actions/download-artifact@v4 + uses: actions/download-artifact@v7 with: name: cargo-dist-cache path: ~/.cargo/bin/ - run: chmod +x ~/.cargo/bin/dist # Fetch artifacts from scratch-storage - name: Fetch artifacts - uses: actions/download-artifact@v4 + uses: actions/download-artifact@v7 with: pattern: artifacts-* path: target/distrib/ @@ -250,14 +250,14 @@ jobs: cat dist-manifest.json echo "manifest=$(jq -c "." 
dist-manifest.json)" >> "$GITHUB_OUTPUT" - name: "Upload dist-manifest.json" - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v6 with: # Overwrite the previous copy name: artifacts-dist-manifest path: dist-manifest.json # Create a GitHub Release while uploading all files to it - name: "Download GitHub Artifacts" - uses: actions/download-artifact@v4 + uses: actions/download-artifact@v7 with: pattern: artifacts-* path: artifacts @@ -290,7 +290,7 @@ jobs: env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 with: persist-credentials: false submodules: recursive diff --git a/.vscode/cSpell.json b/.vscode/cSpell.json new file mode 100644 index 0000000..a5feec3 --- /dev/null +++ b/.vscode/cSpell.json @@ -0,0 +1,62 @@ +// `cspell` settings +// spell-checker:ignore oranda +{ + // version of the setting file + "version": "0.2", + // spelling language + "language": "en", + // custom dictionaries + "dictionaries": [ + "acronyms+names", + "jargon", + "people", + "shell", + "workspace" + ], + "dictionaryDefinitions": [ + { + "name": "acronyms+names", + "path": "./cspell.dictionaries/acronyms+names.wordlist.txt" + }, + { + "name": "jargon", + "path": "./cspell.dictionaries/jargon.wordlist.txt" + }, + { + "name": "people", + "path": "./cspell.dictionaries/people.wordlist.txt" + }, + { + "name": "shell", + "path": "./cspell.dictionaries/shell.wordlist.txt" + }, + { + "name": "workspace", + "path": "./cspell.dictionaries/workspace.wordlist.txt" + } + ], + // files to ignore (globs supported) + "ignorePaths": [ + ".git/**", + "Cargo.lock", + "oranda.json", + "target/**", + "tests/**/fixtures/**", + "src/uu/dd/test-resources/**", + "vendor/**", + "**/*.svg", + "src/uu/*/locales/*.ftl", + "src/uudiff/locales/*.ftl", + ".devcontainer/**", + "util/gnu-patches/**", + "docs/src/release-notes/**", + "src/uu/*/benches/*.rs", + "src/uudiff/src/lib/features/benchmark.rs", + "util/check-safe-traversal.sh", + ], + 
"enableGlobDot": true, + // words to ignore (even if they are in the flagWords) + "ignoreWords": [], + // words to always consider correct + "words": [] +} \ No newline at end of file diff --git a/.vscode/cspell.dictionaries/acronyms+names.wordlist.txt b/.vscode/cspell.dictionaries/acronyms+names.wordlist.txt new file mode 100644 index 0000000..bdfcc26 --- /dev/null +++ b/.vscode/cspell.dictionaries/acronyms+names.wordlist.txt @@ -0,0 +1,80 @@ +# * diffutils project + + +# *** the following part is a copy of coreutils *** +# * abbreviations / acronyms +aarch +AIX +ASLR # address space layout randomization +AST # abstract syntax tree +CATN # busybox cat -n feature flag +CATV # busybox cat -v feature flag +CICD # continuous integration/deployment +CPU +CPUs +DevOps +Ext3 +FIFO +FIFOs +flac +FQDN # fully qualified domain name +GID # group ID +GIDs +GNU +GNUEABI +GNUEABIhf +impls +JFS +loongarch +lzma +MSRV # minimum supported rust version +MSVC +NixOS +POSIX +POSIXLY +ReiserFS +RISC +RISCV +RNG # random number generator +RNGs +Solaris +TOCTOU # time-of-check time-of-use +UID # user ID +UIDs +UUID # universally unique identifier +WASI +WASM +XFS + +# * names +BusyBox +BusyTest +Codacy +Cygwin +Deno +EditorConfig +EPEL +FreeBSD +genric +Gmail +Illumos +Irix +libfuzzer +MacOS +MinGW +Minix +MS-DOS +MSDOS +NetBSD +Novell +Nushell +OpenBSD +PowerPC +SELinux +SkyPack +SysV +Xenix +Yargs + +# Product +codspeed diff --git a/.vscode/cspell.dictionaries/jargon.wordlist.txt b/.vscode/cspell.dictionaries/jargon.wordlist.txt new file mode 100644 index 0000000..e814469 --- /dev/null +++ b/.vscode/cspell.dictionaries/jargon.wordlist.txt @@ -0,0 +1,249 @@ +# * diffutils project + + +# *** the following part is a copy of coreutils *** +AFAICT +asimd +ASIMD +alloc +arity +autogenerate +autogenerated +autogenerates +bitmask +bitwise +bufferram +bytewise +canonicalization +canonicalize +canonicalizing +capget +codepoint +codepoints +codeready +codegen +colorizable +colorize +coprime 
+consts +conv +cyclomatic +dedup +deduplication +demangle +denoland +deque +dequeue +dev +EINTR +eintr +nextest +SIGUSR +nonprinting +multibyte +devs +discoverability +duplicative +dsync +endianness +enqueue +ERANGE +errored +executable +executables +exponentiate +eval +esac +falsey +fileio +filesystem +filesystems +flamegraph +footgun +freeram +fsxattr +fullblock +getfacl +getfattr +getopt +gibi +gibibytes +glob +globbing +hardcode +hardcoded +hardcoding +hardfloat +hardlink +hardlinks +hasher +hwcaps +infile +iflag +iflags +kibi +kibibytes +langinfo +libacl +lcase +listxattr +llistxattr +lossily +lstat +makedev +mebi +mebibytes +mergeable +microbenchmark +microbenchmarks +microbenchmarking +monomorphized +multibyte +multicall +nmerge +noatime +nocache +nocreat +noctty +noerror +noexec +nofollow +nolinks +nonblock +nonportable +nonprinting +nonrepeating +nonseekable +notrunc +nowrite +noxfer +ofile +oflag +oflags +openat +pdeathsig +peekable +performant +prctl +precompiled +precompute +preload +prepend +prepended +primality +pseudoprime +pseudoprimes +quantiles +readonly +ROOTFS +reparse +rposition +seedable +semver +semiprime +semiprimes +setcap +setfacl +setfattr +SETFL +setlocale +shortcode +shortcodes +setpgid +sigaction +CHLD +chld +SIGCHLD +sigchld +siginfo +SIGTTIN +sigttin +SIGTTOU +sigttou +sigusr +strcasecmp +subcommand +subexpression +submodule +sync +symlink +symlinks +syscall +syscalls +sysconf +tokenize +toolchain +totalram +truthy +tunables +TUNABLES +ucase +unbuffered +udeps +unescape +unintuitive +unprefixed +unportable +unsync +urand +whitespace +wordlist +wordlists +xattrs +xpass + +# * abbreviations +AMPM +ampm +consts +deps +dev +fdlimit +inacc +maint +proc +procs +TOCTOU + +# * constants +xffff + +# * variables +delim +errno +progname +retval +subdir +val +vals +inval +nofield + +# * clippy +uninlined +nonminimal +rposition + +# * CPU/hardware features +ASIMD +asimd +hwcaps +PCLMUL +pclmul +PCLMULQDQ +pclmulqdq +PMULL +pmull +TUNABLES 
+tunables +VMULL +vmull +ENOTSUP +enotsup +SETFL +tmpfs + +Hijri +Nowruz +charmap +hijri diff --git a/.vscode/cspell.dictionaries/people.wordlist.txt b/.vscode/cspell.dictionaries/people.wordlist.txt new file mode 100644 index 0000000..987bfb7 --- /dev/null +++ b/.vscode/cspell.dictionaries/people.wordlist.txt @@ -0,0 +1,9 @@ +# this list is unique to diffutils +Gunter Schmidt + Gunter + Schmidt +Sylvestre Ledru + Sylvestre + Ledru + +axodotdev diff --git a/.vscode/cspell.dictionaries/shell.wordlist.txt b/.vscode/cspell.dictionaries/shell.wordlist.txt new file mode 100644 index 0000000..eb5be04 --- /dev/null +++ b/.vscode/cspell.dictionaries/shell.wordlist.txt @@ -0,0 +1,123 @@ +# * diffutils project + + +# *** the following part is a copy of coreutils *** +# * Mac +clonefile + +# * POSIX +TMPDIR +adduser +csh +globstar +inotify +localtime +mksh +mountinfo +mountpoint +mtab +nullglob + +# * Signals +SIGUSR +SIGUSR1 +SIGUSR2 +SIGINT +SIGTERM +SIGKILL +SIGSTOP +SIGCONT +SIGPIPE +SIGALRM +SIGCHLD +passwd +pipefail +popd +ptmx +pushd +setarch +sh +sudo +sudoedit +tcsh +tzselect +urandom +VARNAME +wtmp +zsh + +# * Windows +APPDATA +COMSPEC +HKCU +HKLM +HOMEDRIVE +HOMEPATH +LOCALAPPDATA +PATHEXT +PATHEXT +SYSTEMROOT +USERDOMAIN +USERNAME +USERPROFILE +procmon + +# * `git` +gitattributes +gitignore + +# * `make` (`gmake`) +CURDIR +GNUMAKEFLAGS +GNUMakefile +LIBPATTERNS +MAKECMDGOALS +MAKEFILES +MAKEFLAGS +MAKELEVEL +MAKESHELL +SHELLSTATUS +VPATH +abspath +addprefix +addsuffix +endef +findstring +firstword +ifeq +ifneq +lastword +notdir +patsubst + + +# * `npm` +preversion + +# * utilities +cachegrind +chglog +codespell +commitlint +dprint +dtrace +flamegraph +flamegraphs +gcov +gmake +grcov +grep +markdownlint +rerast +rollup +samply +sed +selinuxenabled +sestatus +vdir +wslpath +xargs + +# * directories +sbin +libexec diff --git a/.vscode/cspell.dictionaries/workspace.wordlist.txt b/.vscode/cspell.dictionaries/workspace.wordlist.txt new file mode 100644 index 
0000000..d87630e --- /dev/null +++ b/.vscode/cspell.dictionaries/workspace.wordlist.txt @@ -0,0 +1,402 @@ +# * diffutils project +diffutils +sdiff +uudiff + +debuginfo +tabsize + +# *** the following part is a copy of coreutils *** +# * cargo +cdylib +rlib + +# * crates +advapi +advapi32-sys +aho-corasick +backtrace +blake2b_simd + +# * uutils project +uutils +coreutils +uucore +uutests +ucmd +uumain +rlimit +mkfifo +urandom +uchild +ello +bstr +bytecount +byteorder +chacha +chrono +conv +corasick +crossterm +exacl +filetime +formatteriteminfo +fsext +getopts +getrandom +globset +indicatif +itertools +itoa +iuse +langid +lscolors +mdbook +memchr +multifilereader +onig +ouroboros +peekreader +quickcheck +rand_chacha +ringbuffer +rlimit +rstest +smallvec +tempdir +tempfile +termion +termios +termsize +termwidth +textwrap +thiserror +unic +ureq +walkdir +winapi +xattr + +# * rust/rustc +RUSTDOCFLAGS +RUSTFLAGS +clippy +rustc +rustfmt +rustup +rustdoc +# +bitor # BitOr trait function +bitxor # BitXor trait function +concat +fract +powi +println +repr +rfind +struct +structs +substr +splitn +trunc +uninit + +# * uutils +basenc +chcon +chgrp +chmod +chown +chroot +cksum +csplit +dircolors +hashsum +hostid +logname +mkdir +mkfifo +mknod +mktemp +nohup +nproc +numfmt +pathchk +printenv +printf +readlink +realpath +relpath +rmdir +runcon +shuf +sprintf +stdbuf +stty +tsort +uname +unexpand +whoami + +# * vars/errno +errno +EACCES +EBADF +EBUSY +EEXIST +EINVAL +ENODATA +ENOENT +ENOSYS +ENOTEMPTY +EOPNOTSUPP +EPERM +EPIPE +EROFS + +# * vars/fcntl +F_GETFL + GETFL +fcntl +vmsplice + +# * vars/libc +COMFOLLOW +EXDEV +FILENO +FTSENT +HOSTSIZE +IDSIZE +IFBLK +IFCHR +IFDIR +IFIFO +IFLNK +IFMT +IFREG +IFSOCK +IRGRP +IROTH +IRUSR +ISDIR +ISGID +ISUID +ISVTX +IWGRP +IWOTH +IWUSR +IXGRP +IXOTH +IXUSR +LINESIZE +NAMESIZE +RTLD_NEXT + RTLD +SIGABRT +SIGINT +SIGKILL +SIGSTOP +SIGTERM +SYS_fdatasync +SYS_syncfs +USERSIZE +accpath +addrinfo +addrlen +blocksize +canonname +chroot +dlsym 
+execvp +fdatasync +freeaddrinfo +getaddrinfo +getegid +geteuid +getgid +getgrgid +getgrnam +getgrouplist +getgroups +getpwent +getpwnam +getpwuid +getuid +inode +inodes +isatty +lchown +pathlen +setgid +setgroups +settime +setuid +socketpair +socktype +statfs +statp +statvfs +strcmp +strerror +strlen +syncfs +umask +waitpid +wcslen + +# * vars/nix +iovec +unistd + +# * vars/signals +SIGPIPE + +# * vars/std +CString +pathbuf + +# * vars/stat +bavail +bfree +bsize +ffree +frsize +fsid +fstat +fstype +namelen +# unix::fs::MetadataExt +atime # access time +blksize # blocksize for file system I/O +blocks # number of blocks allocated to file +ctime # creation time +dev # ID of device containing the file +gid # group ID of file owner +ino # inode number +mode # permissions +mtime # modification time +nlink # number of hard links to file +rdev # device ID if file is a character/block special file +size # total size of file in bytes +uid # user ID of file owner +nsec # nanosecond measurement scale +# freebsd::MetadataExt +iosize + +# * vars/time +Timespec +isdst +nanos +nsec +nsecs +strftime +strptime +subsec +usec +usecs +utcoff + +# * vars/utmpx +endutxent +getutxent +getutxid +getutxline +pututxline +setutxent +utmp +utmpx +utmpxname + +# * vars/winapi +DWORD +SYSTEMTIME +LPVOID +LPWSTR +ULONG +ULONGLONG +UNLEN +WCHAR +WSADATA +errhandlingapi +fileapi +handleapi +lmcons +minwinbase +minwindef +processthreadsapi +synchapi +sysinfoapi +winbase +winerror +winnt +winsock + +# * vars/selinux +freecon +getfilecon +lgetfilecon +lsetfilecon +restorecon +setfilecon + +# * vars/uucore +optflag +optflagmulti +optflagopt +optmulti +optopt + +# * uutils +ccmd +coreopts +coreutils +keepenv +libc +libstdbuf +musl +tmpd +uchild +ucmd +ucommand +utmpx +uucore +uucore_procs +uudoc +uufuzz +uumain +uutil +uutests +uutils + +# * function names +getcwd + +# * other +weblate +algs + +# * stty terminal flags +brkint +cstopb +decctlq +echoctl +echoe +echoke +ignbrk +ignpar +icrnl +isig +istrip 
+litout +opost +parodd +ENOTTY + +# translation tests +CLICOLOR +erreur +Utilisation +merror +merreur +verbo +inattendu diff --git a/Cargo.lock b/Cargo.lock index dbd92b4..58505a1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,12 @@ # It is not intended for manual editing. version = 4 +[[package]] +name = "adler2" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" + [[package]] name = "aho-corasick" version = "1.1.2" @@ -20,11 +26,55 @@ dependencies = [ "libc", ] +[[package]] +name = "anstream" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "824a212faf96e9acacdbd09febd34438f8f711fb84e09a8916013cd7815ca28d" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + [[package]] name = "anstyle" -version = "1.0.6" +version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8901269c6307e8d93993578286ac0edf7f195079ffff5ebdeea6a59ffb7e36bc" +checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000" + +[[package]] +name = "anstyle-parse" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52ce7f38b242319f7cabaa6813055467063ecdc9d355bbb4ce0c68908cd8130e" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys 0.61.2", +] 
[[package]] name = "anyhow" @@ -62,6 +112,19 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" +[[package]] +name = "bigdecimal" +version = "0.4.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4d6867f1565b3aad85681f1015055b087fcfd840d6aeee6eee7f2da317603695" +dependencies = [ + "autocfg", + "libm", + "num-bigint", + "num-integer", + "num-traits", +] + [[package]] name = "bitflags" version = "2.4.2" @@ -85,6 +148,12 @@ version = "3.15.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7ff69b9dd49fd426c69a0db9fc04dd934cdb6645ff000864d98f7e2af8830eaa" +[[package]] +name = "bytecount" +version = "0.6.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "175812e0be2bccb6abe50bb8d566126198344f707e304f45c648fd8f2cc0365e" + [[package]] name = "cc" version = "1.0.90" @@ -129,29 +198,31 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.13" +version = "4.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fbb260a053428790f3de475e304ff84cdbc4face759ea7a3e64c1edd938a7fc" +checksum = "b193af5b67834b676abd72466a96c1024e6a6ad978a1f484bd90b85c94041351" dependencies = [ "clap_builder", ] [[package]] name = "clap_builder" -version = "4.5.13" +version = "4.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64b17d7ea74e9f833c7dbf2cbe4fb12ff26783eda4782a8975b72f895c9b4d99" +checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f" dependencies = [ + "anstream", "anstyle", "clap_lex", + "strsim", "terminal_size", ] [[package]] name = "clap_lex" -version = "0.7.7" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3e64b0cc0439b12df2fa678eae89a1c56a529fd067a9115f7827f1fffd22b32" +checksum = 
"c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9" [[package]] name = "codspeed" @@ -165,7 +236,7 @@ dependencies = [ "getrandom 0.2.17", "glob", "libc", - "nix", + "nix 0.31.2", "serde", "serde_json", "statrs", @@ -213,6 +284,12 @@ dependencies = [ "regex-lite", ] +[[package]] +name = "colorchoice" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570" + [[package]] name = "colored" version = "2.2.0" @@ -244,6 +321,40 @@ dependencies = [ "libc", ] +[[package]] +name = "crc32fast" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "ctor" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "424e0138278faeb2b401f174ad17e715c829512d74f3d1e81eb43365c2e0590e" +dependencies = [ + "ctor-proc-macro", + "dtor", +] + +[[package]] +name = "ctor-proc-macro" +version = "0.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52560adf09603e58c9a7ee1fe1dcb95a16927b17c127f0ac02d6e768a0e25bc1" + +[[package]] +name = "deranged" +version = "0.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7cd812cc2bc1d69d4764bd80df88b4317eaef9e773c75226407d9bc0876b211c" +dependencies = [ + "powerfmt", +] + [[package]] name = "diff" version = "0.1.13" @@ -258,20 +369,36 @@ checksum = "6184e33543162437515c2e2b48714794e37845ec9851711914eec9d308f6ebe8" [[package]] name = "diffutils" -version = "0.5.0" +version = "0.5.1" dependencies = [ "assert_cmd", - "chrono", + "clap", "codspeed-divan-compat", - "diff", - "itoa", + "ctor", + "itertools", + "phf", + "phf_codegen", "predicates", - "pretty_assertions", "rand", "regex", - "same-file", "tempfile", - "unicode-width", + "textwrap", + "uu_cmp", + "uu_diff", + 
"uucore", + "uudiff", + "uutests", +] + +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn", ] [[package]] @@ -285,6 +412,33 @@ dependencies = [ "syn", ] +[[package]] +name = "dns-lookup" +version = "3.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e39034cee21a2f5bbb66ba0e3689819c4bb5d00382a282006e802a7ffa6c41d" +dependencies = [ + "cfg-if", + "libc", + "socket2", + "windows-sys 0.60.2", +] + +[[package]] +name = "dtor" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "404d02eeb088a82cfd873006cb713fe411306c7d182c344905e101fb1167d301" +dependencies = [ + "dtor-proc-macro", +] + +[[package]] +name = "dtor-proc-macro" +version = "0.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f678cf4a922c215c63e0de95eb1ff08a958a81d47e485cf9da1e27bf6305cfa5" + [[package]] name = "either" version = "1.15.0" @@ -313,6 +467,16 @@ version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e8c02a5121d4ea3eb16a80748c74f5549a5665e4c21333c6098f283870fbdea6" +[[package]] +name = "flate2" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + [[package]] name = "float-cmp" version = "0.10.0" @@ -322,6 +486,51 @@ dependencies = [ "num-traits", ] +[[package]] +name = "fluent" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8137a6d5a2c50d6b0ebfcb9aaa91a28154e0a70605f112d30cb0cd4a78670477" +dependencies = [ + "fluent-bundle", + "unic-langid", +] + +[[package]] +name = "fluent-bundle" +version = "0.16.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "01203cb8918f5711e73891b347816d932046f95f54207710bda99beaeb423bf4" +dependencies = [ + "fluent-langneg", + "fluent-syntax", + "intl-memoizer", + "intl_pluralrules", + "rustc-hash", + "self_cell", + "smallvec", + "unic-langid", +] + +[[package]] +name = "fluent-langneg" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7eebbe59450baee8282d71676f3bfed5689aeab00b27545e83e5f14b1195e8b0" +dependencies = [ + "unic-langid", +] + +[[package]] +name = "fluent-syntax" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54f0d287c53ffd184d04d8677f590f4ac5379785529e5e08b1c8083acdd5c198" +dependencies = [ + "memchr", + "thiserror", +] + [[package]] name = "foldhash" version = "0.1.5" @@ -380,6 +589,12 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + [[package]] name = "iana-time-zone" version = "0.1.60" @@ -421,6 +636,31 @@ dependencies = [ "serde_core", ] +[[package]] +name = "intl-memoizer" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "310da2e345f5eb861e7a07ee182262e94975051db9e4223e909ba90f392f163f" +dependencies = [ + "type-map", + "unic-langid", +] + +[[package]] +name = "intl_pluralrules" +version = "7.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "078ea7b7c29a2b4df841a7f6ac8775ff6074020c6776d48491ce2268e068f972" +dependencies = [ + "unic-langid", +] + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" + [[package]] name = "itertools" version = "0.14.0" @@ -436,6 +676,47 @@ version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" +[[package]] +name = "jiff" +version = "0.2.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a3546dc96b6d42c5f24902af9e2538e82e39ad350b0c766eb3fbf2d8f3d8359" +dependencies = [ + "jiff-static", + "jiff-tzdb-platform", + "log", + "portable-atomic", + "portable-atomic-util", + "serde_core", + "windows-sys 0.61.2", +] + +[[package]] +name = "jiff-static" +version = "0.2.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a8c8b344124222efd714b73bb41f8b5120b27a7cc1c75593a6ff768d9d05aa4" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "jiff-tzdb" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c900ef84826f1338a557697dc8fc601df9ca9af4ac137c7fb61d4c6f2dfd3076" + +[[package]] +name = "jiff-tzdb-platform" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "875a5a69ac2bab1a891711cf5eccbec1ce0341ea805560dcd90b7a2e925132e8" +dependencies = [ + "jiff-tzdb", +] + [[package]] name = "js-sys" version = "0.3.69" @@ -459,15 +740,15 @@ checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" [[package]] name = "libc" -version = "0.2.182" +version = "0.2.183" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112" +checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d" [[package]] -name = "linux-raw-sys" -version = "0.4.15" +name = "libm" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" +checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" [[package]] name = "linux-raw-sys" @@ -487,6 +768,28 @@ version = "2.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" +[[package]] +name = "miniz_oxide" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" +dependencies = [ + "adler2", + "simd-adler32", +] + +[[package]] +name = "nix" +version = "0.30.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74523f3a35e05aba87a1d978330aef40f67b0304ac79c1c00b294c9830543db6" +dependencies = [ + "bitflags", + "cfg-if", + "cfg_aliases", + "libc", +] + [[package]] name = "nix" version = "0.31.2" @@ -505,21 +808,130 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "61807f77802ff30975e01f4f071c8ba10c022052f98b3294119f3e615d13e5be" +[[package]] +name = "num-bigint" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" +dependencies = [ + "num-integer", + "num-traits", +] + +[[package]] +name = "num-conv" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6673768db2d862beb9b39a78fdcb1a69439615d5794a1be50caa9bc92c81967" + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + [[package]] name = "num-traits" -version = "0.2.18" +version = "0.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"da0df0e5185db44f69b44f26786fe401b6c293d1907744beaa7fa62b2e5a517a" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ "autocfg", ] +[[package]] +name = "num_threads" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c7398b9c8b70908f6371f47ed36737907c87c52af34c268fed0bf0ceb92ead9" +dependencies = [ + "libc", +] + [[package]] name = "once_cell" version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" +[[package]] +name = "once_cell_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" + +[[package]] +name = "os_display" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad5fd71b79026fb918650dde6d125000a233764f1c2f1659a1c71118e33ea08f" +dependencies = [ + "unicode-width", +] + +[[package]] +name = "phf" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1562dc717473dbaa4c1f85a36410e03c047b2e7df7f45ee938fbef64ae7fadf" +dependencies = [ + "phf_shared", + "serde", +] + +[[package]] +name = "phf_codegen" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49aa7f9d80421bca176ca8dbfebe668cc7a2684708594ec9f3c0db0805d5d6e1" +dependencies = [ + "phf_generator", + "phf_shared", +] + +[[package]] +name = "phf_generator" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "135ace3a761e564ec88c03a77317a7c6b80bb7f7135ef2544dbe054243b89737" +dependencies = [ + "fastrand", + "phf_shared", +] + +[[package]] +name = "phf_shared" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"e57fef6bc5981e38c2ce2d63bfa546861309f875b8a75f092d1d54ae2d64f266" +dependencies = [ + "siphasher", +] + +[[package]] +name = "portable-atomic" +version = "1.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" + +[[package]] +name = "portable-atomic-util" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "091397be61a01d4be58e7841595bd4bfedb15f1cd54977d79b8271e94ed799a3" +dependencies = [ + "portable-atomic", +] + +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + [[package]] name = "predicates" version = "3.1.4" @@ -588,11 +1000,35 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "procfs" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25485360a54d6861439d60facef26de713b1e126bf015ec8f98239467a2b82f7" +dependencies = [ + "bitflags", + "chrono", + "flate2", + "procfs-core", + "rustix", +] + +[[package]] +name = "procfs-core" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6401bf7b6af22f78b563665d15a22e9aef27775b79b149a66ca022468a4e405" +dependencies = [ + "bitflags", + "chrono", + "hex", +] + [[package]] name = "quote" -version = "1.0.35" +version = "1.0.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" dependencies = [ "proc-macro2", ] @@ -656,18 +1092,20 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" [[package]] -name = "rustix" -version = "0.38.44" +name = "rlimit" +version = "0.11.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" +checksum = "f35ee2729c56bb610f6dba436bf78135f728b7373bdffae2ec815b2d3eb98cc3" dependencies = [ - "bitflags", - "errno", "libc", - "linux-raw-sys 0.4.15", - "windows-sys 0.59.0", ] +[[package]] +name = "rustc-hash" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" + [[package]] name = "rustix" version = "1.1.4" @@ -677,8 +1115,8 @@ dependencies = [ "bitflags", "errno", "libc", - "linux-raw-sys 0.12.1", - "windows-sys 0.59.0", + "linux-raw-sys", + "windows-sys 0.61.2", ] [[package]] @@ -690,6 +1128,12 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "self_cell" +version = "1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b12e76d157a900eb52e81bc6e9f3069344290341720e9178cde2407113ac8d89" + [[package]] name = "semver" version = "1.0.27" @@ -739,6 +1183,40 @@ dependencies = [ "zmij", ] +[[package]] +name = "simd-adler32" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" + +[[package]] +name = "siphasher" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e" + +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = "smawk" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c388c1b5e93756d0c740965c41e8822f866621d41acbdf6336a6a168f8840c" + +[[package]] +name = "socket2" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" +dependencies = [ + "libc", + "windows-sys 0.61.2", +] + [[package]] name = "statrs" version = "0.18.0" @@ -749,6 +1227,12 @@ dependencies = [ "num-traits", ] +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + [[package]] name = "syn" version = "2.0.117" @@ -769,18 +1253,18 @@ dependencies = [ "fastrand", "getrandom 0.4.2", "once_cell", - "rustix 1.1.4", - "windows-sys 0.59.0", + "rustix", + "windows-sys 0.61.2", ] [[package]] name = "terminal_size" -version = "0.3.0" +version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21bebf2b7c9e0a515f6e0f8c51dc0f8e4696391e6f1ff30379559f8365fb0df7" +checksum = "230a1b821ccbd75b185820a1f1ff7b14d21da1e442e22c0863ea5f08771a8874" dependencies = [ - "rustix 0.38.44", - "windows-sys 0.48.0", + "rustix", + "windows-sys 0.61.2", ] [[package]] @@ -789,6 +1273,82 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76" +[[package]] +name = "textwrap" +version = "0.16.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c13547615a44dc9c452a8a534638acdf07120d4b6847c8178705da06306a3057" +dependencies = [ + "smawk", + "terminal_size", + "unicode-linebreak", + "unicode-width", +] + +[[package]] +name = "thiserror" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" +dependencies = [ + 
"proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "time" +version = "0.3.47" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "743bd48c283afc0388f9b8827b976905fb217ad9e647fae3a379a9283c4def2c" +dependencies = [ + "deranged", + "itoa", + "libc", + "num-conv", + "num_threads", + "powerfmt", + "serde_core", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7694e1cfe791f8d31026952abf09c69ca6f6fa4e1a1229e18988f06a04a12dca" + +[[package]] +name = "time-macros" +version = "0.2.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e70e4c5a0e0a8a4823ad65dfe1a6930e4f4d756dcd9dd7939022b5e8c501215" +dependencies = [ + "num-conv", + "time-core", +] + +[[package]] +name = "tinystr" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42d3e9c45c09de15d06dd8acf5f4e0e399e85927b7f00711024eb7ae10fa4869" +dependencies = [ + "displaydoc", + "serde_core", + "zerovec", +] + [[package]] name = "toml_datetime" version = "1.0.0+spec-1.1.0" @@ -819,12 +1379,45 @@ dependencies = [ "winnow", ] +[[package]] +name = "type-map" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb30dbbd9036155e74adad6812e9898d03ec374946234fbcebd5dfc7b9187b90" +dependencies = [ + "rustc-hash", +] + +[[package]] +name = "unic-langid" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a28ba52c9b05311f4f6e62d5d9d46f094bd6e84cb8df7b3ef952748d752a7d05" +dependencies = [ + "unic-langid-impl", +] + +[[package]] +name = "unic-langid-impl" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dce1bf08044d4b7a94028c93786f8566047edc11110595914de93362559bc658" +dependencies = [ + "tinystr", +] + [[package]] name = "unicode-ident" version = "1.0.12" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" +[[package]] +name = "unicode-linebreak" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b09c83c3c29d37506a3e260c08c03743a6bb66a9cd432c6934ab501a190571f" + [[package]] name = "unicode-width" version = "0.2.2" @@ -837,6 +1430,135 @@ version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "uu_cmp" +version = "0.5.1" +dependencies = [ + "bytecount", + "clap", + "codspeed-divan-compat", + "diff", + "fluent", + "itoa", + "pretty_assertions", + "regex", + "same-file", + "tempfile", + "unicode-width", + "uucore", + "uudiff", +] + +[[package]] +name = "uu_diff" +version = "0.5.1" +dependencies = [ + "clap", + "codspeed-divan-compat", + "diff", + "fluent", + "pretty_assertions", + "rand", + "regex", + "same-file", + "tempfile", + "unicode-width", + "uucore", + "uudiff", +] + +[[package]] +name = "uu_test" +version = "0.5.1" +dependencies = [ + "clap", + "fluent", + "libc", + "tempfile", + "thiserror", + "uucore", +] + +[[package]] +name = "uucore" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8038531f506a34ab4612b93f97d5f40759768cd34a83fd2af041b84fcbde474" +dependencies = [ + "bigdecimal", + "clap", + "dns-lookup", + "fluent", + "fluent-bundle", + "fluent-syntax", + "jiff", + "libc", + "nix 0.30.1", + "num-traits", + "os_display", + "procfs", + "rustc-hash", + "thiserror", + "time", + "unic-langid", + "uucore_procs", + "wild", +] + +[[package]] +name = "uucore_procs" +version = "0.7.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f63e2d5083ff0983193a33e2d57fd271c7e3e3e7df8e46e8f471865647b2cbc" +dependencies = [ + "proc-macro2", + "quote", +] + +[[package]] +name = "uudiff" +version = "0.5.1" +dependencies = [ + "chrono", + "clap", + "fluent", + "fluent-bundle", + "fluent-syntax", + "nix 0.31.2", + "os_display", + "pretty_assertions", + "rand", + "regex", + "same-file", + "tempfile", + "thiserror", + "unic-langid", + "unicode-width", + "uucore", +] + +[[package]] +name = "uutests" +version = "0.5.1" +dependencies = [ + "ctor", + "libc", + "nix 0.31.2", + "pretty_assertions", + "rand", + "regex", + "rlimit", + "tempfile", + "uucore", + "xattr", +] + [[package]] name = "wait-timeout" version = "0.2.0" @@ -958,6 +1680,15 @@ dependencies = [ "semver", ] +[[package]] +name = "wild" +version = "2.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3131afc8c575281e1e80f36ed6a092aa502c08b18ed7524e86fbbb12bb410e1" +dependencies = [ + "glob", +] + [[package]] name = "winapi" version = "0.3.9" @@ -1000,41 +1731,35 @@ dependencies = [ [[package]] name = "windows-link" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45e46c0661abb7180e7b9c281db115305d49ca1709ab8242adf09666d2173c65" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" [[package]] name = "windows-sys" -version = "0.48.0" +version = "0.59.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" dependencies = [ - "windows-targets 0.48.5", + "windows-targets 0.52.6", ] [[package]] name = "windows-sys" -version = "0.59.0" +version = "0.60.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" 
+checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" dependencies = [ - "windows-targets 0.52.6", + "windows-targets 0.53.5", ] [[package]] -name = "windows-targets" -version = "0.48.5" +name = "windows-sys" +version = "0.61.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" dependencies = [ - "windows_aarch64_gnullvm 0.48.5", - "windows_aarch64_msvc 0.48.5", - "windows_i686_gnu 0.48.5", - "windows_i686_msvc 0.48.5", - "windows_x86_64_gnu 0.48.5", - "windows_x86_64_gnullvm 0.48.5", - "windows_x86_64_msvc 0.48.5", + "windows-link", ] [[package]] @@ -1046,7 +1771,7 @@ dependencies = [ "windows_aarch64_gnullvm 0.52.6", "windows_aarch64_msvc 0.52.6", "windows_i686_gnu 0.52.6", - "windows_i686_gnullvm", + "windows_i686_gnullvm 0.52.6", "windows_i686_msvc 0.52.6", "windows_x86_64_gnu 0.52.6", "windows_x86_64_gnullvm 0.52.6", @@ -1054,10 +1779,21 @@ dependencies = [ ] [[package]] -name = "windows_aarch64_gnullvm" -version = "0.48.5" +name = "windows-targets" +version = "0.53.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" +checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" +dependencies = [ + "windows-link", + "windows_aarch64_gnullvm 0.53.1", + "windows_aarch64_msvc 0.53.1", + "windows_i686_gnu 0.53.1", + "windows_i686_gnullvm 0.53.1", + "windows_i686_msvc 0.53.1", + "windows_x86_64_gnu 0.53.1", + "windows_x86_64_gnullvm 0.53.1", + "windows_x86_64_msvc 0.53.1", +] [[package]] name = "windows_aarch64_gnullvm" @@ -1066,10 +1802,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" [[package]] -name = "windows_aarch64_msvc" -version = "0.48.5" +name = 
"windows_aarch64_gnullvm" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" +checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" [[package]] name = "windows_aarch64_msvc" @@ -1078,10 +1814,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" [[package]] -name = "windows_i686_gnu" -version = "0.48.5" +name = "windows_aarch64_msvc" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" +checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" [[package]] name = "windows_i686_gnu" @@ -1089,6 +1825,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" +[[package]] +name = "windows_i686_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" + [[package]] name = "windows_i686_gnullvm" version = "0.52.6" @@ -1096,10 +1838,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" [[package]] -name = "windows_i686_msvc" -version = "0.48.5" +name = "windows_i686_gnullvm" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" +checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" [[package]] name = "windows_i686_msvc" @@ -1108,10 +1850,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" [[package]] -name = "windows_x86_64_gnu" -version = "0.48.5" +name = "windows_i686_msvc" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" +checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" [[package]] name = "windows_x86_64_gnu" @@ -1120,10 +1862,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" [[package]] -name = "windows_x86_64_gnullvm" -version = "0.48.5" +name = "windows_x86_64_gnu" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" +checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" [[package]] name = "windows_x86_64_gnullvm" @@ -1132,10 +1874,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" [[package]] -name = "windows_x86_64_msvc" -version = "0.48.5" +name = "windows_x86_64_gnullvm" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" +checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" [[package]] name = "windows_x86_64_msvc" @@ -1143,6 +1885,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" + [[package]] name = "winnow" version = "0.7.14" @@ -1240,12 +1988,38 @@ 
dependencies = [ "wasmparser", ] +[[package]] +name = "xattr" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32e45ad4206f6d2479085147f02bc2ef834ac85886624a23575ae137c8aa8156" +dependencies = [ + "libc", + "rustix", +] + [[package]] name = "yansi" version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" +[[package]] +name = "zerofrom" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" + +[[package]] +name = "zerovec" +version = "0.11.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c28719294829477f525be0186d13efa9a3c602f7ec202ca9e353d310fb9a002" +dependencies = [ + "serde", + "zerofrom", +] + [[package]] name = "zmij" version = "1.0.21" diff --git a/Cargo.toml b/Cargo.toml index 1673839..532bc3b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,52 +1,249 @@ +# diffutils (uutils) +# * see the repository LICENSE, README, and CONTRIBUTING files for more information + +# spell-checker:ignore (libs) ahash bigdecimal datetime serde gethostid kqueue libselinux mangen memmap uuhelp startswith constness expl unnested logind cfgs interner + [package] name = "diffutils" -version = "0.5.0" -edition = "2021" -description = "A CLI app for generating diff files" -license = "MIT OR Apache-2.0" +description = "diffutils ~ GNU diffutils (updated); implemented as universal (cross-platform) utils, written in Rust" +default-run = "diffutils" repository = "https://github.com/uutils/diffutils" - -[lib] -name = "diffutilslib" -path = "src/lib.rs" +edition.workspace = true +rust-version.workspace = true +version.workspace = true +authors.workspace = true +license.workspace = true +homepage.workspace = true +keywords.workspace = true +categories.workspace = true [[bin]] name = "diffutils" -path = 
"src/main.rs" +path = "src/bin/diffutils.rs" -[dependencies] -chrono = "0.4.38" -diff = "0.1.13" -itoa = "1.0.11" +[features] +default = ["feat_common_core"] +## OS feature shortcodes +macos = ["feat_os_macos"] +unix = ["feat_os_unix"] +windows = ["feat_os_windows"] +# +## (primary platforms) feature sets +# "feat_os_macos" == set of utilities which can be built/run on the MacOS platform +feat_os_macos = [ + "feat_os_unix", ## == a modern/usual *nix platform + # +# "feat_require_unix_hostid", +] +# "feat_os_unix" == set of utilities which can be built/run on modern/usual *nix platforms. +feat_os_unix = [ +# "feat_Tier1", +# # +# "feat_require_unix", +# "feat_require_unix_hostid", +# "feat_require_unix_utmpx", +] +# "feat_os_windows" == set of utilities which can be built/run on modern/usual windows platforms +feat_os_windows = [ +# "feat_Tier1", ## == "feat_os_windows_legacy" + "hostname" +] +# +# TODO How are features centralized in this workspace file? +# # instead of limiting to KiB, MiB, etc, one can write kib, mib, Mb or whatever case. +# TODO feat_allow_case_insensitive_number_units = [] +# # Enables a check on options defined in NOT_YET_IMPLEMENTED. +# # If on the parser will return an error message in these cases. +# # This is preferable when running the util as unsupported options +# # are pointed out to the user, but can make tests fail. 
+## +## feature sets +## (common/core and Tier1) feature sets +# "feat_common_core" == baseline core set of utilities which can be built/run on most targets +feat_common_core = [ + "cmp", "diff" +] + +# Turn bench for diffutils diff off +feat_bench_not_diff = [] + +[workspace] +resolver = "3" +members = [ + ".", + "src/uu/*", +# "src/uu/stdbuf/src/libstdbuf", + "src/uudiff", +# "src/uucore_procs", + "tests/uutests", + # "fuzz", # TODO fuzz +] + +[workspace.package] +authors = ["uutils developers"] +categories = ["command-line-utilities"] +edition = "2024" +rust-version = "1.88.0" +homepage = "https://github.com/uutils/diffutils" +description = "A CLI app for generating diff files" +keywords = ["diffutils", "uutils", "cross-platform", "cli", "utility"] +license = "MIT" +# license = "MIT OR Apache-2.0" +readme = "README.package.md" +version = "0.5.1" + + +[workspace.dependencies] +assert_cmd = "2.2.0" +bytecount = "0.6.9" +chrono = "0.4.0" +clap = { version = "4.6", features = ["wrap_help", "cargo", "color"] } +const_format = "0.2.35" +ctor = "0.6.0" +diff_crate = { package = "diff", version = "0.1.13" } +divan = { version = "4.3.0", package = "codspeed-divan-compat" } +itertools = "0.14.0" +itoa = "1.0.18" +libc = "0.2.183" +nix = { version = "0.31.2", default-features = false } +num-traits = "0.2.19" +phf = "0.13.1" +phf_codegen = "0.13.1" +predicates = "3.1.0" +pretty_assertions = "1.4.0" +rand = "0.10.0" regex = "1.10.4" +rlimit = "0.11.0" same-file = "1.0.6" +tempfile = "3.27.0" +textwrap = { version = "0.16.1", features = ["terminal_size"] } +thiserror = "2.0.3" unicode-width = "0.2.0" +xattr = "1.3.1" + +# Fluent dependencies +fluent = "0.17.0" +fluent-bundle = "0.16.0" +unic-langid = "0.9.6" +fluent-syntax = "0.12.0" + +uucore = { version = "0.7.0", features = ["parser-size"]} +uudiff = { package = "uudiff", path = "src/uudiff" } +uutests = { version = "0.5.1", package = "uutests", path = "tests/uutests" } + +[dependencies] +# clap_complete = { workspace = 
true, optional = true } +# clap_mangen = { workspace = true, optional = true } +# bytecount.workspace = true +clap.workspace = true +itertools.workspace = true +phf.workspace = true +# sdiff.workspace = true +textwrap.workspace = true +uucore.workspace = true +uudiff.workspace = true + +# * uutils +# uu_test = { optional = true, package = "uu_test", path = "src/uu/test" } +# +cmp = { optional = true, package = "uu_cmp", path = "src/uu/cmp" } +diff = { optional = true, package = "uu_diff", path = "src/uu/diff" } +# diff3 = { package = "uu_diff3", path = "src/uu/diff3" } +# sdiff = {optional = true, package = "uu_sdiff", path = "src/uu/sdiff" } [dev-dependencies] -assert_cmd = "2.0.14" -divan = { version = "4.3.0", package = "codspeed-divan-compat" } -pretty_assertions = "1.4.0" -predicates = "3.1.0" -rand = "0.10.0" -tempfile = "3.26.0" +assert_cmd.workspace = true +ctor.workspace = true +divan.workspace = true +predicates.workspace = true +rand.workspace = true +regex.workspace = true +tempfile.workspace = true +uutests.workspace = true + +[build-dependencies] +phf_codegen.workspace = true [profile.release] -lto = "thin" +lto = true +panic = "abort" codegen-units = 1 +# for flamegraph +# debug = 1 + +# A release-like profile that is as small as possible. +[profile.release-small] +inherits = "release" +opt-level = "z" +strip = true + +[profile.release-fast] +inherits = "release" panic = "abort" -# alias profile for 'dist' +# A release-like profile with debug info for profiling. +# See https://github.com/mstange/samply . 
+[profile.profiling] +inherits = "release" +panic = "unwind" +debug = true + +# The profile that 'dist' will build with [profile.dist] inherits = "release" +lto = "thin" -[[bench]] -name = "bench_diffutils" -path = "benches/bench-diffutils.rs" -harness = false +[lints] +workspace = true + +[workspace.lints.clippy] +collapsible_if = { level = "allow", priority = 127 } # remove me +# The counts were generated with this command: +# cargo clippy --all-targets --workspace --message-format=json --quiet \ +# | jq -r '.message.code.code | select(. != null and startswith("clippy::"))' \ +# | sort | uniq -c | sort -h -r +# +# remove large_stack_arrays when https://github.com/rust-lang/rust-clippy/issues/13774, which is closed. +# +all = { level = "warn", priority = -1 } +cargo = { level = "warn", priority = -1 } +pedantic = { level = "warn", priority = -1 } +use_self = "warn" # nursery lint +cargo_common_metadata = "allow" # 3240 +multiple_crate_versions = "allow" # 2882 +missing_errors_doc = "allow" # 1572 +missing_panics_doc = "allow" # 946 +must_use_candidate = "allow" # 322 +match_same_arms = "allow" # 204 +cast_possible_truncation = "allow" # 122 +too_many_lines = "allow" # 101 +cast_possible_wrap = "allow" # 78 +cast_sign_loss = "allow" # 70 +struct_excessive_bools = "allow" # 68 +cast_precision_loss = "allow" # 52 +cast_lossless = "allow" # 35 +ignored_unit_patterns = "allow" # 21 +similar_names = "allow" # 20 +# large_stack_arrays = "allow" # 20 +needless_pass_by_value = "allow" # 16 +float_cmp = "allow" # 12 +items_after_statements = "allow" # 11 +return_self_not_must_use = "allow" # 8 +inline_always = "allow" # 6 +fn_params_excessive_bools = "allow" # 6 +used_underscore_items = "allow" # 2 +should_panic_without_expect = "allow" # 2 + +doc_markdown = "allow" +unused_self = "allow" +enum_glob_use = "allow" +unnested_or_patterns = "allow" +implicit_hasher = "allow" +doc_link_with_quotes = "allow" +format_push_string = "allow" +flat_map_option = "allow" 
+from_iter_instead_of_collect = "allow" +large_types_passed_by_value = "allow" + +[workspace.metadata.cargo-shear] +ignored = ["clap", "fluent", "libstdbuf"] -[features] -# default = ["feat_bench_not_diff"] -# Turn bench for diffutils cmp off -feat_bench_not_cmp = [] -# Turn bench for diffutils diff off -feat_bench_not_diff = [] diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..21bd444 --- /dev/null +++ b/LICENSE @@ -0,0 +1,18 @@ +Copyright (c) uutils developers + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/LICENSE-APACHE b/LICENSE-APACHE deleted file mode 100644 index 3d8493e..0000000 --- a/LICENSE-APACHE +++ /dev/null @@ -1,179 +0,0 @@ -Copyright (c) Michael Howell -Copyright (c) uutils developers - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - -TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - -1. Definitions. 
- - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. 
- - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - -2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - -3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - -4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the 
following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - -5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - -6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - -7. Disclaimer of Warranty. 
Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - -8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - -9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
- -END OF TERMS AND CONDITIONS diff --git a/LICENSE-MIT b/LICENSE-MIT deleted file mode 100644 index ba40932..0000000 --- a/LICENSE-MIT +++ /dev/null @@ -1,26 +0,0 @@ -Copyright (c) Michael Howell -Copyright (c) uutils developers - -Permission is hereby granted, free of charge, to any -person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the -Software without restriction, including without -limitation the rights to use, copy, modify, merge, -publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software -is furnished to do so, subject to the following -conditions: - -The above copyright notice and this permission notice -shall be included in all copies or substantial portions -of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF -ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A -PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT -SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR -IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. diff --git a/benches/bench-diffutils.rs b/benches/bench-diffutils.rs deleted file mode 100644 index e506b3f..0000000 --- a/benches/bench-diffutils.rs +++ /dev/null @@ -1,377 +0,0 @@ -// This file is part of the uutils diffutils package. -// -// For the full copyright and license information, please view the LICENSE-* -// files that was distributed with this source code. - -//! Benches for all utils in diffutils. -//! -//! There is a file generator included to create files of different sizes for comparison. \ -//! Set the TEMP_DIR const to keep the files. df_to_ files have small changes in them, search for '#'. \ -//! File generation up to 1 GB is really fast, Benchmarking above 100 MB takes very long. 
- -/// Generate test files with these sizes in KB. -const FILE_SIZE_KILO_BYTES: [u64; 4] = [100, 1 * MB, 10 * MB, 25 * MB]; -// const FILE_SIZE_KILO_BYTES: [u64; 3] = [100, 1 * MB, 5 * MB]; -// Empty String to use TempDir (files will be removed after test) or specify dir to keep generated files -const TEMP_DIR: &str = ""; -const NUM_DIFF: u64 = 4; -// just for FILE_SIZE_KILO_BYTES -const MB: u64 = 1_000; -const CHANGE_CHAR: u8 = b'#'; - -#[cfg(not(feature = "feat_bench_not_cmp"))] -mod diffutils_cmp { - use std::hint::black_box; - - use diffutilslib::cmp; - use divan::Bencher; - - use crate::{binary, prepare::*, FILE_SIZE_KILO_BYTES}; - - #[divan::bench(args = FILE_SIZE_KILO_BYTES)] - fn cmp_compare_files_equal(bencher: Bencher, kb: u64) { - let (from, to) = get_context().get_test_files_equal(kb); - let cmd = format!("cmp {from} {to}"); - let opts = str_to_options(&cmd).into_iter().peekable(); - let params = cmp::parse_params(opts).unwrap(); - - bencher - // .with_inputs(|| prepare::cmp_params_identical_testfiles(lines)) - .with_inputs(|| params.clone()) - .bench_refs(|params| black_box(cmp::cmp(¶ms).unwrap())); - } - - // bench the actual compare; cmp exits on first difference - #[divan::bench(args = FILE_SIZE_KILO_BYTES)] - fn cmp_compare_files_different(bencher: Bencher, bytes: u64) { - let (from, to) = get_context().get_test_files_different(bytes); - let cmd = format!("cmp {from} {to} -s"); - let opts = str_to_options(&cmd).into_iter().peekable(); - let params = cmp::parse_params(opts).unwrap(); - - bencher - // .with_inputs(|| prepare::cmp_params_identical_testfiles(lines)) - .with_inputs(|| params.clone()) - .bench_refs(|params| black_box(cmp::cmp(¶ms).unwrap())); - } - - // bench original GNU cmp - #[divan::bench(args = FILE_SIZE_KILO_BYTES)] - fn cmd_cmp_gnu_equal(bencher: Bencher, bytes: u64) { - let (from, to) = get_context().get_test_files_equal(bytes); - let args_str = format!("{from} {to}"); - bencher - // .with_inputs(|| 
prepare::cmp_params_identical_testfiles(lines)) - .with_inputs(|| args_str.clone()) - .bench_refs(|cmd_args| binary::bench_binary("cmp", cmd_args)); - } - - // bench the compiled release version - #[divan::bench(args = FILE_SIZE_KILO_BYTES)] - fn cmd_cmp_release_equal(bencher: Bencher, bytes: u64) { - let (from, to) = get_context().get_test_files_equal(bytes); - let args_str = format!("cmp {from} {to}"); - - bencher - // .with_inputs(|| prepare::cmp_params_identical_testfiles(lines)) - .with_inputs(|| args_str.clone()) - .bench_refs(|cmd_args| binary::bench_binary("target/release/diffutils", cmd_args)); - } -} - -#[cfg(not(feature = "feat_bench_not_diff"))] -mod diffutils_diff { - // use std::hint::black_box; - - use crate::{binary, prepare::*, FILE_SIZE_KILO_BYTES}; - // use diffutilslib::params; - use divan::Bencher; - - // bench the actual compare - // TODO diff does not have a diff function - // #[divan::bench(args = [100_000,10_000])] - // fn diff_compare_files(bencher: Bencher, bytes: u64) { - // let (from, to) = gen_testfiles(lines, 0, "id"); - // let cmd = format!("cmp {from} {to}"); - // let opts = str_to_options(&cmd).into_iter().peekable(); - // let params = params::parse_params(opts).unwrap(); - // - // bencher - // // .with_inputs(|| prepare::cmp_params_identical_testfiles(lines)) - // .with_inputs(|| params.clone()) - // .bench_refs(|params| diff::diff(¶ms).unwrap()); - // } - - // bench original GNU diff - #[divan::bench(args = FILE_SIZE_KILO_BYTES)] - fn cmd_diff_gnu_equal(bencher: Bencher, bytes: u64) { - let (from, to) = get_context().get_test_files_equal(bytes); - let args_str = format!("{from} {to}"); - bencher - // .with_inputs(|| prepare::cmp_params_identical_testfiles(lines)) - .with_inputs(|| args_str.clone()) - .bench_refs(|cmd_args| binary::bench_binary("diff", cmd_args)); - } - - // bench the compiled release version - #[divan::bench(args = FILE_SIZE_KILO_BYTES)] - fn cmd_diff_release_equal(bencher: Bencher, bytes: u64) { - let (from, to) 
= get_context().get_test_files_equal(bytes); - let args_str = format!("diff {from} {to}"); - - bencher - // .with_inputs(|| prepare::cmp_params_identical_testfiles(lines)) - .with_inputs(|| args_str.clone()) - .bench_refs(|cmd_args| binary::bench_binary("target/release/diffutils", cmd_args)); - } -} - -mod parser { - use std::hint::black_box; - - use diffutilslib::{cmp, params}; - use divan::Bencher; - - use crate::prepare::str_to_options; - - // bench the time it takes to parse the command line arguments - #[divan::bench] - fn cmp_parser(bencher: Bencher) { - let cmd = "cmd file_1.txt file_2.txt -bl n10M --ignore-initial=100KiB:1MiB"; - let args = str_to_options(&cmd).into_iter().peekable(); - bencher - .with_inputs(|| args.clone()) - .bench_values(|data| black_box(cmp::parse_params(data))); - } - - // // test the impact on the benchmark if not converting the cmd to Vec (doubles for parse) - // #[divan::bench] - // fn cmp_parser_no_prepare() { - // let cmd = "cmd file_1.txt file_2.txt -bl n10M --ignore-initial=100KiB:1MiB"; - // let args = str_to_options(&cmd).into_iter().peekable(); - // let _ = cmp::parse_params(args); - // } - - // bench the time it takes to parse the command line arguments - #[divan::bench] - fn diff_parser(bencher: Bencher) { - let cmd = "diff file_1.txt file_2.txt -s --brief --expand-tabs --width=100"; - let args = str_to_options(&cmd).into_iter().peekable(); - bencher - .with_inputs(|| args.clone()) - .bench_values(|data| black_box(params::parse_params(data))); - } -} - -mod prepare { - use std::{ - ffi::OsString, - fs::{self, File}, - io::{BufWriter, Write}, - path::Path, - sync::OnceLock, - }; - - use rand::RngExt; - use tempfile::TempDir; - - use crate::{CHANGE_CHAR, FILE_SIZE_KILO_BYTES, NUM_DIFF, TEMP_DIR}; - - // file lines and .txt will be added - const FROM_FILE: &str = "from_file"; - const TO_FILE: &str = "to_file"; - const LINE_LENGTH: usize = 60; - - /// Contains test data (file names) which only needs to be created once. 
- #[derive(Debug, Default)] - pub struct BenchContext { - pub tmp_dir: Option, - pub dir: String, - pub files_equal: Vec<(String, String)>, - pub files_different: Vec<(String, String)>, - } - - impl BenchContext { - pub fn get_path(&self) -> &Path { - match &self.tmp_dir { - Some(tmp) => tmp.path(), - None => Path::new(&self.dir), - } - } - - pub fn get_test_files_equal(&self, kb: u64) -> &(String, String) { - let p = FILE_SIZE_KILO_BYTES.iter().position(|f| *f == kb).unwrap(); - &self.files_equal[p] - } - - #[allow(unused)] - pub fn get_test_files_different(&self, kb: u64) -> &(String, String) { - let p = FILE_SIZE_KILO_BYTES.iter().position(|f| *f == kb).unwrap(); - &self.files_different[p] - } - } - - // Since each bench function is separate in Divan it is more difficult to dynamically create test data. - // This keeps the TempDir alive until the program exits and generates the files only once. - static SHARED_CONTEXT: OnceLock = OnceLock::new(); - /// Creates the test files once and provides them to all tests. 
- pub fn get_context() -> &'static BenchContext { - SHARED_CONTEXT.get_or_init(|| { - let mut ctx = BenchContext::default(); - if TEMP_DIR.is_empty() { - let tmp_dir = TempDir::new().expect("Failed to create temp dir"); - ctx.tmp_dir = Some(tmp_dir); - } else { - // uses current directory, the generated files are kept - let path = Path::new(TEMP_DIR); - if !path.exists() { - fs::create_dir_all(path).expect("Path {path} could not be created"); - } - ctx.dir = TEMP_DIR.to_string(); - }; - - // generate test bytes - for kb in FILE_SIZE_KILO_BYTES { - let f = generate_test_files_bytes(ctx.get_path(), kb * 1000, 0, "eq") - .expect("generate_test_files failed"); - ctx.files_equal.push(f); - let f = generate_test_files_bytes(ctx.get_path(), kb * 1000, NUM_DIFF, "df") - .expect("generate_test_files failed"); - ctx.files_different.push(f); - } - - ctx - }) - } - - pub fn str_to_options(opt: &str) -> Vec { - let s: Vec = opt - .split(" ") - .into_iter() - .filter(|s| !s.is_empty()) - .map(|s| OsString::from(s)) - .collect(); - - s - } - - /// Generates two test files for comparison with size. - /// - /// Each line consists of 10 words with 5 letters, giving a line length of 60 bytes. - /// If num_differences is set, '#' will be inserted between the first two words of a line, - /// evenly spaced in the file. 1 will add the change in the last line, so the comparison takes longest. 
- fn generate_test_files_bytes( - dir: &Path, - bytes: u64, - num_differences: u64, - id: &str, - ) -> std::io::Result<(String, String)> { - let id = if id.is_empty() { - "".to_string() - } else { - format!("{id}_") - }; - let f1 = format!("{id}{FROM_FILE}_{bytes}.txt"); - let f2 = format!("{id}{TO_FILE}_{bytes}.txt"); - let from_path = dir.join(f1); - let to_path = dir.join(f2); - - generate_file_bytes(&from_path, &to_path, bytes, num_differences)?; - - Ok(( - from_path.to_string_lossy().to_string(), - to_path.to_string_lossy().to_string(), - )) - } - - fn generate_file_bytes( - from_name: &Path, - to_name: &Path, - bytes: u64, - num_differences: u64, - ) -> std::io::Result<()> { - let file_from = File::create(from_name)?; - let file_to = File::create(to_name)?; - // for int division, lines will be smaller than requested bytes - let n_lines = bytes / LINE_LENGTH as u64; - let change_every_n_lines = if num_differences == 0 { - 0 - } else { - let c = n_lines / num_differences; - if c == 0 { - 1 - } else { - c - } - }; - // Use a larger 128KB buffer for massive files - let mut writer_from = BufWriter::with_capacity(128 * 1024, file_from); - let mut writer_to = BufWriter::with_capacity(128 * 1024, file_to); - let mut rng = rand::rng(); - - // Each line: (5 chars * 10 words) + 9 spaces + 1 newline = 60 bytes - let mut line_buffer = [b' '; 60]; - line_buffer[59] = b'\n'; // Set the newline once at the end - - for i in (0..n_lines).rev() { - // Fill only the letter positions, skipping spaces and the newline - for word_idx in 0..10 { - let start = word_idx * 6; // Each word + space block is 6 bytes - for i in 0..5 { - line_buffer[start + i] = rng.random_range(b'a'..b'z' + 1); - } - } - - // Write the raw bytes directly to both files - writer_from.write_all(&line_buffer)?; - // make changes in the file - if num_differences == 0 { - writer_to.write_all(&line_buffer)?; - } else { - if i % change_every_n_lines == 0 && n_lines - i > 2 { - line_buffer[5] = CHANGE_CHAR; - } - 
writer_to.write_all(&line_buffer)?; - line_buffer[5] = b' '; - } - } - - // create last line - let missing = (bytes - n_lines as u64 * LINE_LENGTH as u64) as usize; - if missing > 0 { - for word_idx in 0..10 { - let start = word_idx * 6; // Each word + space block is 6 bytes - for i in 0..5 { - line_buffer[start + i] = rng.random_range(b'a'..b'z' + 1); - } - } - line_buffer[missing - 1] = b'\n'; - writer_from.write_all(&line_buffer[0..missing])?; - writer_to.write_all(&line_buffer[0..missing])?; - } - - writer_from.flush()?; - writer_to.flush()?; - - Ok(()) - } -} - -mod binary { - use std::process::Command; - - use crate::prepare::str_to_options; - - pub fn bench_binary(program: &str, cmd_args: &str) -> std::process::ExitStatus { - let args = str_to_options(cmd_args); - Command::new(program) - .args(args) - .status() - .expect("Failed to execute binary") - } -} - -fn main() { - // Run registered benchmarks. - divan::main(); -} diff --git a/build.rs b/build.rs new file mode 100644 index 0000000..3c5084c --- /dev/null +++ b/build.rs @@ -0,0 +1,110 @@ +// This file is part of the uutils diffutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. 
+ +// spell-checker:ignore (vars) krate mangen tldr + +use std::env; +use std::fs::File; +use std::io::Write; +use std::path::Path; + +pub fn main() { + const ENV_FEATURE_PREFIX: &str = "CARGO_FEATURE_"; + const FEATURE_PREFIX: &str = "feat_"; + const OVERRIDE_PREFIX: &str = "uu_"; + + // Do not rebuild build script unless the script itself or the enabled features are modified + // See + println!("cargo:rerun-if-changed=build.rs"); + + // Check for tldr.zip when building uudoc to warn users once at build time + // instead of repeatedly at runtime for each utility + if env::var("CARGO_FEATURE_UUDOC").is_ok() && !Path::new("docs/tldr.zip").exists() { + println!( + "cargo:warning=No tldr archive found, so the documentation will not include examples." + ); + println!("cargo:warning=To include examples, download the tldr archive:"); + println!( + "cargo:warning= curl -L https://github.com/tldr-pages/tldr/releases/latest/download/tldr.zip -o docs/tldr.zip" + ); + } + + if let Ok(profile) = env::var("PROFILE") { + println!("cargo:rustc-cfg=build={profile:?}"); + } + + let out_dir = env::var("OUT_DIR").unwrap(); + + let mut crates = Vec::new(); + for (key, val) in env::vars() { + if val == "1" && key.starts_with(ENV_FEATURE_PREFIX) { + let krate = key[ENV_FEATURE_PREFIX.len()..].to_lowercase(); + // Allow this as we have a bunch of info in the comments + #[allow(clippy::match_same_arms)] + match krate.as_ref() { + "default" | "macos" | "unix" | "windows" | "selinux" | "zip" | "clap_complete" + | "clap_mangen" | "fluent_syntax" => continue, // common/standard feature names + "nightly" | "test_unimplemented" | "expensive_tests" | "test_risky_names" => { + continue; + } // crate-local custom features + "uudoc" => continue, // is not a utility + "test" => continue, // over-ridden with 'uu_test' to avoid collision with rust core crate 'test' + s if s.starts_with(FEATURE_PREFIX) => continue, // crate feature sets + _ => {} // util feature name + } + crates.push(krate); + } + } + 
crates.sort(); + + let mut mf = File::create(Path::new(&out_dir).join("uutils_map.rs")).unwrap(); + + mf.write_all( + "type UtilityMap = phf::OrderedMap<&'static str, (fn(T) -> i32, fn() -> Command)>;\n\ + \n\ + #[allow(clippy::too_many_lines)] + #[allow(clippy::unreadable_literal)] + fn util_map() -> UtilityMap {\n" + .as_bytes(), + ) + .unwrap(); + + let mut phf_map = phf_codegen::OrderedMap::<&str>::new(); + let mut entries = Vec::new(); + + for krate in &crates { + let map_value = format!("({krate}::uumain, {krate}::uu_app)"); + match krate.as_ref() { + // 'test' is named uu_test to avoid collision with rust core crate 'test'. + // It can also be invoked by name '[' for the '[ expr ] syntax'. + "uu_test" => { + entries.push(("test", map_value.clone())); + entries.push(("[", map_value.clone())); + } + k if k.starts_with(OVERRIDE_PREFIX) => { + entries.push((&k[OVERRIDE_PREFIX.len()..], map_value.clone())); + } + "false" | "true" => { + entries.push(( + krate.as_str(), + format!("(r#{krate}::uumain, r#{krate}::uu_app)"), + )); + } + _ => { + entries.push((krate.as_str(), map_value.clone())); + } + } + } + entries.sort_by_key(|(name, _)| *name); + + for (name, value) in entries { + phf_map.entry(name, value); + } + + write!(mf, "{}", phf_map.build()).unwrap(); + mf.write_all(b"\n}\n").unwrap(); + + mf.flush().unwrap(); +} diff --git a/dist-workspace.toml b/dist-workspace.toml index 92c4095..0ca60f0 100644 --- a/dist-workspace.toml +++ b/dist-workspace.toml @@ -4,7 +4,7 @@ members = ["cargo:."] # Config for 'dist' [dist] # The preferred dist version to use in CI (Cargo.toml SemVer syntax) -cargo-dist-version = "0.30.3" +cargo-dist-version = "0.31.0" # CI backends to support ci = "github" # The installers to generate for each app diff --git a/fuzz/Cargo.lock b/fuzz/Cargo.lock index 545c6ec..fe3030a 100644 --- a/fuzz/Cargo.lock +++ b/fuzz/Cargo.lock @@ -2,6 +2,12 @@ # It is not intended for manual editing. 
version = 4 +[[package]] +name = "adler2" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" + [[package]] name = "aho-corasick" version = "1.1.4" @@ -20,6 +26,62 @@ dependencies = [ "libc", ] +[[package]] +name = "anstream" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "824a212faf96e9acacdbd09febd34438f8f711fb84e09a8916013cd7815ca28d" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000" + +[[package]] +name = "anstyle-parse" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52ce7f38b242319f7cabaa6813055467063ecdc9d355bbb4ce0c68908cd8130e" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys", +] + +[[package]] +name = "anyhow" +version = "1.0.102" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" + [[package]] name = "arbitrary" version = "1.4.2" @@ -32,12 +94,37 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" +[[package]] +name = "bigdecimal" +version = "0.4.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4d6867f1565b3aad85681f1015055b087fcfd840d6aeee6eee7f2da317603695" +dependencies = [ + "autocfg", + "libm", + "num-bigint", + "num-integer", + "num-traits", +] + +[[package]] +name = "bitflags" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" + [[package]] name = "bumpalo" version = "3.19.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510" +[[package]] +name = "bytecount" +version = "0.6.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "175812e0be2bccb6abe50bb8d566126198344f707e304f45c648fd8f2cc0365e" + [[package]] name = "cc" version = "1.2.51" @@ -56,6 +143,23 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" +[[package]] +name = "cfg_aliases" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" + +[[package]] +name = "chacha20" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f8d983286843e49675a4b7a2d174efe136dc93a18d69130dd18198a6c167601" +dependencies = [ + "cfg-if", + "cpufeatures", + "rand_core", +] + [[package]] name = "chrono" version = "0.4.42" @@ -70,31 +174,63 @@ dependencies = [ ] [[package]] -name = "const_format" -version = "0.2.35" +name = "clap" +version = "4.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7faa7469a93a566e9ccc1c73fe783b4a65c274c5ace346038dca9c39fe0030ad" +checksum = 
"b193af5b67834b676abd72466a96c1024e6a6ad978a1f484bd90b85c94041351" dependencies = [ - "const_format_proc_macros", + "clap_builder", ] [[package]] -name = "const_format_proc_macros" -version = "0.2.34" +name = "clap_builder" +version = "4.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d57c2eccfb16dbac1f4e61e206105db5820c9d26c3c472bc17c774259ef7744" +checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f" dependencies = [ - "proc-macro2", - "quote", - "unicode-xid", + "anstream", + "anstyle", + "clap_lex", + "strsim", + "terminal_size", ] +[[package]] +name = "clap_lex" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9" + +[[package]] +name = "colorchoice" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570" + [[package]] name = "core-foundation-sys" version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" +[[package]] +name = "cpufeatures" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b2a41393f66f16b0823bb79094d54ac5fbd34ab292ddafb9a0456ac9f87d201" +dependencies = [ + "libc", +] + +[[package]] +name = "crc32fast" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" +dependencies = [ + "cfg-if", +] + [[package]] name = "diff" version = "0.1.13" @@ -103,211 +239,791 @@ checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8" [[package]] name = "diffutils" +version = "0.5.1" +dependencies = [ + "clap", + "itertools", + "phf", + "phf_codegen", + "textwrap", + "uu_cmp", + "uu_diff", + "uucore", + "uudiff", +] 
+ +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "errno" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" +dependencies = [ + "libc", + "windows-sys", +] + +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + +[[package]] +name = "find-msvc-tools" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "645cbb3a84e60b7531617d5ae4e57f7e27308f6445f5abf653209ea76dec8dff" + +[[package]] +name = "flate2" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + +[[package]] +name = "fluent" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8137a6d5a2c50d6b0ebfcb9aaa91a28154e0a70605f112d30cb0cd4a78670477" +dependencies = [ + "fluent-bundle", + "unic-langid", +] + +[[package]] +name = "fluent-bundle" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"01203cb8918f5711e73891b347816d932046f95f54207710bda99beaeb423bf4" +dependencies = [ + "fluent-langneg", + "fluent-syntax", + "intl-memoizer", + "intl_pluralrules", + "rustc-hash", + "self_cell", + "smallvec", + "unic-langid", +] + +[[package]] +name = "fluent-langneg" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7eebbe59450baee8282d71676f3bfed5689aeab00b27545e83e5f14b1195e8b0" +dependencies = [ + "unic-langid", +] + +[[package]] +name = "fluent-syntax" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54f0d287c53ffd184d04d8677f590f4ac5379785529e5e08b1c8083acdd5c198" +dependencies = [ + "memchr", + "thiserror", +] + +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + +[[package]] +name = "getrandom" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" +dependencies = [ + "cfg-if", + "libc", + "r-efi 5.3.0", + "wasip2", +] + +[[package]] +name = "getrandom" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555" +dependencies = [ + "cfg-if", + "libc", + "r-efi 6.0.0", + "rand_core", + "wasip2", + "wasip3", +] + +[[package]] +name = "glob" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" + +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "foldhash", +] + +[[package]] +name = "hashbrown" +version = "0.16.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" + +[[package]] +name = "heck" version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + +[[package]] +name = "iana-time-zone" +version = "0.1.64" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33e57f83510bb73707521ebaffa789ec8caf86f9657cad665b092b581d40e9fb" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "log", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "id-arena" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" + +[[package]] +name = "indexmap" +version = "2.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" +dependencies = [ + "equivalent", + "hashbrown 0.16.1", + "serde", + "serde_core", +] + +[[package]] +name = "intl-memoizer" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "310da2e345f5eb861e7a07ee182262e94975051db9e4223e909ba90f392f163f" +dependencies = [ + "type-map", + "unic-langid", +] + +[[package]] +name = "intl_pluralrules" +version = "7.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum 
= "078ea7b7c29a2b4df841a7f6ac8775ff6074020c6776d48491ce2268e068f972" +dependencies = [ + "unic-langid", +] + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" + +[[package]] +name = "itertools" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" + +[[package]] +name = "jobserver" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" +dependencies = [ + "getrandom 0.3.4", + "libc", +] + +[[package]] +name = "js-sys" +version = "0.3.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "464a3709c7f55f1f721e5389aa6ea4e3bc6aba669353300af094b29ffbdde1d8" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "leb128fmt" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" + +[[package]] +name = "libc" +version = "0.2.183" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d" + +[[package]] +name = "libfuzzer-sys" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f12a681b7dd8ce12bff52488013ba614b869148d54dd79836ab85aafdd53f08d" +dependencies = [ + "arbitrary", + "cc", +] + +[[package]] +name = "libm" +version = "0.2.16" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" + +[[package]] +name = "linux-raw-sys" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53" + +[[package]] +name = "log" +version = "0.4.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" + +[[package]] +name = "memchr" +version = "2.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" + +[[package]] +name = "miniz_oxide" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" +dependencies = [ + "adler2", + "simd-adler32", +] + +[[package]] +name = "nix" +version = "0.30.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74523f3a35e05aba87a1d978330aef40f67b0304ac79c1c00b294c9830543db6" +dependencies = [ + "bitflags", + "cfg-if", + "cfg_aliases", + "libc", +] + +[[package]] +name = "nix" +version = "0.31.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d6d0705320c1e6ba1d912b5e37cf18071b6c2e9b7fa8215a1e8a7651966f5d3" +dependencies = [ + "bitflags", + "cfg-if", + "cfg_aliases", + "libc", +] + +[[package]] +name = "num-bigint" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" +dependencies = [ + "num-integer", + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + 
"num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "once_cell_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" + +[[package]] +name = "os_display" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad5fd71b79026fb918650dde6d125000a233764f1c2f1659a1c71118e33ea08f" dependencies = [ - "chrono", - "const_format", - "diff", - "itoa", - "regex", - "same-file", "unicode-width", ] [[package]] -name = "find-msvc-tools" -version = "0.1.6" +name = "phf" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1562dc717473dbaa4c1f85a36410e03c047b2e7df7f45ee938fbef64ae7fadf" +dependencies = [ + "phf_shared", + "serde", +] + +[[package]] +name = "phf_codegen" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49aa7f9d80421bca176ca8dbfebe668cc7a2684708594ec9f3c0db0805d5d6e1" +dependencies = [ + "phf_generator", + "phf_shared", +] + +[[package]] +name = "phf_generator" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "135ace3a761e564ec88c03a77317a7c6b80bb7f7135ef2544dbe054243b89737" +dependencies = [ + "fastrand", + "phf_shared", +] + +[[package]] +name = "phf_shared" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e57fef6bc5981e38c2ce2d63bfa546861309f875b8a75f092d1d54ae2d64f266" +dependencies = [ + 
"siphasher", +] + +[[package]] +name = "prettyplease" +version = "0.2.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" +dependencies = [ + "proc-macro2", + "syn", +] + +[[package]] +name = "proc-macro2" +version = "1.0.104" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9695f8df41bb4f3d222c95a67532365f569318332d03d5f3f67f37b20e6ebdf0" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "procfs" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25485360a54d6861439d60facef26de713b1e126bf015ec8f98239467a2b82f7" +dependencies = [ + "bitflags", + "chrono", + "flate2", + "procfs-core", + "rustix", +] + +[[package]] +name = "procfs-core" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6401bf7b6af22f78b563665d15a22e9aef27775b79b149a66ca022468a4e405" +dependencies = [ + "bitflags", + "chrono", + "hex", +] + +[[package]] +name = "quote" +version = "1.0.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + +[[package]] +name = "r-efi" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" + +[[package]] +name = "rand" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc266eb313df6c5c09c1c7b1fbe2510961e5bcd3add930c1e31f7ed9da0feff8" +dependencies = [ + "chacha20", + "getrandom 0.4.2", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.10.0" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c8d0fd677905edcbeedbf2edb6494d676f0e98d54d5cf9bda0b061cb8fb8aba" + +[[package]] +name = "regex" +version = "1.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" + +[[package]] +name = "rustc-hash" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" + +[[package]] +name = "rustix" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys", +] + +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "self_cell" +version = "1.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"645cbb3a84e60b7531617d5ae4e57f7e27308f6445f5abf653209ea76dec8dff" +checksum = "b12e76d157a900eb52e81bc6e9f3069344290341720e9178cde2407113ac8d89" [[package]] -name = "getrandom" -version = "0.3.4" +name = "semver" +version = "1.0.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" -dependencies = [ - "cfg-if", - "libc", - "r-efi", - "wasip2", -] +checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" [[package]] -name = "iana-time-zone" -version = "0.1.64" +name = "serde" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33e57f83510bb73707521ebaffa789ec8caf86f9657cad665b092b581d40e9fb" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" dependencies = [ - "android_system_properties", - "core-foundation-sys", - "iana-time-zone-haiku", - "js-sys", - "log", - "wasm-bindgen", - "windows-core", + "serde_core", + "serde_derive", ] [[package]] -name = "iana-time-zone-haiku" -version = "0.1.2" +name = "serde_core" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" dependencies = [ - "cc", + "serde_derive", ] [[package]] -name = "itoa" -version = "1.0.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" - -[[package]] -name = "jobserver" -version = "0.1.34" +name = "serde_derive" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ - "getrandom", - "libc", + "proc-macro2", + "quote", + "syn", ] 
[[package]] -name = "js-sys" -version = "0.3.83" +name = "serde_json" +version = "1.0.149" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "464a3709c7f55f1f721e5389aa6ea4e3bc6aba669353300af094b29ffbdde1d8" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" dependencies = [ - "once_cell", - "wasm-bindgen", + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", ] [[package]] -name = "libc" -version = "0.2.178" +name = "shlex" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37c93d8daa9d8a012fd8ab92f088405fb202ea0b6ab73ee2482ae66af4f42091" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] -name = "libfuzzer-sys" -version = "0.4.12" +name = "simd-adler32" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f12a681b7dd8ce12bff52488013ba614b869148d54dd79836ab85aafdd53f08d" -dependencies = [ - "arbitrary", - "cc", -] +checksum = "703d5c7ef118737c72f1af64ad2f6f8c5e1921f818cdcb97b8fe6fc69bf66214" [[package]] -name = "log" -version = "0.4.29" +name = "siphasher" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" +checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e" [[package]] -name = "memchr" -version = "2.7.6" +name = "smallvec" +version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" [[package]] -name = "num-traits" -version = "0.2.19" +name = "smawk" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" -dependencies = [ - "autocfg", -] +checksum = 
"b7c388c1b5e93756d0c740965c41e8822f866621d41acbdf6336a6a168f8840c" [[package]] -name = "once_cell" -version = "1.21.3" +name = "strsim" +version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" [[package]] -name = "proc-macro2" -version = "1.0.104" +name = "syn" +version = "2.0.112" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9695f8df41bb4f3d222c95a67532365f569318332d03d5f3f67f37b20e6ebdf0" +checksum = "21f182278bf2d2bcb3c88b1b08a37df029d71ce3d3ae26168e3c653b213b99d4" dependencies = [ + "proc-macro2", + "quote", "unicode-ident", ] [[package]] -name = "quote" -version = "1.0.42" +name = "tempfile" +version = "3.27.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f" +checksum = "32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd" dependencies = [ - "proc-macro2", + "fastrand", + "getrandom 0.4.2", + "once_cell", + "rustix", + "windows-sys", ] [[package]] -name = "r-efi" -version = "5.3.0" +name = "terminal_size" +version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" +checksum = "230a1b821ccbd75b185820a1f1ff7b14d21da1e442e22c0863ea5f08771a8874" +dependencies = [ + "rustix", + "windows-sys", +] [[package]] -name = "regex" -version = "1.12.2" +name = "textwrap" +version = "0.16.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" +checksum = "c13547615a44dc9c452a8a534638acdf07120d4b6847c8178705da06306a3057" dependencies = [ - "aho-corasick", - "memchr", - "regex-automata", - "regex-syntax", + "smawk", + "terminal_size", + "unicode-linebreak", + 
"unicode-width", ] [[package]] -name = "regex-automata" -version = "0.4.13" +name = "thiserror" +version = "2.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" dependencies = [ - "aho-corasick", - "memchr", - "regex-syntax", + "thiserror-impl", ] [[package]] -name = "regex-syntax" -version = "0.8.8" +name = "thiserror-impl" +version = "2.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] [[package]] -name = "rustversion" -version = "1.0.22" +name = "tinystr" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" +checksum = "42d3e9c45c09de15d06dd8acf5f4e0e399e85927b7f00711024eb7ae10fa4869" +dependencies = [ + "displaydoc", + "serde_core", + "zerovec", +] [[package]] -name = "same-file" -version = "1.0.6" +name = "type-map" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +checksum = "cb30dbbd9036155e74adad6812e9898d03ec374946234fbcebd5dfc7b9187b90" dependencies = [ - "winapi-util", + "rustc-hash", ] [[package]] -name = "shlex" -version = "1.3.0" +name = "unic-langid" +version = "0.9.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +checksum = "a28ba52c9b05311f4f6e62d5d9d46f094bd6e84cb8df7b3ef952748d752a7d05" +dependencies = [ + "unic-langid-impl", +] [[package]] -name = "syn" -version = "2.0.112" +name = "unic-langid-impl" +version = "0.9.6" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21f182278bf2d2bcb3c88b1b08a37df029d71ce3d3ae26168e3c653b213b99d4" +checksum = "dce1bf08044d4b7a94028c93786f8566047edc11110595914de93362559bc658" dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", + "tinystr", ] [[package]] @@ -316,6 +1032,12 @@ version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" +[[package]] +name = "unicode-linebreak" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b09c83c3c29d37506a3e260c08c03743a6bb66a9cd432c6934ab501a190571f" + [[package]] name = "unicode-width" version = "0.2.2" @@ -334,6 +1056,98 @@ version = "0.0.0" dependencies = [ "diffutils", "libfuzzer-sys", + "uu_cmp", + "uu_diff", + "uudiff", +] + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "uu_cmp" +version = "0.5.1" +dependencies = [ + "bytecount", + "clap", + "diff", + "fluent", + "itoa", + "regex", + "same-file", + "unicode-width", + "uucore", + "uudiff", +] + +[[package]] +name = "uu_diff" +version = "0.5.1" +dependencies = [ + "clap", + "diff", + "fluent", + "regex", + "same-file", + "unicode-width", + "uucore", + "uudiff", +] + +[[package]] +name = "uucore" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8038531f506a34ab4612b93f97d5f40759768cd34a83fd2af041b84fcbde474" +dependencies = [ + "bigdecimal", + "clap", + "fluent", + "fluent-bundle", + "fluent-syntax", + "nix 0.30.1", + "num-traits", + "os_display", + "procfs", + "rustc-hash", + "thiserror", + "unic-langid", + "uucore_procs", + "wild", +] + +[[package]] +name = "uucore_procs" +version = "0.7.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f63e2d5083ff0983193a33e2d57fd271c7e3e3e7df8e46e8f471865647b2cbc" +dependencies = [ + "proc-macro2", + "quote", +] + +[[package]] +name = "uudiff" +version = "0.5.1" +dependencies = [ + "chrono", + "clap", + "fluent", + "fluent-bundle", + "fluent-syntax", + "nix 0.31.2", + "os_display", + "rand", + "regex", + "same-file", + "tempfile", + "thiserror", + "unic-langid", + "unicode-width", + "uucore", ] [[package]] @@ -342,7 +1156,16 @@ version = "1.0.1+wasi-0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7" dependencies = [ - "wit-bindgen", + "wit-bindgen 0.46.0", +] + +[[package]] +name = "wasip3" +version = "0.4.0+wasi-0.3.0-rc-2026-01-06" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" +dependencies = [ + "wit-bindgen 0.51.0", ] [[package]] @@ -390,6 +1213,49 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "wasm-encoder" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" +dependencies = [ + "leb128fmt", + "wasmparser", +] + +[[package]] +name = "wasm-metadata" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" +dependencies = [ + "anyhow", + "indexmap", + "wasm-encoder", + "wasmparser", +] + +[[package]] +name = "wasmparser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" +dependencies = [ + "bitflags", + "hashbrown 0.15.5", + "indexmap", + "semver", +] + +[[package]] +name = "wild" +version = "2.2.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3131afc8c575281e1e80f36ed6a092aa502c08b18ed7524e86fbbb12bb410e1" +dependencies = [ + "glob", +] + [[package]] name = "winapi-util" version = "0.1.11" @@ -472,3 +1338,113 @@ name = "wit-bindgen" version = "0.46.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" + +[[package]] +name = "wit-bindgen" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" +dependencies = [ + "wit-bindgen-rust-macro", +] + +[[package]] +name = "wit-bindgen-core" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" +dependencies = [ + "anyhow", + "heck", + "wit-parser", +] + +[[package]] +name = "wit-bindgen-rust" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" +dependencies = [ + "anyhow", + "heck", + "indexmap", + "prettyplease", + "syn", + "wasm-metadata", + "wit-bindgen-core", + "wit-component", +] + +[[package]] +name = "wit-bindgen-rust-macro" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" +dependencies = [ + "anyhow", + "prettyplease", + "proc-macro2", + "quote", + "syn", + "wit-bindgen-core", + "wit-bindgen-rust", +] + +[[package]] +name = "wit-component" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" +dependencies = [ + "anyhow", + "bitflags", + "indexmap", + "log", + "serde", + "serde_derive", + "serde_json", + "wasm-encoder", + "wasm-metadata", + 
"wasmparser", + "wit-parser", +] + +[[package]] +name = "wit-parser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" +dependencies = [ + "anyhow", + "id-arena", + "indexmap", + "log", + "semver", + "serde", + "serde_derive", + "serde_json", + "unicode-xid", + "wasmparser", +] + +[[package]] +name = "zerofrom" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" + +[[package]] +name = "zerovec" +version = "0.11.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c28719294829477f525be0186d13efa9a3c602f7ec202ca9e353d310fb9a002" +dependencies = [ + "serde", + "zerofrom", +] + +[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 39efd70..23f2522 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -11,6 +11,9 @@ cargo-fuzz = true [dependencies] libfuzzer-sys = "0.4.7" diffutils = { path = "../" } +uu_cmp = { path = "../src/uu/cmp" } +uu_diff = { path = "../src/uu/diff" } +uudiff = { path = "../src/uudiff" } # Prevent this from interfering with workspaces [workspace] diff --git a/fuzz/fuzz_targets/fuzz_cmp.rs b/fuzz/fuzz_targets/fuzz_cmp.rs index e9d0e4c..71c1f0a 100644 --- a/fuzz/fuzz_targets/fuzz_cmp.rs +++ b/fuzz/fuzz_targets/fuzz_cmp.rs @@ -1,12 +1,15 @@ #![no_main] #[macro_use] extern crate libfuzzer_sys; -use diffutilslib::cmp::{self, Cmp}; +use std::convert::TryFrom; use std::ffi::OsString; use std::fs::File; use std::io::Write; +use uu_cmp::params_cmp::Params; +use uudiff::utils::CompareOk; + fn os(s: &str) -> OsString { OsString::from(s) } @@ -29,15 +32,23 @@ fuzz_target!(|x: (Vec, Vec)| { .write_all(&to) .unwrap(); 
+ // let params = + // uu_cmp::parse_params(args).unwrap_or_else(|e| panic!("Failed to parse params: {}", e)); + let matches = uudiff::clap_localization::handle_clap_result_with_exit_code( + uu_cmp::params_cmp::uu_app(), + args, + 2, + ) + .unwrap_or_else(|e| panic!("Failed to parse params: {}", e)); let params = - cmp::parse_params(args).unwrap_or_else(|e| panic!("Failed to parse params: {}", e)); - let ret = cmp::cmp(¶ms); - if from == to && !matches!(ret, Ok(Cmp::Equal)) { + Params::try_from(matches).unwrap_or_else(|e| panic!("Failed to parse params: {}", e)); + let ret = uu_cmp::cmp_compare(¶ms); + if from == to && !matches!(ret, Ok(CompareOk::Equal)) { panic!( "target/fuzz.cmp.a and target/fuzz.cmp.b are equal, but cmp returned {:?}.", ret ); - } else if from != to && !matches!(ret, Ok(Cmp::Different)) { + } else if from != to && !matches!(ret, Ok(CompareOk::Different)) { panic!( "target/fuzz.cmp.a and target/fuzz.cmp.b are different, but cmp returned {:?}.", ret diff --git a/fuzz/fuzz_targets/fuzz_cmp_args.rs b/fuzz/fuzz_targets/fuzz_cmp_args.rs index 579cf34..ed1ed17 100644 --- a/fuzz/fuzz_targets/fuzz_cmp_args.rs +++ b/fuzz/fuzz_targets/fuzz_cmp_args.rs @@ -1,23 +1,32 @@ #![no_main] #[macro_use] extern crate libfuzzer_sys; -use diffutilslib::cmp; use libfuzzer_sys::Corpus; -use std::ffi::OsString; +use std::{convert::TryFrom, ffi::OsString}; +use uu_cmp::params_cmp::Params; fn os(s: &str) -> OsString { OsString::from(s) } -fuzz_target!(|x: Vec| -> Corpus { - if x.len() > 6 { +fuzz_target!(|args: Vec| -> Corpus { + if args.len() > 6 { // Make sure we try to parse an option when we get longer args. x[0] will be // the executable name. 
- if ![os("-l"), os("-b"), os("-s"), os("-n"), os("-i")].contains(&x[1]) { + if ![os("-l"), os("-b"), os("-s"), os("-n"), os("-i")].contains(&args[1]) { return Corpus::Reject; } } - let _ = cmp::parse_params(x.into_iter().peekable()); + // not sure what this does, mostly empty args + // dbg!(&args); + // let _ = uu_cmp::parse_params(x.into_iter().peekable()); + if let Ok(matches) = uudiff::clap_localization::handle_clap_result_with_exit_code( + uu_cmp::params_cmp::uu_app(), + args, + 2, + ) { + let _params = Params::try_from(matches); + } Corpus::Keep }); diff --git a/fuzz/fuzz_targets/fuzz_ed.rs b/fuzz/fuzz_targets/fuzz_ed.rs index 7c38fda..d2c4b46 100644 --- a/fuzz/fuzz_targets/fuzz_ed.rs +++ b/fuzz/fuzz_targets/fuzz_ed.rs @@ -1,12 +1,13 @@ #![no_main] #[macro_use] extern crate libfuzzer_sys; -use diffutilslib::ed_diff; -use diffutilslib::ed_diff::DiffError; -use diffutilslib::params::Params; + use std::fs::{self, File}; use std::io::Write; use std::process::Command; +use uu_diff::ed_diff::{self}; +use uu_diff::params_diff::Params; +use uu_diff::DiffError; fn diff_w(expected: &[u8], actual: &[u8], filename: &str) -> Result, DiffError> { let mut output = ed_diff::diff(expected, actual, &Params::default())?; diff --git a/fuzz/fuzz_targets/fuzz_normal.rs b/fuzz/fuzz_targets/fuzz_normal.rs index 6b1e6b9..4091e6c 100644 --- a/fuzz/fuzz_targets/fuzz_normal.rs +++ b/fuzz/fuzz_targets/fuzz_normal.rs @@ -1,13 +1,14 @@ #![no_main] #[macro_use] extern crate libfuzzer_sys; -use diffutilslib::normal_diff; -use diffutilslib::params::Params; use std::fs::{self, File}; use std::io::Write; use std::process::Command; +use uu_diff::normal_diff; +use uu_diff::params_diff::Params; + fuzz_target!(|x: (Vec, Vec)| { let (from, to) = x; /*if let Ok(s) = String::from_utf8(from.clone()) { diff --git a/fuzz/fuzz_targets/fuzz_patch.rs b/fuzz/fuzz_targets/fuzz_patch.rs index 4dea4b5..6e6ef49 100644 --- a/fuzz/fuzz_targets/fuzz_patch.rs +++ b/fuzz/fuzz_targets/fuzz_patch.rs @@ -1,12 +1,13 @@ 
#![no_main] #[macro_use] extern crate libfuzzer_sys; -use diffutilslib::params::Params; -use diffutilslib::unified_diff; use std::fs::{self, File}; use std::io::Write; use std::process::Command; +use uu_diff::params_diff::Params; +use uu_diff::unified_diff; + fuzz_target!(|x: (Vec, Vec, u8)| { let (from, to, context) = x; /*if let Ok(s) = String::from_utf8(from.clone()) { @@ -27,9 +28,9 @@ fuzz_target!(|x: (Vec, Vec, u8)| { &Params { from: "a/fuzz.file".into(), to: "target/fuzz.file".into(), - context_count: context as usize, + n_output_lines: context as usize, ..Default::default() - } + }, ); File::create("target/fuzz.file.original") .unwrap() diff --git a/fuzz/fuzz_targets/fuzz_side.rs b/fuzz/fuzz_targets/fuzz_side.rs index 8a69c07..534d9da 100644 --- a/fuzz/fuzz_targets/fuzz_side.rs +++ b/fuzz/fuzz_targets/fuzz_side.rs @@ -2,11 +2,11 @@ #[macro_use] extern crate libfuzzer_sys; -use diffutilslib::side_diff; - use std::fs::File; use std::io::Write; -use diffutilslib::params::Params; + +use uu_diff::params_diff::Params; +use uu_diff::side_diff; fuzz_target!(|x: (Vec, Vec, /* usize, usize */ bool)| { let (original, new, /* width, tabsize, */ expand) = x; @@ -39,4 +39,4 @@ fuzz_target!(|x: (Vec, Vec, /* usize, usize */ bool)| { .unwrap() .write_all(&output_buf) .unwrap(); -}); \ No newline at end of file +}); diff --git a/src/bin/diffutils.rs b/src/bin/diffutils.rs new file mode 100644 index 0000000..558f785 --- /dev/null +++ b/src/bin/diffutils.rs @@ -0,0 +1,150 @@ +// This file is part of the uutils diffutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. 
+ +use clap::Command; +use diffutils::validation; +use itertools::Itertools as _; +// conflicts with uu_cmp cmp +// use std::cmp; +use std::ffi::OsString; +use std::process; +use uucore::Args; + +const VERSION: &str = env!("CARGO_PKG_VERSION"); +/// List of utils part of diffutils for a different error message if not found. +const DIFFUTILS: [&str; 5] = ["cmp", "diff", "diff3", "patch", "sdiff"]; + +include!(concat!(env!("OUT_DIR"), "/uutils_map.rs")); + +fn usage(utils: &UtilityMap, name: &str) { + println!("{name} {VERSION} (multi-call binary)\n"); + println!("Usage: {name} [function [arguments...]]"); + println!(" {name} --list"); + println!(); + #[cfg(feature = "feat_common_core")] + { + println!("Functions:"); + println!(" '' [arguments...]"); + println!(); + } + println!("Options:"); + println!(" --list lists all defined functions, one per row\n"); + println!("Currently defined functions:\n"); + let display_list = utils.keys().copied().join(", "); + let width = std::cmp::min(textwrap::termwidth(), 100) - 4 * 2; // (opinion/heuristic) max 100 chars wide with 4 character side indentions + println!( + "{}", + textwrap::indent(&textwrap::fill(&display_list, width), " ") + ); +} + +/// Entry into Diffutils +/// +/// # Arguments +/// * first arg needs to be the binary/executable. \ +/// This is usually diffutils, but can be the util name itself, e.g. 'ls'. \ +/// The util name will be checked against the list of enabled utils, where +/// * the name exactly matches the name of an applet/util or +/// * the name matches pattern, e.g. +/// 'my_own_directory_service_ls' as long as the last letters match the utility. +/// * diffutils arg: --list, --version, -V, --help, -h (or shortened long versions): \ +/// Output information about diffutils itself. \ +/// Multiple of these arguments, output limited to one, with help > version > list. +/// * util name and any number of arguments: \ +/// Will get passed on to the selected utility. 
\ +/// Error if util name is not recognized. +/// * --help or -h and a following util name: \ +/// Output help for that specific utility. \ +/// So 'diffutils sum --help' is the same as 'diffutils --help sum'. +#[allow(clippy::cognitive_complexity)] +fn main() { + uucore::panic::mute_sigpipe_panic(); + + let utils = util_map(); + let mut args = uucore::args_os(); + + let binary = validation::binary_path(&mut args); + let binary_as_util = validation::name(&binary).unwrap_or_else(|| { + usage(&utils, ""); + process::exit(0); + }); + + // binary name ends with util name? + let is_diffutils = binary_as_util.ends_with("utils"); + let matched_util = utils + .keys() + .filter(|&&u| binary_as_util.ends_with(u) && !is_diffutils) + .max_by_key(|u| u.len()); //Prefer stty more than tty. *utils is not ls + + let util_name = if let Some(&util) = matched_util { + Some(OsString::from(util)) + } else if is_diffutils || binary_as_util.ends_with("box") { + // todo: Remove support of "*box" from binary + uucore::set_utility_is_second_arg(); + args.next() + } else { + validation::not_found(&OsString::from(binary_as_util)); + }; + + // 0th/1st argument equals util name? + if let Some(util_os) = util_name { + let Some(util) = util_os.to_str() else { + // Not UTF-8 + validation::not_found(&util_os) + }; + + // Util in known list? + if let Some(&(uumain, _)) = utils.get(util) { + // TODO: plug the deactivation of the translation + // and load the English strings directly at compilation time in the + // binary to avoid the load of the flt + // Could be something like: + // #[cfg(not(feature = "only_english"))] + validation::setup_localization_or_exit(util); + process::exit(uumain(vec![util_os].into_iter().chain(args))); + } else { + // Known, but not yet implemented. + if DIFFUTILS.contains(&util) { + println!( + "The utility '{util}' is part of diffutils, but not yet implemented in Rust." 
+ ); + let display_list = utils.keys().copied().join(", "); + println!("\nCurrently defined functions: {display_list}\n"); + process::exit(2); + } + let l = util.len(); + // GNU diffutils --help string shows help for diffutils + if util == "-h" || (l <= 6 && util[0..l] == "--help"[0..l]) { + usage(&utils, binary_as_util); + // process::exit(0); + // GNU diffutils --list string shows available utilities as list + } else if l <= 6 && util[0..l] == "--list"[0..l] { + // If --help is also present, show usage instead of list + if args.any(|arg| arg == "--help" || arg == "-h") { + usage(&utils, binary_as_util); + process::exit(0); + } + let utils: Vec<_> = utils.keys().collect(); + for util in utils { + println!("{util}"); + } + process::exit(0); + // GNU diffutils --version string shows version + } else if util == "-V" || (l <= 9 && util[0..l] == "--version"[0..l]) { + println!("{binary_as_util} {VERSION} (multi-call binary)"); + process::exit(0); + } else if util.starts_with('-') { + // Argument looks like an option but wasn't recognized + validation::unrecognized_option(binary_as_util, &util_os); + } else { + validation::not_found(&util_os); + } + } + } else { + // no arguments provided + usage(&utils, binary_as_util); + process::exit(0); + } +} diff --git a/src/cmp.rs b/src/cmp.rs deleted file mode 100644 index 587d5cc..0000000 --- a/src/cmp.rs +++ /dev/null @@ -1,1211 +0,0 @@ -// This file is part of the uutils diffutils package. -// -// For the full copyright and license information, please view the LICENSE-* -// files that was distributed with this source code. 
- -use crate::utils::format_failure_to_read_input_file; -use std::env::{self, ArgsOs}; -use std::ffi::OsString; -use std::io::{BufRead, BufReader, BufWriter, Read, Write}; -use std::iter::Peekable; -use std::process::ExitCode; -use std::{cmp, fs, io}; - -#[cfg(not(target_os = "windows"))] -use std::os::fd::{AsRawFd, FromRawFd}; - -#[cfg(not(target_os = "windows"))] -use std::os::unix::fs::MetadataExt; - -#[cfg(target_os = "windows")] -use std::os::windows::fs::MetadataExt; - -#[derive(Clone, Debug, Default, Eq, PartialEq)] -pub struct Params { - executable: OsString, - from: OsString, - to: OsString, - print_bytes: bool, - skip_a: Option, - skip_b: Option, - max_bytes: Option, - verbose: bool, - quiet: bool, -} - -#[inline] -fn usage_string(executable: &str) -> String { - format!("Usage: {executable} ") -} - -#[cfg(not(target_os = "windows"))] -fn is_stdout_dev_null() -> bool { - let Ok(dev_null) = fs::metadata("/dev/null") else { - return false; - }; - - let stdout_fd = io::stdout().lock().as_raw_fd(); - - // SAFETY: we have exclusive access to stdout right now. - let stdout_file = unsafe { fs::File::from_raw_fd(stdout_fd) }; - let Ok(stdout) = stdout_file.metadata() else { - return false; - }; - - let is_dev_null = stdout.dev() == dev_null.dev() && stdout.ino() == dev_null.ino(); - - // Don't let File close the fd. It's unfortunate that File doesn't have a leak_fd(). 
- std::mem::forget(stdout_file); - - is_dev_null -} - -pub fn parse_params>(mut opts: Peekable) -> Result { - let Some(executable) = opts.next() else { - return Err("Usage: ".to_string()); - }; - let executable_str = executable.to_string_lossy().to_string(); - - let parse_skip = |param: &str, skip_desc: &str| -> Result { - let suffix_start = param - .find(|b: char| !b.is_ascii_digit()) - .unwrap_or(param.len()); - let mut num = match param[..suffix_start].parse::() { - Ok(num) => num, - Err(e) if *e.kind() == std::num::IntErrorKind::PosOverflow => usize::MAX, - Err(_) => { - return Err(format!( - "{executable_str}: invalid --ignore-initial value '{skip_desc}'" - )) - } - }; - - if suffix_start != param.len() { - // Note that GNU cmp advertises supporting up to Y, but fails if you try - // to actually use anything beyond E. - let multiplier: usize = match ¶m[suffix_start..] { - "kB" => 1_000, - "K" => 1_024, - "MB" => 1_000_000, - "M" => 1_048_576, - "GB" => 1_000_000_000, - "G" => 1_073_741_824, - // This only generates a warning when compiling for target_pointer_width < 64 - #[allow(unused_variables)] - suffix @ ("TB" | "T" | "PB" | "P" | "EB" | "E") => { - #[cfg(target_pointer_width = "64")] - match suffix { - "TB" => 1_000_000_000_000, - "T" => 1_099_511_627_776, - "PB" => 1_000_000_000_000_000, - "P" => 1_125_899_906_842_624, - "EB" => 1_000_000_000_000_000_000, - "E" => 1_152_921_504_606_846_976, - _ => unreachable!(), - } - #[cfg(not(target_pointer_width = "64"))] - usize::MAX - } - "ZB" => usize::MAX, // 1_000_000_000_000_000_000_000, - "Z" => usize::MAX, // 1_180_591_620_717_411_303_424, - "YB" => usize::MAX, // 1_000_000_000_000_000_000_000_000, - "Y" => usize::MAX, // 1_208_925_819_614_629_174_706_176, - _ => { - return Err(format!( - "{executable_str}: invalid --ignore-initial value '{skip_desc}'" - )); - } - }; - - num = match num.overflowing_mul(multiplier) { - (n, false) => n, - _ => usize::MAX, - } - } - - Ok(num) - }; - - let mut params = Params { - 
executable, - ..Default::default() - }; - let mut from = None; - let mut to = None; - let mut skip_pos1 = None; - let mut skip_pos2 = None; - while let Some(param) = opts.next() { - if param == "--" { - break; - } - if param == "-" { - if from.is_none() { - from = Some(param); - } else if to.is_none() { - to = Some(param); - } else { - return Err(usage_string(&executable_str)); - } - continue; - } - if param == "-b" || param == "--print-bytes" { - params.print_bytes = true; - continue; - } - if param == "-l" || param == "--verbose" { - params.verbose = true; - continue; - } - if param == "-lb" || param == "-bl" { - params.print_bytes = true; - params.verbose = true; - continue; - } - - let param_str = param.to_string_lossy().to_string(); - if param == "-n" || param_str.starts_with("--bytes=") { - let max_bytes = if param == "-n" { - opts.next() - .ok_or_else(|| usage_string(&executable_str))? - .to_string_lossy() - .to_string() - } else { - let (_, arg) = param_str.split_once('=').unwrap(); - arg.to_string() - }; - let max_bytes = match max_bytes.parse::() { - Ok(num) => num, - Err(e) if *e.kind() == std::num::IntErrorKind::PosOverflow => usize::MAX, - Err(_) => { - return Err(format!( - "{executable_str}: invalid --bytes value '{max_bytes}'" - )) - } - }; - params.max_bytes = Some(max_bytes); - continue; - } - if param == "-i" || param_str.starts_with("--ignore-initial=") { - let skip_desc = if param == "-i" { - opts.next() - .ok_or_else(|| usage_string(&executable_str))? 
- .to_string_lossy() - .to_string() - } else { - let (_, arg) = param_str.split_once('=').unwrap(); - arg.to_string() - }; - let (skip_a, skip_b) = if let Some((skip_a, skip_b)) = skip_desc.split_once(':') { - ( - parse_skip(skip_a, &skip_desc)?, - parse_skip(skip_b, &skip_desc)?, - ) - } else { - let skip = parse_skip(&skip_desc, &skip_desc)?; - (skip, skip) - }; - params.skip_a = Some(skip_a); - params.skip_b = Some(skip_b); - continue; - } - if param == "-s" || param == "--quiet" || param == "--silent" { - params.quiet = true; - continue; - } - if param == "--help" { - println!("{}", usage_string(&executable_str)); - std::process::exit(0); - } - if param_str.starts_with('-') { - return Err(format!("unrecognized option '{}'", param.to_string_lossy())); - } - if from.is_none() { - from = Some(param); - } else if to.is_none() { - to = Some(param); - } else if skip_pos1.is_none() { - skip_pos1 = Some(parse_skip(¶m_str, ¶m_str)?); - } else if skip_pos2.is_none() { - skip_pos2 = Some(parse_skip(¶m_str, ¶m_str)?); - } else { - return Err(usage_string(&executable_str)); - } - } - - // Do as GNU cmp, and completely disable printing if we are - // outputing to /dev/null. - #[cfg(not(target_os = "windows"))] - if is_stdout_dev_null() { - params.quiet = true; - params.verbose = false; - params.print_bytes = false; - } - - if params.quiet && params.verbose { - return Err(format!( - "{executable_str}: options -l and -s are incompatible" - )); - } - - params.from = if let Some(from) = from { - from - } else if let Some(param) = opts.next() { - param - } else { - return Err(usage_string(&executable_str)); - }; - params.to = if let Some(to) = to { - to - } else if let Some(param) = opts.next() { - param - } else { - OsString::from("-") - }; - - // GNU cmp ignores positional skip arguments if -i is provided. 
- if params.skip_a.is_none() { - if skip_pos1.is_some() { - params.skip_a = skip_pos1; - } else if let Some(param) = opts.next() { - let param_str = param.to_string_lossy().to_string(); - params.skip_a = Some(parse_skip(¶m_str, ¶m_str)?); - } - }; - if params.skip_b.is_none() { - if skip_pos2.is_some() { - params.skip_b = skip_pos2; - } else if let Some(param) = opts.next() { - let param_str = param.to_string_lossy().to_string(); - params.skip_b = Some(parse_skip(¶m_str, ¶m_str)?); - } - } - - Ok(params) -} - -fn prepare_reader( - path: &OsString, - skip: &Option, - params: &Params, -) -> Result, String> { - let mut reader: Box = if path == "-" { - Box::new(BufReader::new(io::stdin())) - } else { - match fs::File::open(path) { - Ok(file) => Box::new(BufReader::new(file)), - Err(e) => { - return Err(format_failure_to_read_input_file( - ¶ms.executable, - path, - &e, - )); - } - } - }; - - if let Some(skip) = skip { - if let Err(e) = io::copy(&mut reader.by_ref().take(*skip as u64), &mut io::sink()) { - return Err(format_failure_to_read_input_file( - ¶ms.executable, - path, - &e, - )); - } - } - - Ok(reader) -} - -#[derive(Debug)] -pub enum Cmp { - Equal, - Different, -} - -pub fn cmp(params: &Params) -> Result { - let mut from = prepare_reader(¶ms.from, ¶ms.skip_a, params)?; - let mut to = prepare_reader(¶ms.to, ¶ms.skip_b, params)?; - - let mut offset_width = params.max_bytes.unwrap_or(usize::MAX); - - if let (Ok(a_meta), Ok(b_meta)) = (fs::metadata(¶ms.from), fs::metadata(¶ms.to)) { - #[cfg(not(target_os = "windows"))] - let (a_size, b_size) = (a_meta.size(), b_meta.size()); - - #[cfg(target_os = "windows")] - let (a_size, b_size) = (a_meta.file_size(), b_meta.file_size()); - - // If the files have different sizes, we already know they are not identical. If we have not - // been asked to show even the first difference, we can quit early. 
- if params.quiet && a_size != b_size { - return Ok(Cmp::Different); - } - - let smaller = cmp::min(a_size, b_size) as usize; - offset_width = cmp::min(smaller, offset_width); - } - - let offset_width = 1 + offset_width.checked_ilog10().unwrap_or(1) as usize; - - // Capacity calc: at_byte width + 2 x 3-byte octal numbers + 2 x 4-byte value + 4 spaces - let mut output = Vec::::with_capacity(offset_width + 3 * 2 + 4 * 2 + 4); - - let mut at_byte = 1; - let mut at_line = 1; - let mut start_of_line = true; - let mut stdout = BufWriter::new(io::stdout().lock()); - let mut compare = Cmp::Equal; - loop { - // Fill up our buffers. - let from_buf = match from.fill_buf() { - Ok(buf) => buf, - Err(e) => { - return Err(format_failure_to_read_input_file( - ¶ms.executable, - ¶ms.from, - &e, - )); - } - }; - - let to_buf = match to.fill_buf() { - Ok(buf) => buf, - Err(e) => { - return Err(format_failure_to_read_input_file( - ¶ms.executable, - ¶ms.to, - &e, - )); - } - }; - - // Check for EOF conditions. - if from_buf.is_empty() && to_buf.is_empty() { - break; - } - - if from_buf.is_empty() || to_buf.is_empty() { - let eof_on = if from_buf.is_empty() { - ¶ms.from.to_string_lossy() - } else { - ¶ms.to.to_string_lossy() - }; - - report_eof(at_byte, at_line, start_of_line, eof_on, params); - return Ok(Cmp::Different); - } - - // Fast path - for long files in which almost all bytes are the same we - // can do a direct comparison to let the compiler optimize. 
- let consumed = std::cmp::min(from_buf.len(), to_buf.len()); - if from_buf[..consumed] == to_buf[..consumed] { - let last = from_buf[..consumed].last().unwrap(); - - at_byte += consumed; - at_line += from_buf[..consumed].iter().filter(|&c| *c == b'\n').count(); - - start_of_line = *last == b'\n'; - - if let Some(max_bytes) = params.max_bytes { - if at_byte > max_bytes { - break; - } - } - - from.consume(consumed); - to.consume(consumed); - - continue; - } - - // Iterate over the buffers, the zip iterator will stop us as soon as the - // first one runs out. - for (&from_byte, &to_byte) in from_buf.iter().zip(to_buf.iter()) { - if from_byte != to_byte { - compare = Cmp::Different; - - if params.verbose { - format_verbose_difference( - from_byte, - to_byte, - at_byte, - offset_width, - &mut output, - params, - )?; - stdout.write_all(output.as_slice()).map_err(|e| { - format!( - "{}: error printing output: {e}", - params.executable.to_string_lossy() - ) - })?; - output.clear(); - } else { - report_difference(from_byte, to_byte, at_byte, at_line, params); - return Ok(Cmp::Different); - } - } - - start_of_line = from_byte == b'\n'; - if start_of_line { - at_line += 1; - } - - at_byte += 1; - - if let Some(max_bytes) = params.max_bytes { - if at_byte > max_bytes { - break; - } - } - } - - // Notify our readers about the bytes we went over. - from.consume(consumed); - to.consume(consumed); - } - - Ok(compare) -} - -// Exit codes are documented at -// https://www.gnu.org/software/diffutils/manual/html_node/Invoking-cmp.html -// An exit status of 0 means no differences were found, -// 1 means some differences were found, -// and 2 means trouble. 
-pub fn main(opts: Peekable) -> ExitCode { - let params = match parse_params(opts) { - Ok(param) => param, - Err(e) => { - eprintln!("{e}"); - return ExitCode::from(2); - } - }; - - if params.from == "-" && params.to == "-" - || same_file::is_same_file(¶ms.from, ¶ms.to).unwrap_or(false) - { - return ExitCode::SUCCESS; - } - - match cmp(¶ms) { - Ok(Cmp::Equal) => ExitCode::SUCCESS, - Ok(Cmp::Different) => ExitCode::from(1), - Err(e) => { - if !params.quiet { - eprintln!("{e}"); - } - ExitCode::from(2) - } - } -} - -#[inline] -fn format_octal(byte: u8, buf: &mut [u8; 3]) -> &str { - *buf = [b' ', b' ', b'0']; - - let mut num = byte; - let mut idx = 2; // Start at the last position in the buffer - - // Generate octal digits - while num > 0 { - buf[idx] = b'0' + num % 8; - num /= 8; - idx = idx.saturating_sub(1); - } - - // SAFETY: the operations we do above always land within ascii range. - unsafe { std::str::from_utf8_unchecked(&buf[..]) } -} - -#[inline] -fn write_visible_byte(output: &mut Vec, byte: u8) -> usize { - match byte { - // Control characters: ^@, ^A, ..., ^_ - 0..=31 => { - output.push(b'^'); - output.push(byte + 64); - 2 - } - // Printable ASCII (space through ~) - 32..=126 => { - output.push(byte); - 1 - } - // DEL: ^? - 127 => { - output.extend_from_slice(b"^?"); - 2 - } - // High bytes with control equivalents: M-^@, M-^A, ..., M-^_ - 128..=159 => { - output.push(b'M'); - output.push(b'-'); - output.push(b'^'); - output.push(byte - 64); - 4 - } - // High bytes: M-, M-!, ..., M-~ - 160..=254 => { - output.push(b'M'); - output.push(b'-'); - output.push(byte - 128); - 3 - } - // Byte 255: M-^? - 255 => { - output.extend_from_slice(b"M-^?"); - 4 - } - } -} - -/// Writes a byte in visible form with right-padding to 4 spaces. 
-#[inline] -fn write_visible_byte_padded(output: &mut Vec, byte: u8) { - const SPACES: &[u8] = b" "; - const WIDTH: usize = SPACES.len(); - - let display_width = write_visible_byte(output, byte); - - // Add right-padding spaces - let padding = WIDTH.saturating_sub(display_width); - output.extend_from_slice(&SPACES[..padding]); -} - -/// Formats a byte as a visible string (for non-performance-critical path) -#[inline] -fn format_visible_byte(byte: u8) -> String { - let mut result = Vec::with_capacity(4); - write_visible_byte(&mut result, byte); - // SAFETY: the checks and shifts in write_visible_byte match what cat and GNU - // cmp do to ensure characters fall inside the ascii range. - unsafe { String::from_utf8_unchecked(result) } -} - -// This function has been optimized to not use the Rust fmt system, which -// leads to a massive speed up when processing large files: cuts the time -// for comparing 2 ~36MB completely different files in half on an M1 Max. -#[inline] -fn format_verbose_difference( - from_byte: u8, - to_byte: u8, - at_byte: usize, - offset_width: usize, - output: &mut Vec, - params: &Params, -) -> Result<(), String> { - assert!(!params.quiet); - - let mut at_byte_buf = itoa::Buffer::new(); - let mut from_oct = [0u8; 3]; // for octal conversions - let mut to_oct = [0u8; 3]; - - if params.print_bytes { - // "{:>width$} {:>3o} {:4} {:>3o} {}", - let at_byte_str = at_byte_buf.format(at_byte); - let at_byte_padding = offset_width.saturating_sub(at_byte_str.len()); - - for _ in 0..at_byte_padding { - output.push(b' ') - } - - output.extend_from_slice(at_byte_str.as_bytes()); - - output.push(b' '); - - output.extend_from_slice(format_octal(from_byte, &mut from_oct).as_bytes()); - - output.push(b' '); - - write_visible_byte_padded(output, from_byte); - - output.push(b' '); - - output.extend_from_slice(format_octal(to_byte, &mut to_oct).as_bytes()); - - output.push(b' '); - - write_visible_byte(output, to_byte); - - output.push(b'\n'); - } else { - // 
"{:>width$} {:>3o} {:>3o}" - let at_byte_str = at_byte_buf.format(at_byte); - let at_byte_padding = offset_width - at_byte_str.len(); - - for _ in 0..at_byte_padding { - output.push(b' ') - } - - output.extend_from_slice(at_byte_str.as_bytes()); - - output.push(b' '); - - output.extend_from_slice(format_octal(from_byte, &mut from_oct).as_bytes()); - - output.push(b' '); - - output.extend_from_slice(format_octal(to_byte, &mut to_oct).as_bytes()); - - output.push(b'\n'); - } - - Ok(()) -} - -#[inline] -fn report_eof(at_byte: usize, at_line: usize, start_of_line: bool, eof_on: &str, params: &Params) { - if params.quiet { - return; - } - - if at_byte == 1 { - eprintln!( - "{}: EOF on '{}' which is empty", - params.executable.to_string_lossy(), - eof_on - ); - } else if params.verbose { - eprintln!( - "{}: EOF on '{}' after byte {}", - params.executable.to_string_lossy(), - eof_on, - at_byte - 1, - ); - } else if start_of_line { - eprintln!( - "{}: EOF on '{}' after byte {}, line {}", - params.executable.to_string_lossy(), - eof_on, - at_byte - 1, - at_line - 1 - ); - } else { - eprintln!( - "{}: EOF on '{}' after byte {}, in line {}", - params.executable.to_string_lossy(), - eof_on, - at_byte - 1, - at_line - ); - } -} - -fn is_posix_locale() -> bool { - let locale = if let Ok(locale) = env::var("LC_ALL") { - locale - } else if let Ok(locale) = env::var("LC_MESSAGES") { - locale - } else if let Ok(locale) = env::var("LANG") { - locale - } else { - "C".to_string() - }; - - locale == "C" || locale == "POSIX" -} - -#[inline] -fn report_difference(from_byte: u8, to_byte: u8, at_byte: usize, at_line: usize, params: &Params) { - if params.quiet { - return; - } - - let term = if is_posix_locale() && !params.print_bytes { - "char" - } else { - "byte" - }; - print!( - "{} {} differ: {term} {}, line {}", - ¶ms.from.to_string_lossy(), - ¶ms.to.to_string_lossy(), - at_byte, - at_line - ); - if params.print_bytes { - let char_width = if to_byte >= 0x7F { 2 } else { 1 }; - print!( - 
" is {:>3o} {:char_width$} {:>3o} {:char_width$}", - from_byte, - format_visible_byte(from_byte), - to_byte, - format_visible_byte(to_byte) - ); - } - println!(); -} - -#[cfg(test)] -mod tests { - use super::*; - fn os(s: &str) -> OsString { - OsString::from(s) - } - - #[test] - fn positional() { - assert_eq!( - Ok(Params { - executable: os("cmp"), - from: os("foo"), - to: os("bar"), - ..Default::default() - }), - parse_params([os("cmp"), os("foo"), os("bar")].iter().cloned().peekable()) - ); - - assert_eq!( - Ok(Params { - executable: os("cmp"), - from: os("foo"), - to: os("-"), - ..Default::default() - }), - parse_params([os("cmp"), os("foo")].iter().cloned().peekable()) - ); - - assert_eq!( - Ok(Params { - executable: os("cmp"), - from: os("foo"), - to: os("--help"), - ..Default::default() - }), - parse_params( - [os("cmp"), os("foo"), os("--"), os("--help")] - .iter() - .cloned() - .peekable() - ) - ); - - assert_eq!( - Ok(Params { - executable: os("cmp"), - from: os("foo"), - to: os("bar"), - skip_a: Some(1), - skip_b: None, - ..Default::default() - }), - parse_params( - [os("cmp"), os("foo"), os("bar"), os("1")] - .iter() - .cloned() - .peekable() - ) - ); - - assert_eq!( - Ok(Params { - executable: os("cmp"), - from: os("foo"), - to: os("bar"), - skip_a: Some(1), - skip_b: Some(usize::MAX), - ..Default::default() - }), - parse_params( - [os("cmp"), os("foo"), os("bar"), os("1"), os("2Y")] - .iter() - .cloned() - .peekable() - ) - ); - - // Bad positional arguments. 
- assert_eq!( - Err("Usage: cmp ".to_string()), - parse_params( - [os("cmp"), os("foo"), os("bar"), os("1"), os("2"), os("3")] - .iter() - .cloned() - .peekable() - ) - ); - assert_eq!( - Err("Usage: cmp ".to_string()), - parse_params([os("cmp")].iter().cloned().peekable()) - ); - } - - #[test] - fn execution_modes() { - let print_bytes = Params { - executable: os("cmp"), - from: os("foo"), - to: os("bar"), - print_bytes: true, - ..Default::default() - }; - assert_eq!( - Ok(print_bytes.clone()), - parse_params( - [os("cmp"), os("-b"), os("foo"), os("bar")] - .iter() - .cloned() - .peekable() - ) - ); - assert_eq!( - Ok(print_bytes), - parse_params( - [os("cmp"), os("--print-bytes"), os("foo"), os("bar")] - .iter() - .cloned() - .peekable() - ) - ); - - let verbose = Params { - executable: os("cmp"), - from: os("foo"), - to: os("bar"), - verbose: true, - ..Default::default() - }; - assert_eq!( - Ok(verbose.clone()), - parse_params( - [os("cmp"), os("-l"), os("foo"), os("bar")] - .iter() - .cloned() - .peekable() - ) - ); - assert_eq!( - Ok(verbose), - parse_params( - [os("cmp"), os("--verbose"), os("foo"), os("bar")] - .iter() - .cloned() - .peekable() - ) - ); - - let verbose_and_print_bytes = Params { - executable: os("cmp"), - from: os("foo"), - to: os("bar"), - print_bytes: true, - verbose: true, - ..Default::default() - }; - assert_eq!( - Ok(verbose_and_print_bytes.clone()), - parse_params( - [os("cmp"), os("-l"), os("-b"), os("foo"), os("bar")] - .iter() - .cloned() - .peekable() - ) - ); - assert_eq!( - Ok(verbose_and_print_bytes.clone()), - parse_params( - [os("cmp"), os("-lb"), os("foo"), os("bar")] - .iter() - .cloned() - .peekable() - ) - ); - assert_eq!( - Ok(verbose_and_print_bytes), - parse_params( - [os("cmp"), os("-bl"), os("foo"), os("bar")] - .iter() - .cloned() - .peekable() - ) - ); - - assert_eq!( - Ok(Params { - executable: os("cmp"), - from: os("foo"), - to: os("bar"), - quiet: true, - ..Default::default() - }), - parse_params( - [os("cmp"), 
os("-s"), os("foo"), os("bar")] - .iter() - .cloned() - .peekable() - ) - ); - - // Some options do not mix. - assert_eq!( - Err("cmp: options -l and -s are incompatible".to_string()), - parse_params( - [os("cmp"), os("-l"), os("-s"), os("foo"), os("bar")] - .iter() - .cloned() - .peekable() - ) - ); - } - - #[test] - fn max_bytes() { - let max_bytes = Params { - executable: os("cmp"), - from: os("foo"), - to: os("bar"), - max_bytes: Some(1), - ..Default::default() - }; - assert_eq!( - Ok(max_bytes.clone()), - parse_params( - [os("cmp"), os("-n"), os("1"), os("foo"), os("bar")] - .iter() - .cloned() - .peekable() - ) - ); - assert_eq!( - Ok(max_bytes), - parse_params( - [os("cmp"), os("--bytes=1"), os("foo"), os("bar")] - .iter() - .cloned() - .peekable() - ) - ); - - assert_eq!( - Ok(Params { - executable: os("cmp"), - from: os("foo"), - to: os("bar"), - max_bytes: Some(usize::MAX), - ..Default::default() - }), - parse_params( - [ - os("cmp"), - os("--bytes=99999999999999999999999999999999999999999999999999999999999"), - os("foo"), - os("bar") - ] - .iter() - .cloned() - .peekable() - ) - ); - - // Failure case - assert_eq!( - Err("cmp: invalid --bytes value '1K'".to_string()), - parse_params( - [os("cmp"), os("--bytes=1K"), os("foo"), os("bar")] - .iter() - .cloned() - .peekable() - ) - ); - } - - #[test] - fn skips() { - let skips = Params { - executable: os("cmp"), - from: os("foo"), - to: os("bar"), - skip_a: Some(1), - skip_b: Some(1), - ..Default::default() - }; - assert_eq!( - Ok(skips.clone()), - parse_params( - [os("cmp"), os("-i"), os("1"), os("foo"), os("bar")] - .iter() - .cloned() - .peekable() - ) - ); - assert_eq!( - Ok(skips), - parse_params( - [os("cmp"), os("--ignore-initial=1"), os("foo"), os("bar")] - .iter() - .cloned() - .peekable() - ) - ); - - assert_eq!( - Ok(Params { - executable: os("cmp"), - from: os("foo"), - to: os("bar"), - skip_a: Some(usize::MAX), - skip_b: Some(usize::MAX), - ..Default::default() - }), - parse_params( - [ - 
os("cmp"), - os("-i"), - os("99999999999999999999999999999999999999999999999999999999999"), - os("foo"), - os("bar") - ] - .iter() - .cloned() - .peekable() - ) - ); - - assert_eq!( - Ok(Params { - executable: os("cmp"), - from: os("foo"), - to: os("bar"), - skip_a: Some(1), - skip_b: Some(2), - ..Default::default() - }), - parse_params( - [os("cmp"), os("--ignore-initial=1:2"), os("foo"), os("bar")] - .iter() - .cloned() - .peekable() - ) - ); - - assert_eq!( - Ok(Params { - executable: os("cmp"), - from: os("foo"), - to: os("bar"), - skip_a: Some(1_000_000_000), - #[cfg(target_pointer_width = "32")] - skip_b: Some((2_147_483_647.5 * 2.0) as usize), - #[cfg(target_pointer_width = "64")] - skip_b: Some(1_152_921_504_606_846_976 * 2), - ..Default::default() - }), - parse_params( - [ - os("cmp"), - os("--ignore-initial=1GB:2E"), - os("foo"), - os("bar") - ] - .iter() - .cloned() - .peekable() - ) - ); - - // All special suffixes. - for (i, suffixes) in [ - ["kB", "K"], - ["MB", "M"], - ["GB", "G"], - ["TB", "T"], - ["PB", "P"], - ["EB", "E"], - ["ZB", "Z"], - ["YB", "Y"], - ] - .iter() - .enumerate() - { - let values = [ - 1_000usize.checked_pow((i + 1) as u32).unwrap_or(usize::MAX), - 1024usize.checked_pow((i + 1) as u32).unwrap_or(usize::MAX), - ]; - for (j, v) in values.iter().enumerate() { - assert_eq!( - Ok(Params { - executable: os("cmp"), - from: os("foo"), - to: os("bar"), - skip_a: Some(*v), - skip_b: Some(2), - ..Default::default() - }), - parse_params( - [ - os("cmp"), - os("-i"), - os(&format!("1{}:2", suffixes[j])), - os("foo"), - os("bar"), - ] - .iter() - .cloned() - .peekable() - ) - ); - } - } - - // Ignores positional arguments when -i is provided. 
- assert_eq!( - Ok(Params { - executable: os("cmp"), - from: os("foo"), - to: os("bar"), - skip_a: Some(1), - skip_b: Some(2), - ..Default::default() - }), - parse_params( - [ - os("cmp"), - os("-i"), - os("1:2"), - os("foo"), - os("bar"), - os("3"), - os("4") - ] - .iter() - .cloned() - .peekable() - ) - ); - - // Failure cases - assert_eq!( - Err("cmp: invalid --ignore-initial value '1mb'".to_string()), - parse_params( - [os("cmp"), os("--ignore-initial=1mb"), os("foo"), os("bar")] - .iter() - .cloned() - .peekable() - ) - ); - assert_eq!( - Err("cmp: invalid --ignore-initial value '1:2:3'".to_string()), - parse_params( - [ - os("cmp"), - os("--ignore-initial=1:2:3"), - os("foo"), - os("bar") - ] - .iter() - .cloned() - .peekable() - ) - ); - assert_eq!( - Err("cmp: invalid --ignore-initial value '-1'".to_string()), - parse_params( - [os("cmp"), os("--ignore-initial=-1"), os("foo"), os("bar")] - .iter() - .cloned() - .peekable() - ) - ); - } -} diff --git a/src/common/mod.rs b/src/common/mod.rs new file mode 100644 index 0000000..df389b9 --- /dev/null +++ b/src/common/mod.rs @@ -0,0 +1,6 @@ +// This file is part of the uutils diffutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +pub mod validation; diff --git a/src/common/validation.rs b/src/common/validation.rs new file mode 100644 index 0000000..615da92 --- /dev/null +++ b/src/common/validation.rs @@ -0,0 +1,122 @@ +// This file is part of the uutils diffutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. 
+ +// spell-checker:ignore prefixcat testcat + +use std::ffi::{OsStr, OsString}; +use std::io::{Write, stderr}; +use std::path::{Path, PathBuf}; +use std::process; + +use uucore::Args; +use uucore::display::Quotable; +use uudiff::locale; + +/// Gets all available utilities including "diffutils" +#[allow(clippy::type_complexity)] +pub fn get_all_utilities( + util_map: &phf::OrderedMap<&'static str, (fn(T) -> i32, fn() -> clap::Command)>, +) -> Vec<&'static str> { + std::iter::once("diffutils") + .chain(util_map.keys().copied()) + .collect() +} + +/// Prints a "utility not found" error and exits +pub fn not_found(util: &OsStr) -> ! { + let _ = writeln!( + stderr(), + "diffutils: unknown program '{}'", + util.maybe_quote() + ); + process::exit(2); +} + +/// Prints an "unrecognized option" error and exits +pub fn unrecognized_option(binary_name: &str, option: &OsStr) -> ! { + let _ = writeln!( + stderr(), + "{binary_name}: unrecognized option '{}'", + option.to_string_lossy() + ); + process::exit(1); +} + +/// Sets up localization for a utility with proper error handling +pub fn setup_localization_or_exit(util_name: &str) { + let util_name = get_canonical_util_name(util_name); + locale::setup_localization(util_name).unwrap_or_else(|err| { + match err { + locale::LocalizationError::ParseResource { + error: err_msg, + snippet, + } => eprintln!("ALocalization parse error at {snippet}: {err_msg}"), + other => eprintln!("Could not init the localization system: {other}"), + } + process::exit(99) + }); +} + +/// Gets the canonical utility name, resolving aliases +fn get_canonical_util_name(util_name: &str) -> &str { + match util_name { + // uu_test aliases - '[' is an alias for test + "[" => "test", + "dir" => "ls", // dir is an alias for ls + "vdir" => "ls", // vdir is an alias for ls + + // Default case - return the util name as is + _ => util_name, + } +} + +/// Gets the binary path from command line arguments +/// # Panics +/// Panics if the binary path cannot be 
determined +pub fn binary_path(args: &mut impl Iterator) -> PathBuf { + match args.next() { + Some(ref s) if !s.is_empty() => PathBuf::from(s), + _ => std::env::current_exe().unwrap(), + } +} + +/// Extracts the binary name from a path +pub fn name(binary_path: &Path) -> Option<&str> { + binary_path.file_stem()?.to_str() +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_get_canonical_util_name() { + // Test a few key aliases + assert_eq!(get_canonical_util_name("["), "test"); + assert_eq!(get_canonical_util_name("dir"), "ls"); + + // Test passthrough case + assert_eq!(get_canonical_util_name("cat"), "cat"); + } + + #[test] + fn test_name() { + // Test normal executable name + assert_eq!(name(Path::new("/usr/bin/ls")), Some("ls")); + assert_eq!(name(Path::new("cat")), Some("cat")); + assert_eq!( + name(Path::new("./target/debug/diffutils")), + Some("diffutils") + ); + + // Test with extensions + assert_eq!(name(Path::new("program.exe")), Some("program")); + assert_eq!(name(Path::new("/path/to/utility.bin")), Some("utility")); + + // Test edge cases + assert_eq!(name(Path::new("")), None); + assert_eq!(name(Path::new("/")), None); + } +} diff --git a/src/diff.rs b/src/diff.rs deleted file mode 100644 index f4c0614..0000000 --- a/src/diff.rs +++ /dev/null @@ -1,102 +0,0 @@ -// This file is part of the uutils diffutils package. -// -// For the full copyright and license information, please view the LICENSE-* -// files that was distributed with this source code. - -use crate::params::{parse_params, Format}; -use crate::utils::report_failure_to_read_input_file; -use crate::{context_diff, ed_diff, normal_diff, side_diff, unified_diff}; -use std::env::ArgsOs; -use std::ffi::OsString; -use std::fs; -use std::io::{self, stdout, Read, Write}; -use std::iter::Peekable; -use std::process::{exit, ExitCode}; - -// Exit codes are documented at -// https://www.gnu.org/software/diffutils/manual/html_node/Invoking-diff.html. 
-// An exit status of 0 means no differences were found, -// 1 means some differences were found, -// and 2 means trouble. -pub fn main(opts: Peekable) -> ExitCode { - let params = parse_params(opts).unwrap_or_else(|error| { - eprintln!("{error}"); - exit(2); - }); - // if from and to are the same file, no need to perform any comparison - let maybe_report_identical_files = || { - if params.report_identical_files { - println!( - "Files {} and {} are identical", - params.from.to_string_lossy(), - params.to.to_string_lossy(), - ); - } - }; - if params.from == "-" && params.to == "-" - || same_file::is_same_file(¶ms.from, ¶ms.to).unwrap_or(false) - { - maybe_report_identical_files(); - return ExitCode::SUCCESS; - } - - // read files - fn read_file_contents(filepath: &OsString) -> io::Result> { - if filepath == "-" { - let mut content = Vec::new(); - io::stdin().read_to_end(&mut content).and(Ok(content)) - } else { - fs::read(filepath) - } - } - let mut io_error = false; - let from_content = match read_file_contents(¶ms.from) { - Ok(from_content) => from_content, - Err(e) => { - report_failure_to_read_input_file(¶ms.executable, ¶ms.from, &e); - io_error = true; - vec![] - } - }; - let to_content = match read_file_contents(¶ms.to) { - Ok(to_content) => to_content, - Err(e) => { - report_failure_to_read_input_file(¶ms.executable, ¶ms.to, &e); - io_error = true; - vec![] - } - }; - if io_error { - return ExitCode::from(2); - } - - // run diff - let result: Vec = match params.format { - Format::Normal => normal_diff::diff(&from_content, &to_content, ¶ms), - Format::Unified => unified_diff::diff(&from_content, &to_content, ¶ms), - Format::Context => context_diff::diff(&from_content, &to_content, ¶ms), - Format::Ed => ed_diff::diff(&from_content, &to_content, ¶ms).unwrap_or_else(|error| { - eprintln!("{error}"); - exit(2); - }), - Format::SideBySide => { - let mut output = stdout().lock(); - side_diff::diff(&from_content, &to_content, &mut output, ¶ms) - } - }; - if 
params.brief && !result.is_empty() { - println!( - "Files {} and {} differ", - params.from.to_string_lossy(), - params.to.to_string_lossy() - ); - } else { - io::stdout().write_all(&result).unwrap(); - } - if result.is_empty() { - maybe_report_identical_files(); - ExitCode::SUCCESS - } else { - ExitCode::from(1) - } -} diff --git a/src/lib.rs b/src/lib.rs index 342b01c..2581a8c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,16 +1,8 @@ -pub mod cmp; -pub mod context_diff; -pub mod ed_diff; -pub mod macros; -pub mod normal_diff; -pub mod params; -pub mod side_diff; -pub mod unified_diff; -pub mod utils; +// This file is part of the uutils diffutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. -// Re-export the public functions/types you need -pub use context_diff::diff as context_diff; -pub use ed_diff::diff as ed_diff; -pub use normal_diff::diff as normal_diff; -pub use side_diff::diff as side_by_side_diff; -pub use unified_diff::diff as unified_diff; +pub mod common; + +pub use common::validation; diff --git a/src/main.rs b/src/main.rs deleted file mode 100644 index b7c2712..0000000 --- a/src/main.rs +++ /dev/null @@ -1,81 +0,0 @@ -// This file is part of the uutils diffutils package. -// -// For the full copyright and license information, please view the LICENSE-* -// files that was distributed with this source code. 
- -use std::{ - env::ArgsOs, - ffi::{OsStr, OsString}, - iter::Peekable, - path::{Path, PathBuf}, - process::ExitCode, -}; - -mod cmp; -mod context_diff; -mod diff; -mod ed_diff; -mod macros; -mod normal_diff; -mod params; -mod side_diff; -mod unified_diff; -mod utils; - -/// # Panics -/// Panics if the binary path cannot be determined -fn binary_path(args: &mut Peekable) -> PathBuf { - match args.peek() { - Some(ref s) if !s.is_empty() => PathBuf::from(s), - _ => std::env::current_exe().unwrap(), - } -} - -/// #Panics -/// Panics if path has no UTF-8 valid name -fn name(binary_path: &Path) -> &OsStr { - binary_path.file_stem().unwrap() -} - -const VERSION: &str = env!("CARGO_PKG_VERSION"); - -fn usage(name: &str) { - println!("{name} {VERSION} (multi-call binary)\n"); - println!("Usage: {name} [function [arguments...]]\n"); - println!("Currently defined functions:\n"); - println!(" cmp, diff\n"); -} - -fn second_arg_error(name: &OsStr) -> ! { - eprintln!("Expected utility name as second argument, got nothing."); - usage(&name.to_string_lossy()); - std::process::exit(0); -} - -fn main() -> ExitCode { - let mut args = std::env::args_os().peekable(); - - let exe_path = binary_path(&mut args); - let exe_name = name(&exe_path); - - let util_name = if exe_name == "diffutils" { - // Discard the item we peeked. 
- let _ = args.next(); - - args.peek() - .cloned() - .unwrap_or_else(|| second_arg_error(exe_name)) - } else { - OsString::from(exe_name) - }; - - match util_name.to_str() { - Some("diff") => diff::main(args), - Some("cmp") => cmp::main(args), - Some(name) => { - eprintln!("{name}: utility not supported"); - ExitCode::from(2) - } - None => second_arg_error(exe_name), - } -} diff --git a/src/uu/cmp/Cargo.toml b/src/uu/cmp/Cargo.toml new file mode 100644 index 0000000..a5f4146 --- /dev/null +++ b/src/uu/cmp/Cargo.toml @@ -0,0 +1,60 @@ +[package] +name = "uu_cmp" +description = "cmp ~ (uutils) decode/encode input (cmp file compare)" +# The tool dist does not allow different repository names within the workspace. +repository = "https://github.com/uutils/diffutils" +# repository = "https://github.com/uutils/diffutils/tree/main/src/uu/cmp" +version.workspace = true +authors.workspace = true +license.workspace = true +homepage.workspace = true +keywords.workspace = true +categories.workspace = true +edition.workspace = true +rust-version.workspace = true +readme.workspace = true + +[lints] +workspace = true + +[[bin]] +name = "cmp" +path = "src/main.rs" + +[lib] +path = "src/cmp.rs" + +[features] +default = ["feat_run_binary_bench" ] +# The cmd benchmarks start the binaries and take a lot of runtime on the github checks. +# Only run them locally. 
+feat_run_binary_bench = [] + + +[dependencies] +bytecount.workspace = true +clap.workspace = true +diff_crate.workspace = true +fluent.workspace = true +itoa.workspace = true +regex.workspace = true +same-file.workspace = true +uucore = { workspace = true, features = [ + "parser-size", +] } +uudiff.workspace = true +unicode-width.workspace = true + +[dev-dependencies] +divan.workspace = true +pretty_assertions.workspace = true +uudiff.workspace = true +tempfile.workspace = true + +[profile.release] +# for flamegraph +# debug = true + +[[bench]] +name = "cmp_bench" +harness = false diff --git a/src/uu/cmp/LICENSE b/src/uu/cmp/LICENSE new file mode 120000 index 0000000..5853aae --- /dev/null +++ b/src/uu/cmp/LICENSE @@ -0,0 +1 @@ +../../../LICENSE \ No newline at end of file diff --git a/src/uu/cmp/benches/cmp_bench.rs b/src/uu/cmp/benches/cmp_bench.rs new file mode 100644 index 0000000..925d36d --- /dev/null +++ b/src/uu/cmp/benches/cmp_bench.rs @@ -0,0 +1,151 @@ +#![allow(unused)] +// This file is part of the uutils diffutils package. +// +// For the full copyright and license information, please view the LICENSE-* +// files that was distributed with this source code. + +//! Benches for all utils in diffutils. +//! +//! There is a file generator included to create files of different sizes for comparison. \ +//! Set the TEMP_DIR const to keep the files. df_to_ files have small changes in them, search for '#'. \ +//! File generation up to 1 GB is really fast, Benchmarking above 100 MB takes very long. + +/// Generate test files with these sizes in KB. 
+const FILE_SIZES_IN_KILO_BYTES: [u64; 4] = [100, 1 * MB, 10 * MB, 25 * MB]; +const NUM_DIFF: u64 = 4; +// Empty String to use TempDir (files will be removed after test) or specify dir to keep generated files +const TEMP_DIR: &str = ""; +// just for FILE_SIZE_KILO_BYTES +const MB: u64 = 1_000; + +use std::sync::OnceLock; + +use divan::Bencher; +use tempfile::TempDir; +use uu_cmp::{params_cmp::Params, uu_app}; +// use uu_cmp::parse_params; +// use uu_cmp::uumain; +use uudiff::benchmark::{ + bench_binary, + prepare_bench::{BenchContext, generate_test_files_bytes}, + str_to_args, +}; + +// bench the time it takes to parse the command line arguments +#[divan::bench] +fn cmp_parser(bencher: Bencher) { + let cmd = "cmd file_1.txt file_2.txt -bl -n10M --ignore-initial=100KiB:1MiB"; + let args = str_to_args(&cmd).into_iter().peekable(); + bencher.with_inputs(|| args.clone()).bench_values( + // |params: std::iter::Peekable>| parse_params(params), + |params: std::iter::Peekable>| { + let matches = uudiff::clap_localization::handle_clap_result(uu_app(), params).unwrap(); + let params: Params = matches.try_into().unwrap(); + }, + ); + // ); +} + +// // // test the impact on the benchmark if not converting the cmd to Vec (doubles for parse) +// #[divan::bench] +// fn cmp_parser_no_prepare() { +// let cmd = "cmd file_1.txt file_2.txt -bl n10M --ignore-initial=100KiB:1MiB"; +// let args = str_to_args(&cmd).into_iter().peekable(); +// let _ = parse_params(args); +// } + +// bench equal, full file read +#[divan::bench(args = FILE_SIZES_IN_KILO_BYTES)] +fn cmp_compare_files_equal(bencher: Bencher, kb: u64) { + let fp = get_context().get_files_equal_kb(kb).unwrap(); + let cmd = format!("cmp {} {}", fp.from, fp.to); + let args = str_to_args(&cmd).into_iter(); + + bencher + // .with_inputs(|| prepare::cmp_params_identical_testfiles(lines)) + .with_inputs(|| args.clone()) + .bench_refs(|params| uu_cmp::uumain(params.peekable())); +} + +// bench different; cmp exits on first difference 
+#[divan::bench(args = FILE_SIZES_IN_KILO_BYTES)] +fn cmp_compare_files_different(bencher: Bencher, kb: u64) { + let fp = get_context().get_files_different_kb(kb).unwrap(); + let cmd = format!("cmp -s {} {}", fp.from, fp.to); + let args = str_to_args(&cmd).into_iter(); + + bencher + // .with_inputs(|| prepare::cmp_params_identical_testfiles(lines)) + .with_inputs(|| args.clone()) + .bench_refs(|params| uu_cmp::uumain(params.peekable())); +} + +// TODO use coreutils bench logic +// bench original GNU cmp +#[cfg(feature = "feat_run_binary_bench")] +#[divan::bench(args = FILE_SIZES_IN_KILO_BYTES)] +fn cmd_cmp_gnu_equal(bencher: Bencher, kb: u64) { + let fp = get_context().get_files_equal_kb(kb).unwrap(); + let args_str = format!("{} {}", fp.from, fp.to); + bencher + // .with_inputs(|| prepare::cmp_params_identical_testfiles(lines)) + .with_inputs(|| args_str.clone()) + .bench_refs(|cmd_args| bench_binary::bench_binary("cmp", cmd_args)); +} + +// bench the compiled release version +#[cfg(feature = "feat_run_binary_bench")] +#[divan::bench(args = FILE_SIZES_IN_KILO_BYTES)] +fn cmd_cmp_release_equal(bencher: Bencher, kb: u64) { + // search for src, then shorten path + let dir = std::env::current_dir().unwrap(); + let path = dir.to_string_lossy(); + let path = path.trim_end_matches("src/uu/cmp"); + let prg = path.to_string() + "target/release/cmp"; + + let fp = get_context().get_files_equal_kb(kb).unwrap(); + let args_str = format!("{} {}", fp.from, fp.to); + + bencher + // .with_inputs(|| prepare::cmp_params_identical_testfiles(lines)) + .with_inputs(|| args_str.clone()) + .bench_refs(|cmd_args| bench_binary::bench_binary(&prg, cmd_args)); +} + +// Since each bench function is separate in Divan it is more difficult to dynamically create test data. +// This keeps the TempDir alive until the program exits and generates the files only once. +static SHARED_CONTEXT: OnceLock = OnceLock::new(); +/// Creates the test files once and provides them to all tests. 
+pub fn get_context() -> &'static BenchContext { + SHARED_CONTEXT.get_or_init(|| { + let mut ctx = BenchContext::default(); + if TEMP_DIR.is_empty() { + let tmp_dir = TempDir::new().expect("Failed to create temp dir"); + ctx.tmp_dir = Some(tmp_dir); + } else { + // uses current directory, the generated files are kept + let path = std::path::Path::new(TEMP_DIR); + if !path.exists() { + std::fs::create_dir_all(path).expect("Path {path} could not be created"); + } + ctx.dir = TEMP_DIR.to_string(); + }; + + // generate test bytes + for kb in FILE_SIZES_IN_KILO_BYTES { + let f = generate_test_files_bytes(ctx.get_path(), kb * 1000, 0, "eq") + .expect("generate_test_files failed"); + ctx.files_equal.push(f); + let f = generate_test_files_bytes(ctx.get_path(), kb * 1000, NUM_DIFF, "df") + .expect("generate_test_files failed"); + ctx.files_different.push(f); + } + + ctx + }) +} + +fn main() { + // Run registered benchmarks. + divan::main(); +} diff --git a/src/uu/cmp/locales/en-US.ftl b/src/uu/cmp/locales/en-US.ftl new file mode 100644 index 0000000..1a73674 --- /dev/null +++ b/src/uu/cmp/locales/en-US.ftl @@ -0,0 +1,15 @@ +# TODO French translation +cmp-about = Compare two binary files for differences. + + With no FILE, or when FILE is -, read standard input. +cmp-usage = cmp [OPTION]... FILE1 [FILE2 [SKIP1 [SKIP2]]] + +# Help messages +cmp-help-bytes-limit = compare at most LIMIT bytes +cmp-help-ignore-initial = SKIP - skip first SKIP bytes of both inputs + SKIP1:SKIP2 - set SKIP for each input individually +cmp-help-print-bytes = print differing bytes +cmp-help-quiet = quiet, same as --silent +cmp-help-silent = suppress all normal output +cmp-help-verbose = output byte numbers and differing byte values + diff --git a/src/uu/cmp/src/cmp.rs b/src/uu/cmp/src/cmp.rs new file mode 100644 index 0000000..53cf803 --- /dev/null +++ b/src/uu/cmp/src/cmp.rs @@ -0,0 +1,511 @@ +// This file is part of the uutils diffutils package. 
+// +// For the full copyright and license information, please view the LICENSE-* +// files that was distributed with this source code. + +// spell-checker:ignore ilog + +pub mod params_cmp; + +use std::env::{self}; +use std::ffi::OsString; +use std::io::{BufRead, BufReader, BufWriter, Read, Write}; +use std::{cmp, fs, io}; + +use clap::Command; +use uudiff::common_errors::UtilsError; +use uudiff::error::{FromIo, UResult}; +use uudiff::utils::{self, CompareOk}; + +use crate::params_cmp::{BytesLimitU64, Params, SkipU64}; + +#[cfg(target_os = "windows")] +use std::os::windows::fs::MetadataExt; + +#[cfg(not(target_os = "windows"))] +use std::os::unix::fs::MetadataExt; + +/// Entry into cmp. +#[uucore::main] +pub fn uumain(args: impl uucore::Args) -> UResult<()> { + let matches = uudiff::clap_localization::handle_clap_result_with_exit_code(uu_app(), args, 2)?; + + let params: Params = matches.try_into()?; + + match cmp_compare(¶ms) { + Ok(res) => match res { + CompareOk::Equal => uucore::error::set_exit_code(0), + CompareOk::Different => uucore::error::set_exit_code(1), + }, + Err(e) => { + // dbg!(¶ms, &e); + if params.silent { + uucore::error::set_exit_code(2); + return Ok(()); + } + return Err(e); + } + } + + Ok(()) +} + +pub fn cmp_compare(params: &Params) -> UResult { + // check if file is actually a directory, which is not allowed + if params.from != "-" { + match fs::metadata(¶ms.from) { + Ok(m) => { + if m.is_dir() { + return Err(UtilsError::DirectoryNotAllowed(params.from.clone()).into()); + } + } + Err(e) => { + let io = e.map_err_context(|| params.from_as_string_lossy()); + return Err(UtilsError::Io(io).into()); + } + } + } + if params.to != "-" { + match fs::metadata(¶ms.to) { + Ok(m) => { + if m.is_dir() { + return Err(UtilsError::DirectoryNotAllowed(params.to.clone()).into()); + } + } + Err(e) => { + let io = e.map_err_context(|| params.to_as_string_lossy()); + return Err(UtilsError::Io(io).into()); + } + } + } + // check is same file and has no shift by 
skipping bytes + if utils::is_same_file(¶ms.from, ¶ms.to) + && params.skip_bytes_from == params.skip_bytes_to + { + return Ok(CompareOk::Equal); + } + + let mut from = prepare_reader(¶ms.from, params.skip_bytes_from)?; + let mut to = prepare_reader(¶ms.to, params.skip_bytes_to)?; + + let mut offset_width = params.bytes_limit.unwrap_or(BytesLimitU64::MAX); + + if let (Ok(a_meta), Ok(b_meta)) = (fs::metadata(¶ms.from), fs::metadata(¶ms.to)) { + #[cfg(not(target_os = "windows"))] + let (from_size, to_size) = (a_meta.size(), b_meta.size()); + + #[cfg(target_os = "windows")] + let (from_size, to_size) = (a_meta.file_size(), b_meta.file_size()); + + // If the files have different sizes, we already know they are not identical. If we have not + // been asked to show even the first difference, we can quit early. + if params.silent && from_size != to_size { + return Ok(CompareOk::Different); + } + + let smaller = cmp::min(from_size, to_size); + offset_width = cmp::min(smaller, offset_width); + } + + let offset_width = 1 + offset_width.checked_ilog10().unwrap_or(1) as usize; + + // Capacity calc: at_byte width + 2 x 3-byte octal numbers + 2 x 4-byte value + 4 spaces + let mut output = Vec::::with_capacity(offset_width + 3 * 2 + 4 * 2 + 4); + + let mut at_byte = 1; + let mut at_line = 1; + let mut start_of_line = true; + let mut stdout = BufWriter::new(io::stdout().lock()); + let mut compare = CompareOk::Equal; + loop { + // Fill up our buffers. + let from_buf = match from.fill_buf() { + Ok(buf) => buf, + Err(e) => { + let io = e.map_err_context(|| params.from_as_string_lossy()); + return Err(UtilsError::Io(io).into()); + } + }; + + let to_buf = match to.fill_buf() { + Ok(buf) => buf, + Err(e) => { + let io = e.map_err_context(|| params.to_as_string_lossy()); + return Err(UtilsError::Io(io).into()); + } + }; + + // Check for EOF conditions. 
+ if from_buf.is_empty() && to_buf.is_empty() { + break; + } + + if from_buf.is_empty() || to_buf.is_empty() { + let eof_on = if from_buf.is_empty() { + ¶ms.from.to_string_lossy() + } else { + ¶ms.to.to_string_lossy() + }; + + report_eof(at_byte, at_line, start_of_line, eof_on, params); + return Ok(CompareOk::Different); + } + + // Fast path - for long files in which almost all bytes are the same we + // can do a direct comparison to let the compiler optimize. + let consumed = std::cmp::min(from_buf.len(), to_buf.len()); + if from_buf[..consumed] == to_buf[..consumed] { + let last = from_buf[..consumed].last().unwrap(); + + at_byte += consumed as BytesLimitU64; + // at_line += from_buf[..consumed].iter().filter(|&c| *c == b'\n').count() as u64; + at_line += bytecount::count(&from_buf[..consumed], b'\n') as u64; + + start_of_line = *last == b'\n'; + + if let Some(bytes_limit) = params.bytes_limit { + if at_byte > bytes_limit { + break; + } + } + + from.consume(consumed); + to.consume(consumed); + + continue; + } + + // Iterate over the buffers, the zip iterator will stop us as soon as the + // first one runs out. 
+ for (&from_byte, &to_byte) in from_buf.iter().zip(to_buf.iter()) { + if from_byte != to_byte { + compare = CompareOk::Different; + + if params.verbose { + format_verbose_difference( + from_byte, + to_byte, + at_byte, + offset_width, + &mut output, + params, + ); + // TODO test error returns exit code 2 + stdout.write_all(output.as_slice())?; + // if let Err(e) = stdout.write_all(output.as_slice()) + // // .map_err(|e| format!("{}: error printing output: {e}", uucore::util_name())) + // { + // return Err(CmpError::FileIo("stdout".into(), e)); + // } + output.clear(); + } else { + report_difference(from_byte, to_byte, at_byte, at_line, params)?; + return Ok(CompareOk::Different); + } + } + + start_of_line = from_byte == b'\n'; + if start_of_line { + at_line += 1; + } + + at_byte += 1; + + if let Some(max_bytes) = params.bytes_limit { + if at_byte > max_bytes { + break; + } + } + } + + // Notify our readers about the bytes we went over. + from.consume(consumed); + to.consume(consumed); + } + + Ok(compare) +} + +fn prepare_reader( + path: &OsString, + ignore_initial: Option, +) -> Result, UtilsError> { + let mut reader: Box = if path == "-" { + Box::new(BufReader::new(io::stdin())) + } else { + match fs::File::open(path) { + Ok(file) => Box::new(BufReader::new(file)), + Err(e) => { + let io = e.map_err_context(|| path.to_string_lossy().to_string()); + return Err(UtilsError::Io(io)); + } + } + }; + + #[allow(clippy::collapsible_if)] + if let Some(skip) = ignore_initial { + if let Err(e) = io::copy(&mut reader.by_ref().take(skip), &mut io::sink()) { + let io = e.map_err_context(|| path.to_string_lossy().to_string()); + return Err(UtilsError::Io(io)); + } + } + + Ok(reader) +} + +#[inline] +fn format_octal(byte: u8, buf: &mut [u8; 3]) -> &str { + *buf = [b' ', b' ', b'0']; + + let mut num = byte; + let mut idx = 2; // Start at the last position in the buffer + + // Generate octal digits + while num > 0 { + buf[idx] = b'0' + num % 8; + num /= 8; + idx = 
idx.saturating_sub(1); + } + + // SAFETY: the operations we do above always land within ascii range. + unsafe { std::str::from_utf8_unchecked(&buf[..]) } +} + +// This function has been optimized to not use the Rust fmt system, which +// leads to a massive speed up when processing large files: cuts the time +// for comparing 2 ~36MB completely different files in half on an M1 Max. +#[inline] +fn format_verbose_difference( + from_byte: u8, + to_byte: u8, + at_byte: BytesLimitU64, + offset_width: usize, + output: &mut Vec, + params: &Params, +) { + assert!(!params.silent); + + let mut at_byte_buf = itoa::Buffer::new(); + let mut from_oct = [0u8; 3]; // for octal conversions + let mut to_oct = [0u8; 3]; + + if params.print_bytes { + // "{:>width$} {:>3o} {:4} {:>3o} {}", + let at_byte_str = at_byte_buf.format(at_byte); + let at_byte_padding = offset_width.saturating_sub(at_byte_str.len()); + + for _ in 0..at_byte_padding { + output.push(b' '); + } + + output.extend_from_slice(at_byte_str.as_bytes()); + + output.push(b' '); + + output.extend_from_slice(format_octal(from_byte, &mut from_oct).as_bytes()); + + output.push(b' '); + + write_visible_byte_padded(output, from_byte); + + output.push(b' '); + + output.extend_from_slice(format_octal(to_byte, &mut to_oct).as_bytes()); + + output.push(b' '); + + write_visible_byte(output, to_byte); + + output.push(b'\n'); + } else { + // "{:>width$} {:>3o} {:>3o}" + let at_byte_str = at_byte_buf.format(at_byte); + let at_byte_padding = offset_width - at_byte_str.len(); + + for _ in 0..at_byte_padding { + output.push(b' '); + } + + output.extend_from_slice(at_byte_str.as_bytes()); + + output.push(b' '); + + output.extend_from_slice(format_octal(from_byte, &mut from_oct).as_bytes()); + + output.push(b' '); + + output.extend_from_slice(format_octal(to_byte, &mut to_oct).as_bytes()); + + output.push(b'\n'); + } +} + +/// Formats a byte as a visible string (for non-performance-critical path) +#[inline] +fn format_visible_byte(byte: u8) 
-> String { + let mut result = Vec::with_capacity(4); + write_visible_byte(&mut result, byte); + // SAFETY: the checks and shifts in write_visible_byte match what cat and GNU + // cmp do to ensure characters fall inside the ascii range. + unsafe { String::from_utf8_unchecked(result) } +} + +fn is_posix_locale() -> bool { + let locale = if let Ok(locale) = env::var("LC_ALL") { + locale + } else if let Ok(locale) = env::var("LC_MESSAGES") { + locale + } else if let Ok(locale) = env::var("LANG") { + locale + } else { + "C".to_string() + }; + + locale == "C" || locale == "POSIX" +} + +#[inline] +fn report_difference( + from_byte: u8, + to_byte: u8, + at_byte: BytesLimitU64, + at_line: u64, + params: &Params, +) -> io::Result<()> { + if params.silent { + return Ok(()); + } + + let term = if is_posix_locale() && !params.print_bytes { + "char" + } else { + "byte" + }; + print!( + "{} {} differ: {term} {}, line {}", + ¶ms.from.to_string_lossy(), + ¶ms.to.to_string_lossy(), + at_byte, + at_line + ); + if params.print_bytes { + let char_width = if to_byte >= 0x7F { 2 } else { 1 }; + print!( + " is {:>3o} {:char_width$} {:>3o} {:char_width$}", + from_byte, + format_visible_byte(from_byte), + to_byte, + format_visible_byte(to_byte) + ); + } + // Instead of println!(), which panics in case of error (> /dev/full). 
+ let mut stdout = io::stdout(); + writeln!(stdout)?; + stdout.flush()?; + + Ok(()) +} + +#[inline] +fn report_eof( + at_byte: BytesLimitU64, + at_line: u64, + start_of_line: bool, + eof_on: &str, + params: &Params, +) { + if params.silent { + return; + } + + if at_byte == 1 { + eprintln!( + "{}: EOF on '{}' which is empty", + uucore::util_name(), + eof_on + ); + } else if params.verbose { + eprintln!( + "{}: EOF on '{}' after byte {}", + uucore::util_name(), + eof_on, + at_byte - 1, + ); + } else if start_of_line { + eprintln!( + "{}: EOF on '{}' after byte {}, line {}", + uucore::util_name(), + eof_on, + at_byte - 1, + at_line - 1 + ); + } else { + eprintln!( + "{}: EOF on '{}' after byte {}, in line {}", + uucore::util_name(), + eof_on, + at_byte - 1, + at_line + ); + } +} + +#[inline] +fn write_visible_byte(output: &mut Vec, byte: u8) -> usize { + match byte { + // Control characters: ^@, ^A, ..., ^_ + 0..=31 => { + output.push(b'^'); + output.push(byte + 64); + 2 + } + // Printable ASCII (space through ~) + 32..=126 => { + output.push(byte); + 1 + } + // DEL: ^? + 127 => { + output.extend_from_slice(b"^?"); + 2 + } + // High bytes with control equivalents: M-^@, M-^A, ..., M-^_ + 128..=159 => { + output.push(b'M'); + output.push(b'-'); + output.push(b'^'); + output.push(byte - 64); + 4 + } + // High bytes: M-, M-!, ..., M-~ + 160..=254 => { + output.push(b'M'); + output.push(b'-'); + output.push(byte - 128); + 3 + } + // Byte 255: M-^? + 255 => { + output.extend_from_slice(b"M-^?"); + 4 + } + } +} + +/// Writes a byte in visible form with right-padding to 4 spaces. 
+#[inline] +fn write_visible_byte_padded(output: &mut Vec, byte: u8) { + const SPACES: &[u8] = b" "; + const WIDTH: usize = SPACES.len(); + + let display_width = write_visible_byte(output, byte); + + // Add right-padding spaces + let padding = WIDTH.saturating_sub(display_width); + output.extend_from_slice(&SPACES[..padding]); +} + +// Required for build.rs +pub fn uu_app() -> Command { + params_cmp::uu_app() +} diff --git a/src/uu/cmp/src/main.rs b/src/uu/cmp/src/main.rs new file mode 100644 index 0000000..5a41137 --- /dev/null +++ b/src/uu/cmp/src/main.rs @@ -0,0 +1,6 @@ +// This file is part of the uutils diffutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +uudiff::bin!(uu_cmp); diff --git a/src/uu/cmp/src/params_cmp.rs b/src/uu/cmp/src/params_cmp.rs new file mode 100644 index 0000000..2c12116 --- /dev/null +++ b/src/uu/cmp/src/params_cmp.rs @@ -0,0 +1,367 @@ +// This file is part of the uutils diffutils package. +// +// For the full copyright and license information, please view the LICENSE-* +// files that was distributed with this source code. + +//! This is the parser for the cmp utility. +//! +//! It uses the parsed data clap provides and fills the [params] for cmp. +//! It contains the allowed options, specific parsing logic and parsing error messages. +//! +use clap::{Arg, ArgAction, Command}; +use std::ffi::OsString; +use uucore::parser::parse_size::{ParseSizeError, Parser}; +use uudiff::common_errors::UParseError; +use uudiff::translate; + +/// For option --bytes, set to u64, so large size limits can +/// be expressed, like Exabyte. \ +/// This could be set to u128 with small modifications, +/// but AFAIK file sizes (metadata) can not exceed u64. +/// This is also limiting the compare function to u64::MAX +/// as this is the default value. +pub type BytesLimitU64 = u64; +/// For option --ignore initial, should not be changed. 
+pub type SkipU64 = u64; + +/// Units up eo Exabyte (EiB) following GNU documentation: \ +/// . +// "kB" | "KB" => 1_000, +// "k" | "K" | "KiB" | "kiB" => 1_024, +// "MB" => 1_000_000, +// "m" | "M" | "MiB" => 1_048_576, +// "GB" => 1_000_000_000, +// "g" | "G" | "GiB" => 1_073_741_824, +// "TB" => 1_000_000_000_000, +// "t" | "T" | "TiB" => 1_099_511_627_776, +// "PB" => 1_000_000_000_000_000, +// "p" | "P" | "PiB" => 1_125_899_906_842_624, +// "EB" => 1_000_000_000_000_000_000, +// "e" | "E" | "EiB" => 1_152_921_504_606_846_976, +const ALLOWED_UNITS: [&str; 26] = [ + "kB", "KB", "k", "K", "KiB", "kiB", "MB", "m", "M", "MiB", "GB", "g", "G", "GiB", "TB", "t", + "T", "TiB", "PB", "p", "P", "PiB", "EB", "e", "E", "EiB", +]; + +// Allowed utility arguments (options) +pub mod options { + + /// Generic option for files and other undefined operands + pub const FILE: &str = "file"; + /// -n, --bytes=LIMIT compare at most LIMIT bytes + pub const BYTES_LIMIT: &str = "bytes"; + /// -i, --ignore-initial=SKIP skip first SKIP bytes of both inputs + /// -i, --ignore-initial=SKIP1:SKIP2 skip first SKIP1 bytes of FILE1 and + pub const IGNORE_INITIAL: &str = "ignore-initial"; + // pub const IGNORE_INITIAL: &str = "SKIP[:SKIP2]"; + /// -b, --print-bytes print differing bytes + pub const PRINT_BYTES: &str = "print-bytes"; + /// -s, --quiet, --silent suppress all normal output + pub const QUIET: &str = "quiet"; + pub const SILENT: &str = "silent"; + /// -l, --verbose output byte numbers and differing byte values + pub const VERBOSE: &str = "verbose"; +} + +/// Holds the given command line arguments except "--version" and "--help". 
+#[derive(Debug, Default, Clone, PartialEq)] +pub struct Params { + /// path or "-" for stdin + pub from: OsString, + pub to: OsString, + /// -n, --bytes=LIMIT compare at most LIMIT bytes + /// cmp from diffutils has a limit of i64::MAX (9_223_372_036_854_775_807) + pub bytes_limit: Option, + /// -i, --ignore-initial=SKIP skip first SKIP bytes of both inputs + pub skip_bytes_from: Option, + /// -i, --ignore-initial=SKIP1:SKIP2 skip first SKIP1 bytes of FILE1 and + pub skip_bytes_to: Option, + /// -b, --print-bytes print differing bytes + pub print_bytes: bool, + /// -s, --quiet, --silent suppress all normal output \ + /// Do not set directly, use set_silent(). + pub silent: bool, + /// -l, --verbose output byte numbers and differing byte values \ + /// Do not set directly, use set_verbose(). + pub verbose: bool, +} + +impl Params { + pub fn from_as_string_lossy(&self) -> String { + self.from.to_string_lossy().to_string() + } + + pub fn to_as_string_lossy(&self) -> String { + self.to.to_string_lossy().to_string() + } + + /// Sets the --bytes limit and returns the input as number. + /// + /// bytes - unparsed number string, e.g. '50KiB' + pub fn set_bytes_limit(&mut self, num_unit: &str) -> Result { + let num = Self::parse_num_bytes(num_unit).map_err(|e| { + UParseError::ParseSizeError(options::BYTES_LIMIT, num_unit.to_string(), e) + })?; + + self.bytes_limit = Some(num); + Ok(num) + } + + pub fn set_print_bytes(&mut self, value: bool) -> Result<(), UParseError> { + // Should actually raise an error if --silent is set, but GNU cmp does not do that. + if value && self.silent { + return Err(UParseError::OptionsIncompatible( + options::PRINT_BYTES, + options::SILENT, + )); + } + self.print_bytes = value; + + Ok(()) + } + + /// Sets the ignore initial bytes for both files. + /// + /// Accepts digits[unit][:digits[unit]] \ + /// Sets the 2nd file to the value of the 1st file if no second parameter is given. 
\ + pub fn set_skip_bytes(&mut self, bytes: &str) -> Result<(), UParseError> { + // empty string is not checked + + // Split at ':' if present + let (skip_1, skip_2) = match bytes.split_once(':') { + Some((s1, s2)) => (s1, s2), + None => { + // set file_to to same value as file_from + (bytes, bytes) + } + }; + + self.set_skip_bytes_file_no(skip_1, 1)?; + self.set_skip_bytes_file_no(skip_2, 2)?; + + Ok(()) + } + + /// Sets the [Self::skip_bytes_from] or [Self::skip_bytes_to] value. + /// + /// GNU cmp always uses the higher number in case of conflicting definitions + /// with --ignore-initial and operand + fn set_skip_bytes_file_no( + &mut self, + bytes_num_unit: &str, + file_no: i32, + ) -> Result { + let skip = match Self::parse_num_bytes(bytes_num_unit) { + Ok(r) => r, + Err(e) => { + return Err(UParseError::ParseSizeError( + options::IGNORE_INITIAL, + bytes_num_unit.to_string(), + e, + )); + } + }; + match file_no { + // use higher value + 1 => { + self.skip_bytes_from = match self.skip_bytes_from { + Some(v) => Some(skip.max(v)), + None => Some(skip), + } + } + 2 => { + self.skip_bytes_to = match self.skip_bytes_to { + Some(v) => Some(skip.max(v)), + None => Some(skip), + } + } + _ => panic!("logic error"), + } + + Ok(skip) + } + + pub fn set_verbose(&mut self, value: bool) -> Result<(), UParseError> { + if value && self.silent { + return Err(UParseError::OptionsIncompatible( + options::VERBOSE, + options::SILENT, + )); + } + self.verbose = value; + Ok(()) + } + + /// Parse a SIZE string into a number of bytes. + /// A size string comprises an integer and an optional unit. + /// The unit must be one of `ALLOWED_UNITS` (k, K, m, M, g, G, t, T, p, P, e, E and the kB/KiB families up to EB/EiB). + /// No default unit is set on the parser (the "K" default is commented out). 
+ fn parse_num_bytes(input: &str) -> Result { + let size = Parser::default() + .with_allow_list(&ALLOWED_UNITS) + // .with_default_unit("K") + // .with_b_byte_count(true) + .parse(input.trim())?; + + SkipU64::try_from(size).map_err(|_| ParseSizeError::SizeTooBig(input.to_string())) + } +} + +/// Converts clap args to params. +impl TryFrom for Params { + type Error = UParseError; + + fn try_from(matches: clap::ArgMatches) -> Result { + // dbg!(&matches); + + let mut params = Self { + silent: matches.get_flag(options::SILENT) || matches.get_flag(options::QUIET), + ..Default::default() + }; + params.set_verbose(matches.get_flag(options::VERBOSE))?; + params.set_print_bytes(matches.get_flag(options::PRINT_BYTES))?; + + // has bytes-limit? + if let Some(byte_str) = matches.get_one::(options::BYTES_LIMIT) { + params.set_bytes_limit(byte_str)?; + } + + // has ignore-initial? + if let Some(skip_str) = matches.get_one::(options::IGNORE_INITIAL) { + // dbg!(&skip_str); + params.set_skip_bytes(skip_str)?; + } + + // get files + let files: Vec = match matches.get_many::(options::FILE) { + Some(v) => v.cloned().collect(), + None => return Err(UParseError::MissingOperand(uucore::util_name().to_string())), + }; + // dbg!(&files); + + match files.len() { + 0 => return Err(UParseError::MissingOperand(uucore::util_name().to_string())), + // If only file_1 is set, then file_2 defaults to '-', so it reads from StandardInput. 
+ 1 => { + params.from.clone_from(&files[0]); + params.to = "-".into(); + } + 2..=4 => { + params.from.clone_from(&files[0]); + params.to.clone_from(&files[1]); + // ignore if ignore-initial is already set by option + if files.len() > 2 { + params.set_skip_bytes_file_no(&files[2].to_string_lossy(), 1)?; + if let Some(skip) = files.get(3) { + params.set_skip_bytes_file_no(&skip.to_string_lossy(), 2)?; + } + } + } + _ => { + return Err(UParseError::ExtraOperand(files[4].clone())); + } + } + + // Do as GNU cmp, and completely disable printing if we are + // outputting to /dev/null. + #[cfg(not(target_os = "windows"))] + if is_stdout_dev_null() { + params.silent = true; + params.verbose = false; + params.print_bytes = false; + } + + // dbg!(&params); + Ok(params) + } +} + +#[cfg(not(target_os = "windows"))] +fn is_stdout_dev_null() -> bool { + use std::{ + fs, io, + os::{fd::AsRawFd, unix::fs::MetadataExt}, + }; + + let Ok(dev_null) = fs::metadata("/dev/null") else { + return false; + }; + + let stdout_fd = io::stdout().lock().as_raw_fd(); + + // SAFETY: we have exclusive access to stdout right now. + let stdout_file = unsafe { + use std::os::fd::FromRawFd; + fs::File::from_raw_fd(stdout_fd) + }; + let Ok(stdout) = stdout_file.metadata() else { + return false; + }; + + let is_dev_null = stdout.dev() == dev_null.dev() && stdout.ino() == dev_null.ino(); + + // Don't let File close the fd. It's unfortunate that File doesn't have a leak_fd(). NOTE(review): the early `return false` above drops stdout_file without this forget, which closes stdout. 
+ std::mem::forget(stdout_file); + + is_dev_null +} + +pub fn uu_app() -> Command { + Command::new(uucore::util_name()) + .version(uucore::crate_version!()) + .help_template(uucore::localized_help_template(uucore::util_name())) + .override_usage(uucore::format_usage(&translate!("cmp-usage"))) + .about(translate!("cmp-about")) + .infer_long_args(true) + .arg( + Arg::new(options::FILE) + .action(ArgAction::Append) + .hide(true) + .value_hint(clap::ValueHint::FilePath) + .value_parser(clap::value_parser!(OsString)), + ) + .arg( + Arg::new(options::BYTES_LIMIT) + .long("bytes") + .short('n') + .value_name("LIMIT") + .action(ArgAction::Set) + .help(translate!("cmp-help-bytes-limit")), + ) + .arg( + Arg::new(options::IGNORE_INITIAL) + .long("ignore-initial") + .short('i') + .value_name("SKIP[:SKIP2]") + .action(ArgAction::Set) + .help(translate!("cmp-help-ignore-initial")), + ) + .arg( + Arg::new(options::PRINT_BYTES) + .long("print-bytes") + .short('b') + .action(ArgAction::SetTrue) + .help(translate!("cmp-help-print-bytes")), + ) + .arg( + Arg::new(options::QUIET) + .long("quiet") + .action(ArgAction::SetTrue) + .help(translate!("cmp-help-quiet")), + ) + .arg( + Arg::new(options::SILENT) + .long("silent") + .short('s') + // .visible_alias(options::QUIET) works, but shows different --help + .action(ArgAction::SetTrue) + .help(translate!("cmp-help-silent")), + ) + .arg( + Arg::new(options::VERBOSE) + .long("verbose") + .short('l') + .action(ArgAction::SetTrue) + .help(translate!("cmp-help-verbose")), + ) +} diff --git a/src/uu/diff/Cargo.toml b/src/uu/diff/Cargo.toml new file mode 100644 index 0000000..94c0232 --- /dev/null +++ b/src/uu/diff/Cargo.toml @@ -0,0 +1,56 @@ +[package] +name = "uu_diff" +description = "diff ~ (uutils) decode/encode input (diff file compare)" +# The tool dist does not allow different repository names within the workspace. 
+# repository = "https://github.com/uutils/diffutils/tree/main/src/uu/diff" +repository = "https://github.com/uutils/diffutils" +version.workspace = true +authors.workspace = true +license.workspace = true +homepage.workspace = true +keywords.workspace = true +categories.workspace = true +edition.workspace = true +rust-version.workspace = true +readme.workspace = true + +[lints] +workspace = true + +[[bin]] +name = "diff" +path = "src/main.rs" + +[lib] +path = "src/diff.rs" + +[features] +default = ["feat_run_binary_bench" ] +# The cmd benchmarks start the binaries and take a lot of runtime on the github checks. +# Only run them locally. +feat_run_binary_bench = [] + +[dependencies] +clap.workspace = true +diff_crate.workspace = true +fluent.workspace = true +regex.workspace = true +same-file.workspace = true +uucore.workspace = true +uudiff.workspace = true +unicode-width.workspace = true + +[dev-dependencies] +divan.workspace = true +pretty_assertions.workspace = true +rand.workspace = true +tempfile.workspace = true + +[profile.release] +# for flamegraph +# debug = 1 + +[[bench]] +name = "diff_bench" +harness = false + diff --git a/src/uu/diff/LICENSE b/src/uu/diff/LICENSE new file mode 120000 index 0000000..5853aae --- /dev/null +++ b/src/uu/diff/LICENSE @@ -0,0 +1 @@ +../../../LICENSE \ No newline at end of file diff --git a/src/uu/diff/benches/diff_bench.rs b/src/uu/diff/benches/diff_bench.rs new file mode 100644 index 0000000..7a2e903 --- /dev/null +++ b/src/uu/diff/benches/diff_bench.rs @@ -0,0 +1,140 @@ +// #![allow(unused)] +// This file is part of the uutils diffutils package. +// +// For the full copyright and license information, please view the LICENSE-* +// files that was distributed with this source code. + +//! Benches for all utils in diffutils. +//! +//! There is a file generator included to create files of different sizes for comparison. \ +//! Set the TEMP_DIR const to keep the files. df_to_ files have small changes in them, search for '#'. 
\ +//! File generation up to 1 GB is really fast, Benchmarking above 100 MB takes very long. + +/// Generate test files with these sizes in KB. +const FILE_SIZES_IN_KILO_BYTES: [u64; 4] = [100, 1 * MB, 10 * MB, 25 * MB]; +const NUM_DIFF: u64 = 4; +// Empty String to use TempDir (files will be removed after test) or specify dir to keep generated files +const TEMP_DIR: &str = ""; +// just for FILE_SIZE_KILO_BYTES +const MB: u64 = 1_000; + +use divan::Bencher; +use std::{path::Path, sync::OnceLock}; +use tempfile::TempDir; +use uudiff::benchmark::{ + // bench_binary, + prepare_bench::{BenchContext, generate_test_files_bytes}, + str_to_args, +}; + +// bench the time it takes to parse the command line arguments +#[divan::bench] +fn diff_parser_old(bencher: Bencher) { + let cmd = "diff file_1.txt file_2.txt -s --brief --expand-tabs --width=100"; + let args = str_to_args(&cmd).into_iter().peekable(); + bencher + .with_inputs(|| args.clone()) + .bench_values(|data| uu_diff::params_old::parse_params(data)); +} + +// bench the time it takes to parse the command line arguments +#[divan::bench(sample_size = 100)] +fn diff_parser_clap(bencher: Bencher) { + let cmd = "diff file_1.txt file_2.txt -s --brief --expand-tabs --width=100"; + let args_prep = str_to_args(&cmd).into_iter().peekable(); + bencher + .with_inputs(|| args_prep.clone()) + .bench_values(|args| { + let args = uu_diff::clap_preparation(args); + let matches = uudiff::clap_localization::handle_clap_result_with_exit_code( + uu_diff::uu_app(), + args, + 2, + ) + .unwrap(); + let _params: uu_diff::params_diff::Params = matches.try_into().unwrap(); + }); +} + +// bench the actual compare +// bench equal, full file read +#[divan::bench(args = FILE_SIZES_IN_KILO_BYTES)] +fn diff_compare_files_equal(bencher: Bencher, kb: u64) { + let fp = get_context().get_files_equal_kb(kb).unwrap(); + let cmd = format!("diff {} {}", fp.from, fp.to); + let args = str_to_args(&cmd).into_iter(); + + bencher + // .with_inputs(|| 
prepare::diff_params_identical_testfiles(lines)) + .with_inputs(|| args.clone()) + .bench_refs(|params| uu_diff::uumain(params.peekable())); +} + +// bench original GNU diff +#[cfg(feature = "feat_run_binary_bench")] +#[divan::bench(args = FILE_SIZES_IN_KILO_BYTES)] +fn cmd_diff_gnu_equal(bencher: Bencher, kb: u64) { + let fp = get_context().get_files_equal_kb(kb).unwrap(); + let args_str = format!("{} {}", fp.from, fp.to); + bencher + // .with_inputs(|| prepare::cmp_params_identical_testfiles(lines)) + .with_inputs(|| args_str.clone()) + .bench_refs(|cmd_args| uudiff::benchmark::bench_binary::bench_binary("diff", cmd_args)); +} + +// bench the compiled release version +#[cfg(feature = "feat_run_binary_bench")] +#[divan::bench(args = FILE_SIZES_IN_KILO_BYTES)] +fn cmd_diff_release_equal(bencher: Bencher, kb: u64) { + // search for src, then shorten path + let dir = std::env::current_dir().unwrap(); + let path = dir.to_string_lossy(); + let path = path.trim_end_matches("src/uu/diff"); + let prg = path.to_string() + "target/release/diff"; + + let fp = get_context().get_files_equal_kb(kb).unwrap(); + let args_str = format!("{} {}", fp.from, fp.to); + + bencher + // .with_inputs(|| prepare::cmp_params_identical_testfiles(lines)) + .with_inputs(|| args_str.clone()) + .bench_refs(|cmd_args| uudiff::benchmark::bench_binary::bench_binary(&prg, cmd_args)); +} + +// Since each bench function is separate in Divan it is more difficult to dynamically create test data. +// This keeps the TempDir alive until the program exits and generates the files only once. +static SHARED_CONTEXT: OnceLock = OnceLock::new(); +/// Creates the test files once and provides them to all tests. 
+pub fn get_context() -> &'static BenchContext { + SHARED_CONTEXT.get_or_init(|| { + let mut ctx = BenchContext::default(); + if TEMP_DIR.is_empty() { + let tmp_dir = TempDir::new().expect("Failed to create temp dir"); + ctx.tmp_dir = Some(tmp_dir); + } else { + // uses current directory, the generated files are kept + let path = Path::new(TEMP_DIR); + if !path.exists() { + std::fs::create_dir_all(path).expect("Path {path} could not be created"); + } + ctx.dir = TEMP_DIR.to_string(); + }; + + // generate test bytes + for kb in FILE_SIZES_IN_KILO_BYTES { + let f = generate_test_files_bytes(ctx.get_path(), kb * 1000, 0, "eq") + .expect("generate_test_files failed"); + ctx.files_equal.push(f); + let f = generate_test_files_bytes(ctx.get_path(), kb * 1000, NUM_DIFF, "df") + .expect("generate_test_files failed"); + ctx.files_different.push(f); + } + + ctx + }) +} + +fn main() { + // Run registered benchmarks. + divan::main(); +} diff --git a/src/uu/diff/locales/en-US.ftl b/src/uu/diff/locales/en-US.ftl new file mode 100644 index 0000000..5a0dba1 --- /dev/null +++ b/src/uu/diff/locales/en-US.ftl @@ -0,0 +1,63 @@ +# TODO French translation +diff-about = Compare two text files for differences. + + With no FILE, or when FILE is -, read standard input. +diff-usage = diff [OPTION]... 
FILE1 [FILE2] + +# Help messages +# These are auto-generated from GNU diff help and need to be rephrased +diff-help-brief = report only when files differ +diff-help-color = color output; WHEN is 'never', 'always', or 'auto'; +diff-help-context = output NUM (default 3) lines of copied context +diff-help-ed = output an ed script +diff-help-exclude = exclude files that match PAT +diff-help-exclude-from = exclude files that match any pattern in FILE +diff-help-expand-tabs = expand tabs to spaces in output +diff-help-from-file = compare FILE1 to all operands; +diff-help-gtype-group-format = format GTYPE input groups with GFMT +diff-help-horizon-lines = keep NUM lines of the common prefix and suffix +diff-help-ifdef = output merged file with '#ifdef NAME' diffs +diff-help-ignore-all-space = ignore all white space +diff-help-ignore-blank-lines = ignore changes where lines are all blank +diff-help-ignore-case = ignore case differences in file contents +diff-help-ignore-file-name-case = ignore case when comparing file names +diff-help-ignore-matching-lines = ignore changes where all lines match REGEXP +diff-help-ignore-space-change = ignore changes in the amount of white space +diff-help-ignore-tab-expansion = ignore changes due to tab expansion +diff-help-ignore-trailing-space = ignore white space at line end +diff-help-initial-tab = make tabs line up by prepending a tab +diff-help-label = LABEL use LABEL instead of file name and timestamp +diff-help-left-column = output only the left column of common lines +diff-help-line-format = format all input lines with LFMT +diff-help-ltype-line-format = format LTYPE input lines with LFMT +diff-help-minimal = try hard to find a smaller set of changes +diff-help-new-file = treat absent files as empty +diff-help-no-dereference = don't follow symbolic links +diff-help-no-ignore-file-name-case = consider case when comparing file names +diff-help-normal = output a normal diff (the default) +diff-help-paginate = pass output through 'pr' 
to paginate it +diff-help-palette = the colors to use when --color is active; PALETTE is +diff-help-rcs = output an RCS format diff +diff-help-recursive = recursively compare any subdirectories found +diff-help-report-identical-files = report when two files are the same +diff-help-show-c-function = show which C function each change is in +diff-help-show-function-line = show the most recent line matching REGEXP +diff-help-side-by-side = output in two columns +diff-help-speed-large-files = assume large files and many scattered small changes +diff-help-starting-file = start with FILE when comparing directories +diff-help-strip-trailing-cr = strip trailing carriage return on input +diff-help-suppress-blank-empty = suppress space or tab before empty output lines +diff-help-suppress-common-lines = do not output common lines +diff-help-tabsize = tab stops every NUM (default 8) print columns +diff-help-text = treat all files as text +diff-help-to-file = compare all operands to FILE2; +diff-help-unidirectional-new-file = treat absent first files as empty +diff-help-unified = output NUM (default 3) lines of unified context +diff-help-width = output at most NUM (default 130) print columns + +# Info messages +diff-info-files-are-identical = Files { $file_1 } and { $file_2 } are identical +diff-info-files-are-different = Files { $file_1 } and { $file_2 } differ + +# Error messages +diff-error-missing-newline = No newline at end of file diff --git a/src/context_diff.rs b/src/uu/diff/src/context_diff.rs similarity index 95% rename from src/context_diff.rs rename to src/uu/diff/src/context_diff.rs index 873fc3d..e5603c8 100644 --- a/src/context_diff.rs +++ b/src/uu/diff/src/context_diff.rs @@ -1,14 +1,16 @@ // This file is part of the uutils diffutils package. // -// For the full copyright and license information, please view the LICENSE-* -// files that was distributed with this source code. 
+// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +// spell-checker:ignore alef alefr alefx betr betx nodiff use std::collections::VecDeque; use std::io::Write; -use crate::params::Params; -use crate::utils::do_write_line; -use crate::utils::get_modification_time; +use crate::params_diff::Params; +use uudiff::utils::do_write_line; +use uudiff::utils::get_modification_time; #[derive(Debug, PartialEq)] pub enum DiffLine { @@ -30,8 +32,8 @@ struct Mismatch { } impl Mismatch { - fn new(line_number_expected: usize, line_number_actual: usize) -> Mismatch { - Mismatch { + fn new(line_number_expected: usize, line_number_actual: usize) -> Self { + Self { line_number_expected, line_number_actual, expected: Vec::new(), @@ -77,9 +79,9 @@ fn make_diff( // Rust only allows allocations to grow to isize::MAX, and this is bigger than that. let mut expected_lines_change_idx: usize = !0; - for result in diff::slice(&expected_lines, &actual_lines) { + for result in diff_crate::slice(&expected_lines, &actual_lines) { match result { - diff::Result::Left(str) => { + diff_crate::Result::Left(str) => { if lines_since_mismatch > context_size && lines_since_mismatch > 0 { results.push(mismatch); mismatch = Mismatch::new( @@ -101,7 +103,7 @@ fn make_diff( line_number_expected += 1; lines_since_mismatch = 0; } - diff::Result::Right(str) => { + diff_crate::Result::Right(str) => { if lines_since_mismatch > context_size && lines_since_mismatch > 0 { results.push(mismatch); mismatch = Mismatch::new( @@ -132,7 +134,7 @@ fn make_diff( line_number_actual += 1; lines_since_mismatch = 0; } - diff::Result::Both(str, _) => { + diff_crate::Result::Both(str, _) => { expected_lines_change_idx = !0; // if one of them is missing a newline and the other isn't, then they don't actually match if (line_number_actual > actual_lines_count) @@ -278,7 +280,7 @@ pub fn diff(expected: &[u8], actual: &[u8], params: &Params) -> Vec { 
to_modified_time ) .into_bytes(); - let diff_results = make_diff(expected, actual, params.context_count, params.brief); + let diff_results = make_diff(expected, actual, params.n_output_lines, params.brief); if diff_results.is_empty() { return Vec::new(); } @@ -384,7 +386,7 @@ mod tests { #[test] fn test_permutations() { // test all possible six-line files. - let target = "target/context-diff/"; + let target = "../../../target/context-diff/"; let _ = std::fs::create_dir(target); for &a in &[0, 1, 2] { for &b in &[0, 1, 2] { @@ -435,7 +437,7 @@ mod tests { &Params { from: "a/alef".into(), to: (&format!("{target}/alef")).into(), - context_count: 2, + n_output_lines: 2, ..Default::default() }, ); @@ -450,7 +452,9 @@ mod tests { let _ = fa; let _ = fb; let output = Command::new("patch") - .arg("-p0") + // .arg("-p0") + .arg("-d") + .arg(&format!("{target}")) .arg("--context") .stdin(File::open(format!("{target}/ab.diff")).unwrap()) .output() @@ -470,7 +474,7 @@ mod tests { #[test] fn test_permutations_empty_lines() { - let target = "target/context-diff/"; + let target = "../../../target/context-diff/"; // test all possible six-line files with missing newlines. let _ = std::fs::create_dir(target); for &a in &[0, 1, 2] { @@ -516,7 +520,7 @@ mod tests { &Params { from: "a/alef_".into(), to: (&format!("{target}/alef_")).into(), - context_count: 2, + n_output_lines: 2, ..Default::default() }, ); @@ -531,7 +535,9 @@ mod tests { let _ = fa; let _ = fb; let output = Command::new("patch") - .arg("-p0") + // .arg("-p0") + .arg("-d") + .arg(&format!("{target}")) .arg("--context") .stdin(File::open(format!("{target}/ab_.diff")).unwrap()) .output() @@ -551,7 +557,7 @@ mod tests { #[test] fn test_permutations_missing_lines() { - let target = "target/context-diff/"; + let target = "../../../target/context-diff/"; // test all possible six-line files. 
let _ = std::fs::create_dir(target); for &a in &[0, 1, 2] { @@ -600,7 +606,7 @@ mod tests { &Params { from: "a/alefx".into(), to: (&format!("{target}/alefx")).into(), - context_count: 2, + n_output_lines: 2, ..Default::default() }, ); @@ -615,7 +621,9 @@ mod tests { let _ = fa; let _ = fb; let output = Command::new("patch") - .arg("-p0") + // .arg("-p0") + .arg("-d") + .arg(&format!("{target}")) .arg("--context") .stdin(File::open(format!("{target}/abx.diff")).unwrap()) .output() @@ -635,7 +643,7 @@ mod tests { #[test] fn test_permutations_reverse() { - let target = "target/context-diff/"; + let target = "../../../target/context-diff/"; // test all possible six-line files. let _ = std::fs::create_dir(target); for &a in &[0, 1, 2] { @@ -687,7 +695,7 @@ mod tests { &Params { from: "a/alefr".into(), to: (&format!("{target}/alefr")).into(), - context_count: 2, + n_output_lines: 2, ..Default::default() }, ); @@ -702,7 +710,9 @@ mod tests { let _ = fa; let _ = fb; let output = Command::new("patch") - .arg("-p0") + // .arg("-p0") + .arg("-d") + .arg(&format!("{target}")) .arg("--context") .stdin(File::open(format!("{target}/abr.diff")).unwrap()) .output() @@ -722,7 +732,7 @@ mod tests { #[test] fn test_stop_early() { - use crate::assert_diff_eq; + use uudiff::assert_diff_eq; let from_filename = "foo"; let from = ["a", "b", "c", ""].join("\n"); diff --git a/src/uu/diff/src/diff.rs b/src/uu/diff/src/diff.rs new file mode 100644 index 0000000..f22001c --- /dev/null +++ b/src/uu/diff/src/diff.rs @@ -0,0 +1,234 @@ +// This file is part of the uutils diffutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. 
+ +// Re-export the public functions/types you need +// pub use context_diff::diff as context_diff; +// pub use ed_diff::diff as ed_diff; +// pub use normal_diff::diff as normal_diff; +// pub use side_diff::diff as side_by_side_diff; +// pub use unified_diff::diff as unified_diff; + +pub mod context_diff; +pub mod ed_diff; +pub mod normal_diff; +pub mod params_diff; +// not used anymore, only for bench +pub mod params_old; +pub mod side_diff; +pub mod unified_diff; + +use crate::params_diff::{FormatOutput, Params}; +use clap::Command; +use std::ffi::OsString; +use std::fs; +use std::io::{self, Read, Write, stdout}; +use uudiff::common_errors::UtilsError; +use uudiff::error::{FromIo, UIoError, UResult}; +use uudiff::utils::CompareOk; +use uudiff::{translate, utils}; + +/// Entry into diff. +#[uucore::main] +pub fn uumain(args: impl uucore::Args) -> UResult<()> { + let args_checked = clap_preparation(args); + let matches = + uudiff::clap_localization::handle_clap_result_with_exit_code(uu_app(), args_checked, 2)?; + + let params: Params = matches.try_into()?; + + let res = diff_compare(¶ms)?; + match res { + CompareOk::Equal => uucore::error::set_exit_code(0), + CompareOk::Different => uucore::error::set_exit_code(1), + } + + Ok(()) +} + +pub fn clap_preparation(args: impl uucore::Args) -> Vec { + // handle constellations, clap can't do + // so clap is limited to -c=num, while GNU allows -c42 and -42c (and 4c2) + let mut args_checked = Vec::new(); + for mut arg_os in args { + if arg_os.len() > 2 { + let arg = arg_os.to_string_lossy(); + if arg.as_bytes()[0] == b'-' { + // short options with num or multiple short options + let mut opt = '-'; + let mut num = String::new(); + let mut ok = false; + // let c = arg.as_bytes()[1] as char; + for c in arg.chars().skip(1) { + if c.is_ascii_digit() { + num.push(c); + } else if c.is_ascii_lowercase() { + // possibly multi-single-options, e.g. 
-sc4 is valid + if c == 'c' || c == 'u' { + if opt == '-' { + opt = c; + ok = true; + } else { + // multiple chars, reject + ok = false; + break; + } + } + } else { + // unknown char, reject + ok = false; + break; + } + } + if ok { + // create c=42 structure + let mut s = String::from("-"); + s.push(opt); + s.push('='); + s.push_str(&num); + arg_os = s.into(); + } + } + } + // dbg!(&arg_os); + args_checked.push(arg_os); + } + + args_checked +} + +pub fn diff_compare(params: &Params) -> UResult { + let maybe_report_identical_files = || { + if params.report_identical_files { + let msg = translate!("diff-info-files-are-identical", + "file_1" => params.from.to_string_lossy(), + "file_2" => params.to.to_string_lossy()); + println!("{msg}"); + } + }; + + // if from and to are the same file, no need to perform any comparison + if utils::is_same_file(¶ms.from, ¶ms.to) { + maybe_report_identical_files(); + return Ok(CompareOk::Equal); + } + + // read files + fn read_file_contents(filepath: &OsString) -> io::Result> { + if filepath == "-" { + let mut content = Vec::new(); + io::stdin().read_to_end(&mut content).and(Ok(content)) + } else { + fs::read(filepath) + } + } + + // UIoError has no code https://github.com/uutils/coreutils/issues/11453 + let r_from_content = read_file_contents(¶ms.from); + let r_to_content = read_file_contents(¶ms.to); + + // Diff returns both errors + let from_content = match r_from_content { + Ok(c) => c, + Err(e1) => match r_to_content { + Ok(_) => { + let io = e1.map_err_context(|| params.from_as_string_lossy()); + return Err(UtilsError::Io(io).into()); + } + Err(e2) => { + let io1 = e1.map_err_context(|| params.from_as_string_lossy()); + let io2 = e2.map_err_context(|| params.to_as_string_lossy()); + return Err(UtilsError::IoDouble(io1, io2).into()); + } + }, + }; + let to_content = match r_to_content { + Ok(c) => c, + Err(e2) => { + let io = e2.map_err_context(|| params.to_as_string_lossy()); + return Err(UtilsError::Io(io).into()); + } + }; + + 
// run diff + let result: Vec = match params.format_out { + FormatOutput::Normal => normal_diff::diff(&from_content, &to_content, params), + FormatOutput::Unified => unified_diff::diff(&from_content, &to_content, params), + FormatOutput::Context => context_diff::diff(&from_content, &to_content, params), + FormatOutput::Ed => ed_diff::diff(&from_content, &to_content, params)?, + FormatOutput::SideBySide => { + let mut output = stdout().lock(); + side_diff::diff(&from_content, &to_content, &mut output, params) + } + }; + + #[allow(clippy::redundant_else)] + if params.brief && !result.is_empty() { + let msg = translate!("diff-info-files-are-different", + "file_1" => params.from.to_string_lossy(), + "file_2" => params.to.to_string_lossy()); + println!("{msg}"); + return Ok(CompareOk::Different); + } else { + let result = io::stdout().write_all(&result); + match result { + // This code is adapted from coreutils. + // + Ok(()) => {} + Err(err) if err.kind() == std::io::ErrorKind::BrokenPipe => { + // GNU seq prints the Broken pipe message but still exits with status 0 + // unless SIGPIPE was explicitly ignored, in which case it should fail. + let err = err.map_err_context(|| "write error".into()); + uucore::show_error!("{err}"); + #[cfg(unix)] + if uucore::signals::sigpipe_was_ignored() { + uucore::error::set_exit_code(0); + } + } + Err(error) => { + let io = UIoError::from(error); + return Err(UtilsError::Io(io.into()).into()); + } + } + } + + if result.is_empty() { + maybe_report_identical_files(); + Ok(CompareOk::Equal) + } else { + Ok(CompareOk::Different) + } +} + +/// Contains all diff errors and their text messages. +/// +/// All errors can be output easily using the normal Display functionality. +/// To format the error message for the typical diffutils output, use [format_error_text]. 
+#[derive(Debug, PartialEq, Eq)] +pub enum DiffError { + MissingNL, +} + +impl std::error::Error for DiffError {} + +impl uudiff::error::UError for DiffError { + fn code(&self) -> i32 { + 2 + } +} + +impl std::fmt::Display for DiffError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let msg = match self { + Self::MissingNL => translate!("diff-error-missing-newline"), + }; + + write!(f, "{msg}") + } +} + +// Required for build.rs +pub fn uu_app() -> Command { + crate::params_diff::uu_app() +} diff --git a/src/ed_diff.rs b/src/uu/diff/src/ed_diff.rs similarity index 95% rename from src/ed_diff.rs rename to src/uu/diff/src/ed_diff.rs index b8cdbc5..39650ca 100644 --- a/src/ed_diff.rs +++ b/src/uu/diff/src/ed_diff.rs @@ -1,12 +1,14 @@ // This file is part of the uutils diffutils package. // -// For the full copyright and license information, please view the LICENSE-* -// files that was distributed with this source code. +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. 
+ +// spell-checker:ignore alef alefr betr nodiff use std::io::Write; -use crate::params::Params; -use crate::utils::do_write_line; +use crate::{DiffError, params_diff::Params}; +use uudiff::utils::do_write_line; #[derive(Debug, PartialEq)] struct Mismatch { @@ -16,26 +18,9 @@ struct Mismatch { pub actual: Vec>, } -#[derive(Debug, PartialEq, Eq)] -pub enum DiffError { - MissingNL, -} - -impl std::fmt::Display for DiffError { - fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { - std::fmt::Display::fmt("No newline at end of file", f) - } -} - -impl From for String { - fn from(_: DiffError) -> String { - "No newline at end of file".into() - } -} - impl Mismatch { - fn new(line_number_expected: usize, line_number_actual: usize) -> Mismatch { - Mismatch { + fn new(line_number_expected: usize, line_number_actual: usize) -> Self { + Self { line_number_expected, line_number_actual, expected: Vec::new(), @@ -71,9 +56,9 @@ fn make_diff(expected: &[u8], actual: &[u8], stop_early: bool) -> Result { + diff_crate::Result::Left(str) => { if !mismatch.actual.is_empty() { results.push(mismatch); mismatch = Mismatch::new(line_number_expected, line_number_actual); @@ -81,11 +66,11 @@ fn make_diff(expected: &[u8], actual: &[u8], stop_early: bool) -> Result { + diff_crate::Result::Right(str) => { mismatch.actual.push(str.to_vec()); line_number_actual += 1; } - diff::Result::Both(_str, _) => { + diff_crate::Result::Both(_str, _) => { line_number_expected += 1; line_number_actual += 1; if !mismatch.actual.is_empty() || !mismatch.expected.is_empty() { @@ -120,7 +105,7 @@ pub fn diff(expected: &[u8], actual: &[u8], params: &Params) -> Result, let mut lines_offset = 0; for result in diff_results { let line_number_expected: isize = result.line_number_expected as isize + lines_offset; - let _line_number_actual: isize = result.line_number_actual as isize + lines_offset; + // let _line_number_actual: isize = result.line_number_actual as isize + lines_offset; let 
expected_count: isize = result.expected.len() as isize; let actual_count: isize = result.actual.len() as isize; match (expected_count, actual_count) { @@ -179,7 +164,7 @@ mod tests { #[test] fn test_permutations() { - let target = "target/ed-diff/"; + let target = "../../../target/ed-diff/"; // test all possible six-line files. let _ = std::fs::create_dir(target); for &a in &[0, 1, 2] { @@ -259,7 +244,7 @@ mod tests { #[test] fn test_permutations_empty_lines() { - let target = "target/ed-diff/"; + let target = "../../../target/ed-diff/"; // test all possible six-line files with missing newlines. let _ = std::fs::create_dir(target); for &a in &[0, 1, 2] { @@ -333,7 +318,7 @@ mod tests { #[test] fn test_permutations_reverse() { - let target = "target/ed-diff/"; + let target = "../../../target/ed-diff/"; // test all possible six-line files. let _ = std::fs::create_dir(target); for &a in &[0, 1, 2] { diff --git a/src/uu/diff/src/main.rs b/src/uu/diff/src/main.rs new file mode 100644 index 0000000..f3b23b9 --- /dev/null +++ b/src/uu/diff/src/main.rs @@ -0,0 +1,6 @@ +// This file is part of the uutils diffutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +uudiff::bin!(uu_diff); diff --git a/src/normal_diff.rs b/src/uu/diff/src/normal_diff.rs similarity index 97% rename from src/normal_diff.rs rename to src/uu/diff/src/normal_diff.rs index 002cd01..b84babe 100644 --- a/src/normal_diff.rs +++ b/src/uu/diff/src/normal_diff.rs @@ -1,12 +1,14 @@ // This file is part of the uutils diffutils package. // -// For the full copyright and license information, please view the LICENSE-* -// files that was distributed with this source code. +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. 
+ +// spell-checker:ignore alef alefn alefr betn betr nodiff use std::io::Write; -use crate::params::Params; -use crate::utils::do_write_line; +use crate::params_diff::Params; +use uudiff::utils::do_write_line; #[derive(Debug, PartialEq)] struct Mismatch { @@ -19,8 +21,8 @@ struct Mismatch { } impl Mismatch { - fn new(line_number_expected: usize, line_number_actual: usize) -> Mismatch { - Mismatch { + fn new(line_number_expected: usize, line_number_actual: usize) -> Self { + Self { line_number_expected, line_number_actual, expected: Vec::new(), @@ -54,9 +56,9 @@ fn make_diff(expected: &[u8], actual: &[u8], stop_early: bool) -> Vec actual_lines.pop(); } - for result in diff::slice(&expected_lines, &actual_lines) { + for result in diff_crate::slice(&expected_lines, &actual_lines) { match result { - diff::Result::Left(str) => { + diff_crate::Result::Left(str) => { if !mismatch.actual.is_empty() && !mismatch.actual_missing_nl { results.push(mismatch); mismatch = Mismatch::new(line_number_expected, line_number_actual); @@ -65,12 +67,12 @@ fn make_diff(expected: &[u8], actual: &[u8], stop_early: bool) -> Vec mismatch.expected_missing_nl = line_number_expected > expected_lines_count; line_number_expected += 1; } - diff::Result::Right(str) => { + diff_crate::Result::Right(str) => { mismatch.actual.push(str.to_vec()); mismatch.actual_missing_nl = line_number_actual > actual_lines_count; line_number_actual += 1; } - diff::Result::Both(str, _) => { + diff_crate::Result::Both(str, _) => { match ( line_number_expected > expected_lines_count, line_number_actual > actual_lines_count, @@ -228,7 +230,7 @@ mod tests { #[test] fn test_permutations() { - let target = "target/normal-diff/"; + let target = "../../../target/normal-diff/"; // test all possible six-line files. 
let _ = std::fs::create_dir(target); for &a in &[0, 1, 2] { @@ -306,7 +308,7 @@ mod tests { #[test] fn test_permutations_missing_line_ending() { - let target = "target/normal-diff/"; + let target = "../../../target/normal-diff/"; // test all possible six-line files with missing newlines. let _ = std::fs::create_dir(target); for &a in &[0, 1, 2] { @@ -400,7 +402,7 @@ mod tests { #[test] fn test_permutations_empty_lines() { - let target = "target/normal-diff/"; + let target = "../../../target/normal-diff/"; // test all possible six-line files with missing newlines. let _ = std::fs::create_dir(target); for &a in &[0, 1, 2] { @@ -472,7 +474,7 @@ mod tests { #[test] fn test_permutations_reverse() { - let target = "target/normal-diff/"; + let target = "../../../target/normal-diff/"; // test all possible six-line files. let _ = std::fs::create_dir(target); for &a in &[0, 1, 2] { diff --git a/src/uu/diff/src/params_diff.rs b/src/uu/diff/src/params_diff.rs new file mode 100644 index 0000000..ab1ba6f --- /dev/null +++ b/src/uu/diff/src/params_diff.rs @@ -0,0 +1,1256 @@ +// This file is part of the uutils diffutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +// spell-checker:ignore GFMT GTYPE LFMT LTYPE TABSIZE + +//! This is the parser for the diff utility. +//! +//! It uses the parsed data clap provides and fills the [Params] for diff. +//! It contains the allowed options, specific parsing logic and parsing error messages. +//! +use clap::{Arg, ArgAction, Command}; +use std::ffi::OsString; +use std::fmt::Display; +use std::path::PathBuf; +use uudiff::{common_errors::UParseError, translate}; + +/// For option --bytes, set to u64, so large size limits can +/// be expressed, like Exabyte. \ +/// This could be set to u128 with small modifications, +/// but AFAIK file sizes (metadata) can not exceed u64.
+/// This is also limiting the compare function to u64::MAX +/// as this is the default value. +pub type BytesLimitU64 = u64; +/// For option --ignore initial, should not be changed. +pub type SkipU64 = u64; + +// Allowed utility arguments (options) +mod options { + /// Generic option for files and other undefined operands + pub const FILE: &str = "file"; + /// -q, --brief report only when files differ + pub const BRIEF: &str = "brief"; + /// --color[=WHEN] color output; WHEN is 'never', 'always', or 'auto'; + pub const COLOR: &str = "color"; + /// -c, -C NUM, --context[=NUM] output NUM (default 3) lines of copied context + pub const CONTEXT_LINES: &str = "context"; + /// -C requires different handling + pub const CONTEXT_LINES_UPPER: &str = "context_upper"; + /// -e, --ed output an ed script + pub const ED: &str = "ed"; + /// -x, --exclude=PAT exclude files that match PAT + pub const EXCLUDE: &str = "exclude"; + /// -X, --exclude-from=FILE exclude files that match any pattern in FILE + pub const EXCLUDE_FROM: &str = "exclude-from"; + /// -t, --expand-tabs expand tabs to spaces in output + pub const EXPAND_TABS: &str = "expand-tabs"; + /// --from-file=FILE1 compare FILE1 to all operands; + pub const FROM_FILE: &str = "from-file"; + /// --GTYPE-group-format=GFMT format GTYPE input groups with GFMT + pub const GTYPE_GROUP_FORMAT: &str = "gtype-group-format"; + /// --horizon-lines=NUM keep NUM lines of the common prefix and suffix + pub const HORIZON_LINES: &str = "horizon-lines"; + /// -D, --ifdef=NAME output merged file with '#ifdef NAME' diffs + pub const IFDEF: &str = "ifdef"; + /// -w, --ignore-all-space ignore all white space + pub const IGNORE_ALL_SPACE: &str = "ignore-all-space"; + /// -B, --ignore-blank-lines ignore changes where lines are all blank + pub const IGNORE_BLANK_LINES: &str = "ignore-blank-lines"; + /// -i, --ignore-case ignore case differences in file contents + pub const IGNORE_CASE: &str = "ignore-case"; + /// --ignore-file-name-case ignore 
case when comparing file names + pub const IGNORE_FILE_NAME_CASE: &str = "ignore-file-name-case"; + /// -I, --ignore-matching-lines=RE ignore changes where all lines match RE + pub const IGNORE_MATCHING_LINES: &str = "ignore-matching-lines"; + /// -b, --ignore-space-change ignore changes in the amount of white space + pub const IGNORE_SPACE_CHANGE: &str = "ignore-space-change"; + /// -E, --ignore-tab-expansion ignore changes due to tab expansion + pub const IGNORE_TAB_EXPANSION: &str = "ignore-tab-expansion"; + /// -Z, --ignore-trailing-space ignore white space at line end + pub const IGNORE_TRAILING_SPACE: &str = "ignore-trailing-space"; + /// -T, --initial-tab make tabs line up by prepending a tab + pub const INITIAL_TAB: &str = "initial-tab"; + /// --label LABEL use LABEL instead of file name and timestamp + pub const LABEL: &str = "label"; + /// --left-column output only the left column of common lines + pub const LEFT_COLUMN: &str = "left-column"; + /// --line-format=LFMT format all input lines with LFMT + pub const LINE_FORMAT: &str = "line-format"; + /// --LTYPE-line-format=LFMT format LTYPE input lines with LFMT + pub const LTYPE_LINE_FORMAT: &str = "ltype-line-format"; + /// -d, --minimal try hard to find a smaller set of changes + pub const MINIMAL: &str = "minimal"; + /// -N, --new-file treat absent files as empty + pub const NEW_FILE: &str = "new-file"; + /// --no-dereference don't follow symbolic links + pub const NO_DEREFERENCE: &str = "no-dereference"; + /// --no-ignore-file-name-case consider case when comparing file names + pub const NO_IGNORE_FILE_NAME_CASE: &str = "no-ignore-file-name-case"; + /// --normal output a normal diff (the default) + pub const NORMAL: &str = "normal"; + /// -l, --paginate pass output through 'pr' to paginate it + pub const PAGINATE: &str = "paginate"; + /// --palette=PALETTE the colors to use when --color is active; PALETTE is + pub const PALETTE: &str = "palette"; + /// -n, --rcs output an RCS format diff + pub const 
RCS: &str = "rcs"; + /// -r, --recursive recursively compare any subdirectories found + pub const RECURSIVE: &str = "recursive"; + /// -s, --report-identical-files report when two files are the same + pub const REPORT_IDENTICAL_FILES: &str = "report-identical-files"; + /// -p, --show-c-function show which C function each change is in + pub const SHOW_C_FUNCTION: &str = "show-c-function"; + /// -F, --show-function-line=RE show the most recent line matching RE + pub const SHOW_FUNCTION_LINE: &str = "show-function-line"; + /// -y, --side-by-side output in two columns + pub const SIDE_BY_SIDE: &str = "side-by-side"; + /// --speed-large-files assume large files and many scattered small changes + pub const SPEED_LARGE_FILES: &str = "speed-large-files"; + /// -S, --starting-file=FILE start with FILE when comparing directories + pub const STARTING_FILE: &str = "starting-file"; + /// --strip-trailing-cr strip trailing carriage return on input + pub const STRIP_TRAILING_CR: &str = "strip-trailing-cr"; + /// --suppress-blank-empty suppress space or tab before empty output lines + pub const SUPPRESS_BLANK_EMPTY: &str = "suppress-blank-empty"; + /// --suppress-common-lines do not output common lines + pub const SUPPRESS_COMMON_LINES: &str = "suppress-common-lines"; + /// --tabsize=NUM tab stops every NUM (default 8) print columns + pub const TABSIZE: &str = "tabsize"; + /// -a, --text treat all files as text + pub const TEXT: &str = "text"; + /// --to-file=FILE2 compare all operands to FILE2; + pub const TO_FILE: &str = "to-file"; + /// --unidirectional-new-file treat absent first files as empty + pub const UNIDIRECTIONAL_NEW_FILE: &str = "unidirectional-new-file"; + /// -u, -U NUM, --unified[=NUM] output NUM (default 3) lines of unified context + pub const UNIFIED_LINES: &str = "unified"; + /// -U requires different handling + pub const UNIFIED_LINES_UPPER: &str = "unified_upper"; + /// -W, --width=NUM output at most NUM (default 130) print columns + pub const WIDTH: &str = 
"width"; +} + +/// Output format +#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] +pub enum FormatOutput { + #[default] + /// Default output + Normal, + Unified, + Context, + /// output in ed editor format + Ed, + /// output in two columns + SideBySide, +} + +impl From<&str> for FormatOutput { + fn from(option: &str) -> Self { + match option { + options::NORMAL => Self::Normal, + options::UNIFIED_LINES => Self::Unified, + options::CONTEXT_LINES => Self::Context, + options::ED => Self::Ed, + options::SIDE_BY_SIDE => Self::SideBySide, + _ => todo!("option '{option}' missing in match"), + } + } +} + +impl Display for FormatOutput { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let opt = match self { + Self::Normal => options::NORMAL, + Self::Unified => options::UNIFIED_LINES, + Self::Context => options::CONTEXT_LINES, + Self::Ed => options::ED, + Self::SideBySide => options::SIDE_BY_SIDE, + }; + write!(f, "{opt}") + } +} + +/// Holds the given command line arguments except "--version" and "--help". 
+#[derive(Debug, Clone, PartialEq)] +pub struct Params { + /// path or "-" for stdin + pub from: OsString, + pub to: OsString, + /// report only when files differ + pub brief: bool, + /// color output; WHEN is 'never', 'always', or 'auto'; + pub color: Option, + /// output NUM (default 3) lines of copied context + pub n_output_lines: usize, + /// exclude files that match PAT + pub exclude: Option, + /// exclude files that match any pattern in FILE + pub exclude_from: Option, + /// expand tabs to spaces in output + pub expand_tabs: bool, + /// output format + pub format_out: FormatOutput, + /// compare FILE1 to all operands; + pub from_file: Option, + /// format GTYPE input groups with GFMT + pub gtype_group_format: Option, + /// keep NUM lines of the common prefix and suffix + pub horizon_lines: Option, + /// output merged file with '#ifdef NAME' diffs + pub ifdef: Option, + /// ignore all white space + pub ignore_all_space: bool, + /// ignore changes where lines are all blank + pub ignore_blank_lines: bool, + /// ignore case differences in file contents + pub ignore_case: bool, + /// ignore case when comparing file names + pub ignore_file_name_case: bool, + /// ignore changes where all lines match RE + pub ignore_matching_lines: Option, + /// ignore changes in the amount of white space + pub ignore_space_change: bool, + /// ignore changes due to tab expansion + pub ignore_tab_expansion: bool, + /// ignore white space at line end + pub ignore_trailing_space: bool, + /// make tabs line up by prepending a tab + pub initial_tab: bool, + /// LABEL use LABEL instead of file name and timestamp + pub label: bool, + /// output only the left column of common lines + pub left_column: bool, + /// format all input lines with LFMT + pub line_format: Option, + /// format LTYPE input lines with LFMT + pub ltype_line_format: Option, + /// try hard to find a smaller set of changes + pub minimal: bool, + /// treat absent files as empty + pub new_file: bool, + /// don't follow 
symbolic links + pub no_dereference: bool, + /// consider case when comparing file names + pub no_ignore_file_name_case: bool, + /// pass output through 'pr' to paginate it + pub paginate: bool, + /// the colors to use when --color is active; PALETTE is + pub palette: Option, + /// output an RCS format diff + pub rcs: bool, + /// recursively compare any subdirectories found + pub recursive: bool, + /// report when two files are the same + pub report_identical_files: bool, + /// show which C function each change is in + pub show_c_function: bool, + /// show the most recent line matching RE + pub show_function_line: Option, + /// assume large files and many scattered small changes + pub speed_large_files: bool, + /// start with FILE when comparing directories + pub starting_file: Option, + /// strip trailing carriage return on input + pub strip_trailing_cr: bool, + /// suppress space or tab before empty output lines + pub suppress_blank_empty: bool, + /// do not output common lines + pub suppress_common_lines: bool, + /// tab stops every NUM (default 8) print columns + pub tabsize: usize, + /// treat all files as text + pub text: bool, + /// compare all operands to FILE2; + pub to_file: Option, + /// treat absent first files as empty + pub unidirectional_new_file: bool, + // /// output NUM (default 3) lines of unified context + // pub n_unified_lines: usize, + /// output at most NUM (default 130) print columns + pub width: usize, +} + +#[allow(clippy::default_trait_access)] +impl Default for Params { + fn default() -> Self { + Self { + from: Default::default(), + to: Default::default(), + brief: Default::default(), + color: Default::default(), + n_output_lines: 3, + exclude: Default::default(), + exclude_from: Default::default(), + expand_tabs: Default::default(), + format_out: Default::default(), + from_file: Default::default(), + gtype_group_format: Default::default(), + horizon_lines: Default::default(), + ifdef: Default::default(), + ignore_all_space: 
Default::default(), + ignore_blank_lines: Default::default(), + ignore_case: Default::default(), + ignore_file_name_case: Default::default(), + ignore_matching_lines: Default::default(), + ignore_space_change: Default::default(), + ignore_tab_expansion: Default::default(), + ignore_trailing_space: Default::default(), + initial_tab: Default::default(), + label: Default::default(), + left_column: Default::default(), + line_format: Default::default(), + ltype_line_format: Default::default(), + minimal: Default::default(), + new_file: Default::default(), + no_dereference: Default::default(), + no_ignore_file_name_case: Default::default(), + paginate: Default::default(), + palette: Default::default(), + rcs: Default::default(), + recursive: Default::default(), + report_identical_files: Default::default(), + show_c_function: Default::default(), + show_function_line: Default::default(), + speed_large_files: Default::default(), + starting_file: Default::default(), + strip_trailing_cr: Default::default(), + suppress_blank_empty: Default::default(), + suppress_common_lines: Default::default(), + tabsize: 8, + text: Default::default(), + to_file: Default::default(), + unidirectional_new_file: Default::default(), + // n_unified_lines: 3, + width: 130, + } + } +} + +impl Params { + pub fn from_as_string_lossy(&self) -> String { + self.from.to_string_lossy().to_string() + } + + pub fn to_as_string_lossy(&self) -> String { + self.to.to_string_lossy().to_string() + } + + // /// Sets the --bytes limit and returns the input as number. + // /// + // /// bytes - unparsed number string, e.g. 
'50KiB' + // pub fn set_bytes_limit(&mut self, num_unit: &str) -> Result { + // let num = Self::parse_num_bytes(num_unit).map_err(|e| { + // ParseCmpError::ParseSizeError(options::BYTES_LIMIT, num_unit.to_string(), e) + // })?; + // + // self.bytes_limit = Some(num); + // Ok(num) + // } + // + // pub fn set_print_bytes(&mut self, value: bool) -> Result<(), ParseCmpError> { + // // Should actually raise an error if --silent is set, but GNU cmp does not do that. + // if value && self.silent { + // return Err(ParseCmpError::OptionsIncompatible( + // options::PRINT_BYTES, + // options::SILENT, + // )); + // } + // self.print_bytes = value; + // + // Ok(()) + // } + // + // /// Sets the ignore initial bytes for both files. + // /// + // /// Accepts digits[unit][:digits[unit]] \ + // /// Sets the 2nd file to the value of the 1st file if no second parameter is given. \ + // pub fn set_skip_bytes(&mut self, bytes: &str) -> Result<(), ParseCmpError> { + // // empty string is not checked + // + // // Split at ':' if present + // let (skip_1, skip_2) = match bytes.split_once(':') { + // Some((s1, s2)) => (s1, s2), + // None => { + // // set file_to to same value as file_from + // (bytes, bytes) + // } + // }; + // + // self.set_skip_bytes_file_no(skip_1, 1)?; + // self.set_skip_bytes_file_no(skip_2, 2)?; + // + // Ok(()) + // } + // + // /// Sets the [Self::skip_bytes_from] or [Self::skip_bytes_to] value. 
+ // /// + // /// GNU cmp always uses the higher number in case of conflicting definitions + // /// with --ignore-initial and operand + // fn set_skip_bytes_file_no( + // &mut self, + // bytes_num_unit: &str, + // file_no: i32, + // ) -> Result { + // let skip = match Self::parse_num_bytes(bytes_num_unit) { + // Ok(r) => r, + // Err(e) => { + // return Err(ParseCmpError::ParseSizeError( + // options::IGNORE_INITIAL, + // bytes_num_unit.to_string(), + // e, + // )); + // } + // }; + // match file_no { + // // use higher value + // 1 => { + // self.skip_bytes_from = match self.skip_bytes_from { + // Some(v) => Some(skip.max(v)), + // None => Some(skip), + // } + // } + // 2 => { + // self.skip_bytes_to = match self.skip_bytes_to { + // Some(v) => Some(skip.max(v)), + // None => Some(skip), + // } + // } + // _ => panic!("logic error"), + // } + // + // Ok(skip) + // } + + pub fn set_format( + format: &mut Option, + option: &str, + value: bool, + ) -> Result<(), UParseError> { + if value { + let new: FormatOutput = option.into(); + match format { + Some(f) => { + return Err(UParseError::ConflictingOutputStyle( + f.to_string(), + new.to_string(), + )); + } + None => *format = Some(new), + } + } + Ok(()) + } + + pub fn set_context_lines( + format_out: &mut Option, + params: &mut Self, + context: &str, + ) -> Result<(), UParseError> { + Self::set_format(format_out, options::CONTEXT_LINES, true)?; + params.format_out = FormatOutput::Context; + match context.parse::() { + Ok(context_size) => { + params.n_output_lines = context_size; + } + Err(_) => { + // empty stays on default + if !context.is_empty() { + return Err(UParseError::InvalidContextLength(context.to_string())); + } + } + } + Ok(()) + } + + pub fn set_unified_lines( + format_out: &mut Option, + params: &mut Self, + unified: &str, + ) -> Result<(), UParseError> { + Self::set_format(format_out, options::UNIFIED_LINES, true)?; + params.format_out = FormatOutput::Unified; + match unified.parse::() { + 
Ok(unified_size) => { + params.n_output_lines = unified_size; + } + Err(_) => { + // empty stays on default + if !unified.is_empty() { + return Err(UParseError::InvalidUnifiedLength(unified.to_string())); + } + } + } + Ok(()) + } + + // /// Parse a SIZE string into a number of bytes. + // /// A size string comprises an integer and an optional unit. + // /// The unit may be k, K, m, M, g, G, t, T, P, E, Z, Y (powers of 1024), or b which is 1. + // /// Default is K. + // fn parse_num_bytes(input: &str) -> Result { + // let size = Parser::default() + // .with_allow_list(&ALLOWED_UNITS) + // // .with_default_unit("K") + // // .with_b_byte_count(true) + // .parse(input.trim())?; + // + // SkipU64::try_from(size).map_err(|_| { + // // ParseSizeError::SizeTooBig(translate!("sort-error-buffer-size-too-big", "size" => size)) + // ParseSizeError::SizeTooBig(input.to_string()) + // }) + // } +} + +/// Converts clap args to Params. +impl TryFrom for Params { + // For centralized parser errors. Requires Parser with UResult and all errors with .into(). 
+ // type Error = Box; + type Error = UParseError; + + // fn try_from(matches: clap::ArgMatches) -> UResult { + fn try_from(matches: clap::ArgMatches) -> Result { + // dbg!(&matches); + + let mut params = Self { + brief: matches.get_flag(options::BRIEF), + expand_tabs: matches.get_flag(options::EXPAND_TABS), + ignore_all_space: matches.get_flag(options::IGNORE_ALL_SPACE), + ignore_blank_lines: matches.get_flag(options::IGNORE_BLANK_LINES), + ignore_case: matches.get_flag(options::IGNORE_CASE), + ignore_file_name_case: matches.get_flag(options::IGNORE_FILE_NAME_CASE), + ignore_space_change: matches.get_flag(options::IGNORE_SPACE_CHANGE), + ignore_tab_expansion: matches.get_flag(options::IGNORE_TAB_EXPANSION), + ignore_trailing_space: matches.get_flag(options::IGNORE_TRAILING_SPACE), + initial_tab: matches.get_flag(options::INITIAL_TAB), + label: matches.get_flag(options::LABEL), + left_column: matches.get_flag(options::LEFT_COLUMN), + minimal: matches.get_flag(options::MINIMAL), + new_file: matches.get_flag(options::NEW_FILE), + no_dereference: matches.get_flag(options::NO_DEREFERENCE), + no_ignore_file_name_case: matches.get_flag(options::NO_IGNORE_FILE_NAME_CASE), + paginate: matches.get_flag(options::PAGINATE), + rcs: matches.get_flag(options::RCS), + recursive: matches.get_flag(options::RECURSIVE), + report_identical_files: matches.get_flag(options::REPORT_IDENTICAL_FILES), + show_c_function: matches.get_flag(options::SHOW_C_FUNCTION), + speed_large_files: matches.get_flag(options::SPEED_LARGE_FILES), + strip_trailing_cr: matches.get_flag(options::STRIP_TRAILING_CR), + suppress_blank_empty: matches.get_flag(options::SUPPRESS_BLANK_EMPTY), + suppress_common_lines: matches.get_flag(options::SUPPRESS_COMMON_LINES), + text: matches.get_flag(options::TEXT), + unidirectional_new_file: matches.get_flag(options::UNIDIRECTIONAL_NEW_FILE), + ..Default::default() + }; + + // set output format + let mut format_out = if matches.get_flag(options::NORMAL) { + 
Some(FormatOutput::Normal) + } else { + None + }; + Self::set_format(&mut format_out, options::ED, matches.get_flag(options::ED))?; + Self::set_format( + &mut format_out, + options::SIDE_BY_SIDE, + matches.get_flag(options::SIDE_BY_SIDE), + )?; + + // has color? + if let Some(color) = matches.get_one::(options::COLOR) { + params.color = Some(color.clone()); + } + + // has context? + if let Some(context) = matches.get_one::(options::CONTEXT_LINES) { + Self::set_context_lines(&mut format_out, &mut params, context)?; + } + if let Some(context) = matches.get_one::(options::CONTEXT_LINES_UPPER) { + Self::set_context_lines(&mut format_out, &mut params, context)?; + } + + // has exclude? + if let Some(exclude) = matches.get_one::(options::EXCLUDE) { + params.exclude = Some(exclude.clone()); + } + + // has exclude_from? + if let Some(exclude_from) = matches.get_one::(options::EXCLUDE_FROM) { + params.exclude_from = Some(exclude_from.clone()); + } + + // has from_file? + if let Some(from_file) = matches.get_one::(options::FROM_FILE) { + params.from_file = Some(from_file.clone()); + } + + // has gtype_group_format? + if let Some(gtype_group_format) = matches.get_one::(options::GTYPE_GROUP_FORMAT) { + params.gtype_group_format = Some(gtype_group_format.clone()); + } + + // has horizon_lines? + if let Some(horizon_lines) = matches.get_one::(options::HORIZON_LINES) { + params.horizon_lines = Some(*horizon_lines); + } + + // has ifdef? + if let Some(ifdef) = matches.get_one::(options::IFDEF) { + params.ifdef = Some(ifdef.clone()); + } + + // has ignore_matching_lines? + if let Some(ignore_matching_lines) = + matches.get_one::(options::IGNORE_MATCHING_LINES) + { + params.ignore_matching_lines = Some(ignore_matching_lines.clone()); + } + + // has line_format? + if let Some(line_format) = matches.get_one::(options::LINE_FORMAT) { + params.line_format = Some(line_format.clone()); + } + + // has ltype_line_format? 
+ if let Some(ltype_line_format) = matches.get_one::(options::LTYPE_LINE_FORMAT) { + params.ltype_line_format = Some(ltype_line_format.clone()); + } + + // has palette? + if let Some(palette) = matches.get_one::(options::PALETTE) { + params.palette = Some(palette.clone()); + } + + // has show_function_line? + if let Some(show_function_line) = matches.get_one::(options::SHOW_FUNCTION_LINE) { + params.show_function_line = Some(show_function_line.clone()); + } + + // has starting_file? + if let Some(starting_file) = matches.get_one::(options::STARTING_FILE) { + params.starting_file = Some(starting_file.clone()); + } + + // has tabsize? + if let Some(tabsize) = matches.get_one::(options::TABSIZE) { + params.tabsize = *tabsize as usize; + // params.tabsize = tabsize + // .parse::() + // .map_err(|_op| ParseDiffError::InvalidSomething)?; + } + + // has to_file? + if let Some(to_file) = matches.get_one::(options::TO_FILE) { + params.to_file = Some(to_file.clone()); + } + + // has unified? + if let Some(unified) = matches.get_one::(options::UNIFIED_LINES) { + Self::set_unified_lines(&mut format_out, &mut params, unified)?; + } + if let Some(unified) = matches.get_one::(options::UNIFIED_LINES_UPPER) { + Self::set_unified_lines(&mut format_out, &mut params, unified)?; + } + + // has width? 
+ if let Some(width) = matches.get_one::(options::WIDTH) { + params.width = *width as usize; + // params.width = width + // .parse::() + // .map_err(|_op| ParseDiffError::InvalidSomething)?; + } + + if let Some(format) = format_out { + params.format_out = format; + } + + // get files + let files: Vec = match matches.get_many::(options::FILE) { + Some(v) => v.cloned().collect(), + None => { + return Err(UParseError::MissingOperand(uucore::util_name().to_string())); + } + }; + // dbg!(&files); + + match files.len() { + 0 => return Err(UParseError::MissingOperand(uucore::util_name().to_string())), + 1 => { + return Err(UParseError::MissingOperand( + files[0].to_string_lossy().to_string(), + )); + } + 2 => { + // diff DIRECTORY FILE => diff DIRECTORY/FILE FILE + // diff FILE DIRECTORY => diff FILE DIRECTORY/FILE + let mut from_path = PathBuf::from(&files[0]); + let mut to_path = PathBuf::from(&files[1]); + + if from_path.is_dir() && to_path.is_file() { + from_path.push(to_path.file_name().unwrap()); + } else if from_path.is_file() && to_path.is_dir() { + to_path.push(from_path.file_name().unwrap()); + } + params.from = from_path.into_os_string(); + params.to = to_path.into_os_string(); + } + _ => { + // dbg!(&files); + return Err(UParseError::ExtraOperand(files[2].clone())); + } + } + + // not yet implemented error; delete when implemented + if matches.get_one::(options::COLOR).is_some() { + return Err(UParseError::NotYetImplemented(options::COLOR)); + } + if matches.get_one::(options::EXCLUDE).is_some() { + return Err(UParseError::NotYetImplemented(options::EXCLUDE)); + } + if matches.get_one::(options::EXCLUDE_FROM).is_some() { + return Err(UParseError::NotYetImplemented(options::EXCLUDE_FROM)); + } + if matches.get_one::(options::FROM_FILE).is_some() { + return Err(UParseError::NotYetImplemented(options::FROM_FILE)); + } + if matches + .get_one::(options::GTYPE_GROUP_FORMAT) + .is_some() + { + return Err(UParseError::NotYetImplemented(options::GTYPE_GROUP_FORMAT)); 
+ } + if matches.get_one::(options::HORIZON_LINES).is_some() { + return Err(UParseError::NotYetImplemented(options::HORIZON_LINES)); + } + if matches.get_one::(options::IFDEF).is_some() { + return Err(UParseError::NotYetImplemented(options::IFDEF)); + } + if matches.get_flag(options::IGNORE_ALL_SPACE) { + return Err(UParseError::NotYetImplemented(options::IGNORE_ALL_SPACE)); + } + if matches.get_flag(options::IGNORE_BLANK_LINES) { + return Err(UParseError::NotYetImplemented(options::IGNORE_BLANK_LINES)); + } + if matches.get_flag(options::IGNORE_CASE) { + return Err(UParseError::NotYetImplemented(options::IGNORE_CASE)); + } + if matches.get_flag(options::IGNORE_FILE_NAME_CASE) { + return Err(UParseError::NotYetImplemented( + options::IGNORE_FILE_NAME_CASE, + )); + } + if matches + .get_one::(options::IGNORE_MATCHING_LINES) + .is_some() + { + return Err(UParseError::NotYetImplemented( + options::IGNORE_MATCHING_LINES, + )); + } + if matches.get_flag(options::IGNORE_SPACE_CHANGE) { + return Err(UParseError::NotYetImplemented(options::IGNORE_SPACE_CHANGE)); + } + if matches.get_flag(options::IGNORE_TAB_EXPANSION) { + return Err(UParseError::NotYetImplemented( + options::IGNORE_TAB_EXPANSION, + )); + } + if matches.get_flag(options::IGNORE_TRAILING_SPACE) { + return Err(UParseError::NotYetImplemented( + options::IGNORE_TRAILING_SPACE, + )); + } + if matches.get_flag(options::INITIAL_TAB) { + return Err(UParseError::NotYetImplemented(options::INITIAL_TAB)); + } + if matches.get_flag(options::LABEL) { + return Err(UParseError::NotYetImplemented(options::LABEL)); + } + if matches.get_flag(options::LEFT_COLUMN) { + return Err(UParseError::NotYetImplemented(options::LEFT_COLUMN)); + } + if matches.get_one::(options::LINE_FORMAT).is_some() { + return Err(UParseError::NotYetImplemented(options::LINE_FORMAT)); + } + if matches + .get_one::(options::LTYPE_LINE_FORMAT) + .is_some() + { + return Err(UParseError::NotYetImplemented(options::LTYPE_LINE_FORMAT)); + } + if 
matches.get_flag(options::MINIMAL) { + return Err(UParseError::NotYetImplemented(options::MINIMAL)); + } + if matches.get_flag(options::NEW_FILE) { + return Err(UParseError::NotYetImplemented(options::NEW_FILE)); + } + if matches.get_flag(options::NO_DEREFERENCE) { + return Err(UParseError::NotYetImplemented(options::NO_DEREFERENCE)); + } + if matches.get_flag(options::NO_IGNORE_FILE_NAME_CASE) { + return Err(UParseError::NotYetImplemented( + options::NO_IGNORE_FILE_NAME_CASE, + )); + } + if matches.get_flag(options::PAGINATE) { + return Err(UParseError::NotYetImplemented(options::PAGINATE)); + } + if matches.get_one::(options::PALETTE).is_some() { + return Err(UParseError::NotYetImplemented(options::PALETTE)); + } + if matches.get_flag(options::RCS) { + return Err(UParseError::NotYetImplemented(options::RCS)); + } + if matches.get_flag(options::RECURSIVE) { + return Err(UParseError::NotYetImplemented(options::RECURSIVE)); + } + if matches.get_flag(options::SHOW_C_FUNCTION) { + return Err(UParseError::NotYetImplemented(options::SHOW_C_FUNCTION)); + } + if matches + .get_one::(options::SHOW_FUNCTION_LINE) + .is_some() + { + return Err(UParseError::NotYetImplemented(options::SHOW_FUNCTION_LINE)); + } + if matches.get_flag(options::SPEED_LARGE_FILES) { + return Err(UParseError::NotYetImplemented(options::SPEED_LARGE_FILES)); + } + if matches.get_one::(options::STARTING_FILE).is_some() { + return Err(UParseError::NotYetImplemented(options::STARTING_FILE)); + } + if matches.get_flag(options::STRIP_TRAILING_CR) { + return Err(UParseError::NotYetImplemented(options::STRIP_TRAILING_CR)); + } + if matches.get_flag(options::SUPPRESS_BLANK_EMPTY) { + return Err(UParseError::NotYetImplemented( + options::SUPPRESS_BLANK_EMPTY, + )); + } + if matches.get_flag(options::SUPPRESS_COMMON_LINES) { + return Err(UParseError::NotYetImplemented( + options::SUPPRESS_COMMON_LINES, + )); + } + if matches.get_flag(options::TEXT) { + return Err(UParseError::NotYetImplemented(options::TEXT)); 
+ } + if matches.get_one::(options::TO_FILE).is_some() { + return Err(UParseError::NotYetImplemented(options::TO_FILE)); + } + if matches.get_flag(options::UNIDIRECTIONAL_NEW_FILE) { + return Err(UParseError::NotYetImplemented( + options::UNIDIRECTIONAL_NEW_FILE, + )); + } + + // dbg!(&params); + Ok(params) + } +} + +// #[cfg(not(target_os = "windows"))] +// fn is_stdout_dev_null() -> bool { +// use std::{ +// fs, io, +// os::{fd::AsRawFd, unix::fs::MetadataExt}, +// }; +// +// let Ok(dev_null) = fs::metadata("/dev/null") else { +// return false; +// }; +// +// let stdout_fd = io::stdout().lock().as_raw_fd(); +// +// // SAFETY: we have exclusive access to stdout right now. +// let stdout_file = unsafe { +// use std::os::fd::FromRawFd; +// fs::File::from_raw_fd(stdout_fd) +// }; +// let Ok(stdout) = stdout_file.metadata() else { +// return false; +// }; +// +// let is_dev_null = stdout.dev() == dev_null.dev() && stdout.ino() == dev_null.ino(); +// +// // Don't let File close the fd. It's unfortunate that File doesn't have a leak_fd().
+// std::mem::forget(stdout_file); +// +// is_dev_null +// } + +// uu_app .args for the options +pub fn uu_app() -> Command { + // TODO this defines the order of the items in the help, maybe reorder + Command::new(uucore::util_name()) + .version(uucore::crate_version!()) + .help_template(uucore::localized_help_template(uucore::util_name())) + .override_usage(uucore::format_usage(&translate!("diff-usage"))) + .about(translate!("diff-about")) + .infer_long_args(true) + .arg( + Arg::new(options::FILE) + .action(ArgAction::Append) + .hide(true) + .value_hint(clap::ValueHint::FilePath) + .value_parser(clap::value_parser!(OsString)), + ) + .arg( + Arg::new(options::BRIEF) + .long("brief") + .short('q') + .action(ArgAction::SetTrue) + .help(translate!("diff-help-brief")), + ) + .arg( + Arg::new(options::COLOR) + .long("color") + .value_name("WHEN]") + .action(ArgAction::Set) + .help(translate!("diff-help-color")), + ) + .arg( + Arg::new(options::CONTEXT_LINES) + .long("context") + .short('c') + .value_name("NUM") + .num_args(0..=1) + .require_equals(true) + .default_missing_value("3") + .action(ArgAction::Set) + .help(translate!("diff-help-context")), + ) + .arg( + Arg::new(options::CONTEXT_LINES_UPPER) + .short('C') + .value_name("NUM") + .action(ArgAction::Set) + .help(translate!("diff-help-context")), + ) + .arg( + Arg::new(options::ED) + .long("ed") + .short('e') + .action(ArgAction::SetTrue) + .help(translate!("diff-help-ed")), + ) + .arg( + Arg::new(options::EXCLUDE) + .long("exclude") + .short('x') + .value_name("PAT") + .action(ArgAction::Set) + .help(translate!("diff-help-exclude")), + ) + .arg( + Arg::new(options::EXCLUDE_FROM) + .long("exclude-from") + .short('X') + .value_name("FILE") + .action(ArgAction::Set) + .help(translate!("diff-help-exclude-from")), + ) + .arg( + Arg::new(options::EXPAND_TABS) + .long("expand-tabs") + .short('t') + .action(ArgAction::SetTrue) + .help(translate!("diff-help-expand-tabs")), + ) + .arg( + Arg::new(options::FROM_FILE) + 
.long("from-file") + .value_name("FILE1") + .action(ArgAction::Set) + .help(translate!("diff-help-from-file")), + ) + .arg( + Arg::new(options::GTYPE_GROUP_FORMAT) + .long("gtype-group-format") + .value_name("GFMT") + .action(ArgAction::Set) + .help(translate!("diff-help-gtype-group-format")), + ) + .arg( + Arg::new(options::HORIZON_LINES) + .long("horizon-lines") + .value_name("NUM") + .value_parser(clap::value_parser!(usize)) + .action(ArgAction::Set) + .help(translate!("diff-help-horizon-lines")), + ) + .arg( + Arg::new(options::IFDEF) + .long("ifdef") + .short('D') + .value_name("NAME") + .action(ArgAction::Set) + .help(translate!("diff-help-ifdef")), + ) + .arg( + Arg::new(options::IGNORE_ALL_SPACE) + .long("ignore-all-space") + .short('w') + .action(ArgAction::SetTrue) + .help(translate!("diff-help-ignore-all-space")), + ) + .arg( + Arg::new(options::IGNORE_BLANK_LINES) + .long("ignore-blank-lines") + .short('B') + .action(ArgAction::SetTrue) + .help(translate!("diff-help-ignore-blank-lines")), + ) + .arg( + Arg::new(options::IGNORE_CASE) + .long("ignore-case") + .short('i') + .action(ArgAction::SetTrue) + .help(translate!("diff-help-ignore-case")), + ) + .arg( + Arg::new(options::IGNORE_FILE_NAME_CASE) + .long("ignore-file-name-case") + .action(ArgAction::SetTrue) + .help(translate!("diff-help-ignore-file-name-case")), + ) + .arg( + Arg::new(options::IGNORE_MATCHING_LINES) + .long("ignore-matching-lines") + .short('I') + .value_name("REGEXP") + .action(ArgAction::Set) + .help(translate!("diff-help-ignore-matching-lines")), + ) + .arg( + Arg::new(options::IGNORE_SPACE_CHANGE) + .long("ignore-space-change") + .short('b') + .action(ArgAction::SetTrue) + .help(translate!("diff-help-ignore-space-change")), + ) + .arg( + Arg::new(options::IGNORE_TAB_EXPANSION) + .long("ignore-tab-expansion") + .short('E') + .action(ArgAction::SetTrue) + .help(translate!("diff-help-ignore-tab-expansion")), + ) + .arg( + Arg::new(options::IGNORE_TRAILING_SPACE) + 
.long("ignore-trailing-space") + .short('Z') + .action(ArgAction::SetTrue) + .help(translate!("diff-help-ignore-trailing-space")), + ) + .arg( + Arg::new(options::INITIAL_TAB) + .long("initial-tab") + .short('T') + .action(ArgAction::SetTrue) + .help(translate!("diff-help-initial-tab")), + ) + .arg( + Arg::new(options::LABEL) + .long("label") + .action(ArgAction::SetTrue) + .help(translate!("diff-help-label")), + ) + .arg( + Arg::new(options::LEFT_COLUMN) + .long("left-column") + .action(ArgAction::SetTrue) + .help(translate!("diff-help-left-column")), + ) + .arg( + Arg::new(options::LINE_FORMAT) + .long("line-format") + .value_name("LFMT") + .action(ArgAction::Set) + .help(translate!("diff-help-line-format")), + ) + .arg( + Arg::new(options::LTYPE_LINE_FORMAT) + .long("ltype-line-format") + .value_name("LFMT") + .action(ArgAction::Set) + .help(translate!("diff-help-ltype-line-format")), + ) + .arg( + Arg::new(options::MINIMAL) + .long("minimal") + .short('d') + .action(ArgAction::SetTrue) + .help(translate!("diff-help-minimal")), + ) + .arg( + Arg::new(options::NEW_FILE) + .long("new-file") + .short('N') + .action(ArgAction::SetTrue) + .help(translate!("diff-help-new-file")), + ) + .arg( + Arg::new(options::NO_DEREFERENCE) + .long("no-dereference") + .action(ArgAction::SetTrue) + .help(translate!("diff-help-no-dereference")), + ) + .arg( + Arg::new(options::NO_IGNORE_FILE_NAME_CASE) + .long("no-ignore-file-name-case") + .action(ArgAction::SetTrue) + .help(translate!("diff-help-no-ignore-file-name-case")), + ) + .arg( + Arg::new(options::NORMAL) + .long("normal") + .action(ArgAction::SetTrue) + .help(translate!("diff-help-normal")), + ) + .arg( + Arg::new(options::PAGINATE) + .long("paginate") + .short('l') + .action(ArgAction::SetTrue) + .help(translate!("diff-help-paginate")), + ) + .arg( + Arg::new(options::PALETTE) + .long("palette") + .value_name("PALETTE") + .action(ArgAction::Set) + .help(translate!("diff-help-palette")), + ) + .arg( + Arg::new(options::RCS) 
+ .long("rcs") + .short('n') + .action(ArgAction::SetTrue) + .help(translate!("diff-help-rcs")), + ) + .arg( + Arg::new(options::RECURSIVE) + .long("recursive") + .short('r') + .action(ArgAction::SetTrue) + .help(translate!("diff-help-recursive")), + ) + .arg( + Arg::new(options::REPORT_IDENTICAL_FILES) + .long("report-identical-files") + .short('s') + .action(ArgAction::SetTrue) + .help(translate!("diff-help-report-identical-files")), + ) + .arg( + Arg::new(options::SHOW_C_FUNCTION) + .long("show-c-function") + .short('p') + .action(ArgAction::SetTrue) + .help(translate!("diff-help-show-c-function")), + ) + .arg( + Arg::new(options::SHOW_FUNCTION_LINE) + .long("show-function-line") + .short('F') + .value_name("REGEXP") + .action(ArgAction::Set) + .help(translate!("diff-help-show-function-line")), + ) + .arg( + Arg::new(options::SIDE_BY_SIDE) + .long("side-by-side") + .short('y') + .action(ArgAction::SetTrue) + .help(translate!("diff-help-side-by-side")), + ) + .arg( + Arg::new(options::SPEED_LARGE_FILES) + .long("speed-large-files") + .action(ArgAction::SetTrue) + .help(translate!("diff-help-speed-large-files")), + ) + .arg( + Arg::new(options::STARTING_FILE) + .long("starting-file") + .short('S') + .value_name("FILE") + .action(ArgAction::Set) + .help(translate!("diff-help-starting-file")), + ) + .arg( + Arg::new(options::STRIP_TRAILING_CR) + .long("strip-trailing-cr") + .action(ArgAction::SetTrue) + .help(translate!("diff-help-strip-trailing-cr")), + ) + .arg( + Arg::new(options::SUPPRESS_BLANK_EMPTY) + .long("suppress-blank-empty") + .action(ArgAction::SetTrue) + .help(translate!("diff-help-suppress-blank-empty")), + ) + .arg( + Arg::new(options::SUPPRESS_COMMON_LINES) + .long("suppress-common-lines") + .action(ArgAction::SetTrue) + .help(translate!("diff-help-suppress-common-lines")), + ) + .arg( + Arg::new(options::TABSIZE) + .long("tabsize") + .value_name("NUM") + .value_parser(clap::value_parser!(u16)) + .action(ArgAction::Set) + 
.help(translate!("diff-help-tabsize")), + ) + .arg( + Arg::new(options::TEXT) + .long("text") + .short('a') + .action(ArgAction::SetTrue) + .help(translate!("diff-help-text")), + ) + .arg( + Arg::new(options::TO_FILE) + .long("to-file") + .value_name("FILE2") + .action(ArgAction::Set) + .help(translate!("diff-help-to-file")), + ) + .arg( + Arg::new(options::UNIDIRECTIONAL_NEW_FILE) + .long("unidirectional-new-file") + .action(ArgAction::SetTrue) + .help(translate!("diff-help-unidirectional-new-file")), + ) + .arg( + Arg::new(options::UNIFIED_LINES) + .long("unified") + .short('u') + .value_name("NUM") + .num_args(0..=1) + .require_equals(true) + .default_missing_value("3") + .action(ArgAction::Set) + .help(translate!("diff-help-unified")), + ) + .arg( + Arg::new(options::UNIFIED_LINES_UPPER) + .short('U') + .value_name("NUM") + .action(ArgAction::Set) + .help(translate!("diff-help-unified")), + ) + .arg( + Arg::new(options::WIDTH) + .long("width") + .short('W') + .value_name("NUM") + .value_parser(clap::value_parser!(u16)) + .action(ArgAction::Set) + .help(translate!("diff-help-width")), + ) +} diff --git a/src/params.rs b/src/uu/diff/src/params_old.rs similarity index 90% rename from src/params.rs rename to src/uu/diff/src/params_old.rs index 74ef3e3..1de2610 100644 --- a/src/params.rs +++ b/src/uu/diff/src/params_old.rs @@ -1,4 +1,12 @@ +// This file is part of the uutils diffutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. 
+ +// spell-checker:ignore numvalue + use std::ffi::OsString; + use std::iter::Peekable; use std::path::PathBuf; @@ -764,53 +772,65 @@ mod tests { .peekable() ) ); - assert!(parse_params( - [os("diff"), os("--tabsize"), os("foo"), os("bar")] - .iter() - .cloned() - .peekable() - ) - .is_err()); - assert!(parse_params( - [os("diff"), os("--tabsize="), os("foo"), os("bar")] - .iter() - .cloned() - .peekable() - ) - .is_err()); - assert!(parse_params( - [os("diff"), os("--tabsize=r2"), os("foo"), os("bar")] - .iter() - .cloned() - .peekable() - ) - .is_err()); - assert!(parse_params( - [os("diff"), os("--tabsize=-1"), os("foo"), os("bar")] - .iter() - .cloned() - .peekable() - ) - .is_err()); - assert!(parse_params( - [os("diff"), os("--tabsize=r2"), os("foo"), os("bar")] + assert!( + parse_params( + [os("diff"), os("--tabsize"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + .is_err() + ); + assert!( + parse_params( + [os("diff"), os("--tabsize="), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + .is_err() + ); + assert!( + parse_params( + [os("diff"), os("--tabsize=r2"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + .is_err() + ); + assert!( + parse_params( + [os("diff"), os("--tabsize=-1"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + .is_err() + ); + assert!( + parse_params( + [os("diff"), os("--tabsize=r2"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + .is_err() + ); + assert!( + parse_params( + [ + os("diff"), + os("--tabsize=92233720368547758088"), + os("foo"), + os("bar") + ] .iter() .cloned() .peekable() - ) - .is_err()); - assert!(parse_params( - [ - os("diff"), - os("--tabsize=92233720368547758088"), - os("foo"), - os("bar") - ] - .iter() - .cloned() - .peekable() - ) - .is_err()); + ) + .is_err() + ); } #[test] fn double_dash() { @@ -858,20 +878,24 @@ mod tests { }), parse_params([os("diff"), os("-"), os("-")].iter().cloned().peekable()) ); - assert!(parse_params( - 
[os("diff"), os("foo"), os("bar"), os("-")] - .iter() - .cloned() - .peekable() - ) - .is_err()); - assert!(parse_params( - [os("diff"), os("-"), os("-"), os("-")] - .iter() - .cloned() - .peekable() - ) - .is_err()); + assert!( + parse_params( + [os("diff"), os("foo"), os("bar"), os("-")] + .iter() + .cloned() + .peekable() + ) + .is_err() + ); + assert!( + parse_params( + [os("diff"), os("-"), os("-"), os("-")] + .iter() + .cloned() + .peekable() + ) + .is_err() + ); } #[test] fn missing_arguments() { @@ -880,13 +904,15 @@ mod tests { } #[test] fn unknown_argument() { - assert!(parse_params( - [os("diff"), os("-g"), os("foo"), os("bar")] - .iter() - .cloned() - .peekable() - ) - .is_err()); + assert!( + parse_params( + [os("diff"), os("-g"), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + .is_err() + ); assert!( parse_params([os("diff"), os("-g"), os("bar")].iter().cloned().peekable()).is_err() ); @@ -907,13 +933,15 @@ mod tests { ("--normal", "-e"), ("--context", "--normal"), ] { - assert!(parse_params( - [os("diff"), os(arg1), os(arg2), os("foo"), os("bar")] - .iter() - .cloned() - .peekable() - ) - .is_err()); + assert!( + parse_params( + [os("diff"), os(arg1), os(arg2), os("foo"), os("bar")] + .iter() + .cloned() + .peekable() + ) + .is_err() + ); } } } diff --git a/src/side_diff.rs b/src/uu/diff/src/side_diff.rs similarity index 97% rename from src/side_diff.rs rename to src/uu/diff/src/side_diff.rs index 56953d2..7acc62f 100644 --- a/src/side_diff.rs +++ b/src/uu/diff/src/side_diff.rs @@ -1,15 +1,21 @@ // This file is part of the uutils diffutils package. // -// For the full copyright and license information, please view the LICENSE-* -// files that was distributed with this source code. +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. +// spell-checker:ignore áéíóú endiand mcel rxyz + +//! This file contains the side-by-side output format of the diff utility. +//! +//! 
Some tests remain in the modules, as moving them out would require making some functions public. +//! Run 'cargo test -p uu_diff' or 'cargo test --workspace' to include these. + +use crate::params_diff::Params; use core::cmp::{max, min}; -use diff::Result; +use diff_crate::Result; use std::{io::Write, vec}; use unicode_width::UnicodeWidthStr; -use crate::params::Params; - const GUTTER_WIDTH_MIN: usize = 3; struct CharIter<'a> { @@ -52,22 +58,19 @@ impl<'a> Iterator for CharIter<'a> { break; } view = &self.current[..index]; - char = str::from_utf8(view) + char = str::from_utf8(view); } - match char { - Ok(c) => { - self.current = self - .current - .get(view.len()..) - .unwrap_or(&self.current[0..0]); - Some((view, UnicodeWidthStr::width(c))) - } - Err(_) => { - // We did not find an utf-8 char within the next 4 bytes, return the single byte. - self.current = &self.current[1..]; - Some((&view[..1], 1)) - } + if let Ok(c) = char { + self.current = self + .current + .get(view.len()..) + .unwrap_or(&self.current[0..0]); + Some((view, UnicodeWidthStr::width(c))) + } else { + // We did not find a UTF-8 char within the next 4 bytes, return the single byte. 
+ self.current = &self.current[1..]; + Some((&view[..1], 1)) } } } @@ -348,12 +351,12 @@ pub fn diff( More studies are needed to cover GNU diff side by side with 100% accuracy, which is one of the goals of this project : ) */ - for result in diff::slice(&left_lines, &right_lines) { + for result in diff_crate::slice(&left_lines, &right_lines) { match result { Result::Left(left_ln) => push_output(left_ln, b"", b'<', output, &config).unwrap(), Result::Right(right_ln) => push_output(b"", right_ln, b'>', output, &config).unwrap(), Result::Both(left_ln, right_ln) => { - push_output(left_ln, right_ln, b' ', output, &config).unwrap() + push_output(left_ln, right_ln, b' ', output, &config).unwrap(); } } } @@ -575,7 +578,7 @@ mod tests { let mut buf = vec![]; let gb18030 = b"\x63\x61\x66\xA8\x80"; // some random chinese encoding - // ^ é char, start multi byte + // ^ é char, start multi byte process_half_line(gb18030, 4, false, false, &config, &mut buf).unwrap(); assert_eq!(buf, b"\x63\x61\x66\xA8 "); // break the encoding of 'é' letter } diff --git a/src/unified_diff.rs b/src/uu/diff/src/unified_diff.rs similarity index 95% rename from src/unified_diff.rs rename to src/uu/diff/src/unified_diff.rs index 0f504a8..e81cfcf 100644 --- a/src/unified_diff.rs +++ b/src/uu/diff/src/unified_diff.rs @@ -1,14 +1,16 @@ // This file is part of the uutils diffutils package. // -// For the full copyright and license information, please view the LICENSE-* -// files that was distributed with this source code. +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. 
+ +// spell-checker:ignore alef alefn alefr alefx betn betr betx nodiff use std::collections::VecDeque; use std::io::Write; -use crate::params::Params; -use crate::utils::do_write_line; -use crate::utils::get_modification_time; +use crate::params_diff::Params; +use uudiff::utils::do_write_line; +use uudiff::utils::get_modification_time; #[derive(Debug, PartialEq)] pub enum DiffLine { @@ -26,8 +28,8 @@ struct Mismatch { } impl Mismatch { - fn new(line_number_expected: u32, line_number_actual: u32) -> Mismatch { - Mismatch { + fn new(line_number_expected: u32, line_number_actual: u32) -> Self { + Self { line_number_expected, line_number_actual, lines: Vec::new(), @@ -65,9 +67,9 @@ fn make_diff( actual_lines.pop(); } - for result in diff::slice(&expected_lines, &actual_lines) { + for result in diff_crate::slice(&expected_lines, &actual_lines) { match result { - diff::Result::Left(str) => { + diff_crate::Result::Left(str) => { if lines_since_mismatch >= context_size && lines_since_mismatch > 0 { results.push(mismatch); mismatch = Mismatch::new( @@ -93,7 +95,9 @@ fn make_diff( mismatch.lines.push(DiffLine::Actual(res)); mismatch.lines.push(DiffLine::MissingNL); } - _ => unreachable!("unterminated Left and Common lines shouldn't be followed by more Left lines"), + _ => unreachable!( + "unterminated Left and Common lines shouldn't be followed by more Left lines" + ), } } else { mismatch.lines.push(DiffLine::Expected(str.to_vec())); @@ -104,7 +108,7 @@ fn make_diff( line_number_expected += 1; lines_since_mismatch = 0; } - diff::Result::Right(str) => { + diff_crate::Result::Right(str) => { if lines_since_mismatch >= context_size && lines_since_mismatch > 0 { results.push(mismatch); mismatch = Mismatch::new( @@ -125,7 +129,7 @@ fn make_diff( line_number_actual += 1; lines_since_mismatch = 0; } - diff::Result::Both(str, _) => { + diff_crate::Result::Both(str, _) => { // if one of them is missing a newline and the other isn't, then they don't actually match if 
(line_number_actual > actual_lines_count) && (line_number_expected > expected_lines_count) @@ -249,7 +253,7 @@ pub fn diff(expected: &[u8], actual: &[u8], params: &Params) -> Vec { to_modified_time ) .into_bytes(); - let diff_results = make_diff(expected, actual, params.context_count, params.brief); + let diff_results = make_diff(expected, actual, params.n_output_lines, params.brief); if diff_results.is_empty() { return Vec::new(); } @@ -410,7 +414,7 @@ mod tests { #[test] fn test_permutations() { - let target = "target/unified-diff/"; + let target = "../../../target/unified-diff/"; // test all possible six-line files. let _ = std::fs::create_dir(target); for &a in &[0, 1, 2] { @@ -462,7 +466,7 @@ mod tests { &Params { from: "a/alef".into(), to: (&format!("{target}/alef")).into(), - context_count: 2, + n_output_lines: 2, ..Default::default() }, ); @@ -493,7 +497,9 @@ mod tests { ); let output = Command::new("patch") - .arg("-p0") + // .arg("-p0") + .arg("-d") + .arg(&format!("{target}")) .stdin(File::open(format!("{target}/ab.diff")).unwrap()) .output() .unwrap(); @@ -512,7 +518,7 @@ mod tests { #[test] fn test_permutations_missing_line_ending() { - let target = "target/unified-diff/"; + let target = "../../../target/unified-diff/"; // test all possible six-line files with missing newlines. 
let _ = std::fs::create_dir(target); for &a in &[0, 1, 2] { @@ -578,7 +584,7 @@ mod tests { &Params { from: "a/alefn".into(), to: (&format!("{target}/alefn")).into(), - context_count: 2, + n_output_lines: 2, ..Default::default() }, ); @@ -593,7 +599,9 @@ mod tests { let _ = fa; let _ = fb; let output = Command::new("patch") - .arg("-p0") + // .arg("-p0") + .arg("-d") + .arg(&format!("{target}")) .stdin(File::open(format!("{target}/abn.diff")).unwrap()) .output() .unwrap(); @@ -613,7 +621,7 @@ mod tests { #[test] fn test_permutations_empty_lines() { - let target = "target/unified-diff/"; + let target = "../../../target/unified-diff/"; // test all possible six-line files with missing newlines. let _ = std::fs::create_dir(target); for &a in &[0, 1, 2] { @@ -674,7 +682,7 @@ mod tests { &Params { from: "a/alef_".into(), to: (&format!("{target}/alef_")).into(), - context_count: 2, + n_output_lines: 2, ..Default::default() }, ); @@ -689,7 +697,9 @@ mod tests { let _ = fa; let _ = fb; let output = Command::new("patch") - .arg("-p0") + // .arg("-p0") + .arg("-d") + .arg(&format!("{target}")) .stdin(File::open(format!("{target}/ab_.diff")).unwrap()) .output() .unwrap(); @@ -709,7 +719,7 @@ mod tests { #[test] fn test_permutations_missing_lines() { - let target = "target/unified-diff/"; + let target = "../../../target/unified-diff/"; // test all possible six-line files. 
let _ = std::fs::create_dir(target); for &a in &[0, 1, 2] { @@ -755,7 +765,7 @@ mod tests { &Params { from: "a/alefx".into(), to: (&format!("{target}/alefx")).into(), - context_count: 2, + n_output_lines: 2, ..Default::default() }, ); @@ -770,7 +780,9 @@ mod tests { let _ = fa; let _ = fb; let output = Command::new("patch") - .arg("-p0") + // .arg("-p0") + .arg("-d") + .arg(&format!("{target}")) .stdin(File::open(format!("{target}/abx.diff")).unwrap()) .output() .unwrap(); @@ -789,7 +801,7 @@ mod tests { #[test] fn test_permutations_reverse() { - let target = "target/unified-diff/"; + let target = "../../../target/unified-diff/"; // test all possible six-line files. let _ = std::fs::create_dir(target); for &a in &[0, 1, 2] { @@ -841,7 +853,7 @@ mod tests { &Params { from: "a/alefr".into(), to: (&format!("{target}/alefr")).into(), - context_count: 2, + n_output_lines: 2, ..Default::default() }, ); @@ -856,7 +868,9 @@ mod tests { let _ = fa; let _ = fb; let output = Command::new("patch") - .arg("-p0") + // .arg("-p0") + .arg("-d") + .arg(&format!("{target}")) .stdin(File::open(format!("{target}/abr.diff")).unwrap()) .output() .unwrap(); @@ -875,7 +889,7 @@ mod tests { #[test] fn test_stop_early() { - use crate::assert_diff_eq; + use uudiff::assert_diff_eq; let from_filename = "foo"; let from = ["a", "b", "c", ""].join("\n"); diff --git a/src/uu/test/Cargo.toml b/src/uu/test/Cargo.toml new file mode 100644 index 0000000..1db297b --- /dev/null +++ b/src/uu/test/Cargo.toml @@ -0,0 +1,35 @@ +[package] +name = "uu_test" +description = "test ~ (uutils) evaluate comparison and file type expressions" +# The tool dist does not allow different repository names within the workspace. 
+repository = "https://github.com/uutils/diffutils" +# repository = "https://github.com/uutils/diffutils/tree/main/src/uu/test" +version.workspace = true +authors.workspace = true +license.workspace = true +homepage.workspace = true +keywords.workspace = true +categories.workspace = true +edition.workspace = true +rust-version.workspace = true +readme.workspace = true + +[lints] +workspace = true + +[lib] +path = "src/test.rs" + +[dependencies] +clap = { workspace = true } +fluent = { workspace = true } +libc = { workspace = true } +thiserror = { workspace = true } +uucore = { workspace = true, features = ["process"] } + +[dev-dependencies] +tempfile = { workspace = true } + +[[bin]] +name = "test" +path = "src/main.rs" diff --git a/src/uu/test/LICENSE b/src/uu/test/LICENSE new file mode 120000 index 0000000..5853aae --- /dev/null +++ b/src/uu/test/LICENSE @@ -0,0 +1 @@ +../../../LICENSE \ No newline at end of file diff --git a/src/uu/test/locales/en-US.ftl b/src/uu/test/locales/en-US.ftl new file mode 100644 index 0000000..56d8d4f --- /dev/null +++ b/src/uu/test/locales/en-US.ftl @@ -0,0 +1,160 @@ +test-about = Check file types and compare values. +test-usage = test EXPRESSION + test + {"[ EXPRESSION ]"} + {"[ ]"} + {"[ OPTION"} +test-after-help = Exit with the status determined by EXPRESSION. + + An omitted EXPRESSION defaults to false. + Otherwise, EXPRESSION is true or false and sets exit status. + It is one of: + + ( EXPRESSION ) + EXPRESSION is true + + ! 
EXPRESSION + EXPRESSION is false + + EXPRESSION1 -a EXPRESSION2 + both EXPRESSION1 and EXPRESSION2 are true + + EXPRESSION1 -o EXPRESSION2 + either EXPRESSION1 or EXPRESSION2 is true + + String operations: + + -n STRING + the length of STRING is nonzero + + STRING + equivalent to -n STRING + + -z STRING + the length of STRING is zero + + STRING1 = STRING2 + the strings are equal + + STRING1 != STRING2 + the strings are not equal + + STRING1 > STRING2 + STRING1 is greater than STRING2 in the current locale + + STRING1 < STRING2 + STRING1 is less than STRING2 in the current locale + + Integer comparisons: + + INTEGER1 -eq INTEGER2 + INTEGER1 is equal to INTEGER2 + + INTEGER1 -ge INTEGER2 + INTEGER1 is greater than or equal to INTEGER2 + + INTEGER1 -gt INTEGER2 + INTEGER1 is greater than INTEGER2 + + INTEGER1 -le INTEGER2 + INTEGER1 is less than or equal to INTEGER2 + + INTEGER1 -lt INTEGER2 + INTEGER1 is less than INTEGER2 + + INTEGER1 -ne INTEGER2 + INTEGER1 is not equal to INTEGER2 + + File operations: + + FILE1 -ef FILE2 + FILE1 and FILE2 have the same device and inode numbers + + FILE1 -nt FILE2 + FILE1 is newer (modification date) than FILE2 + + FILE1 -ot FILE2 + FILE1 is older than FILE2 + + -b FILE + FILE exists and is block special + + -c FILE + FILE exists and is character special + + -d FILE + FILE exists and is a directory + + -e FILE + FILE exists + + -f FILE + FILE exists and is a regular file + + -g FILE + FILE exists and is set-group-ID + + -G FILE + FILE exists and is owned by the effective group ID + + -h FILE + FILE exists and is a symbolic link (same as -L) + + -k FILE + FILE exists and has its sticky bit set + + -L FILE + FILE exists and is a symbolic link (same as -h) + + -N FILE + FILE exists and has been modified since it was last read + + -O FILE + FILE exists and is owned by the effective user ID + + -p FILE + FILE exists and is a named pipe + + -r FILE + FILE exists and read permission is granted + + -s FILE + FILE exists and has a size 
greater than zero + + -S FILE + FILE exists and is a socket + + -t FD + file descriptor FD is opened on a terminal + + -u FILE + FILE exists and its set-user-ID bit is set + + -w FILE + FILE exists and write permission is granted + + -x FILE + FILE exists and execute (or search) permission is granted + + Except for -h and -L, all FILE-related tests dereference (follow) symbolic links. + Beware that parentheses need to be escaped (e.g., by backslashes) for shells. + INTEGER may also be -l STRING, which evaluates to the length of STRING. + + NOTE: Binary -a and -o are inherently ambiguous. + Use test EXPR1 && test EXPR2 or test EXPR1 || test EXPR2 instead. + + NOTE: {"["} honors the --help and --version options, but test does not. + test treats each of those as it treats any other nonempty STRING. + + NOTE: your shell may have its own version of test and/or {"["}, which usually supersedes the version described here. + Please refer to your shell's documentation for details about the options it supports. + +# Error messages +test-error-missing-closing-bracket = missing '{"]"}' +test-error-expected = expected { $value } +test-error-expected-value = expected value +test-error-missing-argument = missing argument after { $argument } +test-error-extra-argument = extra argument { $argument } +test-error-unknown-operator = unknown operator { $operator } +test-error-invalid-integer = invalid integer { $value } +test-error-unary-operator-expected = { $operator }: unary operator expected diff --git a/src/uu/test/locales/fr-FR.ftl b/src/uu/test/locales/fr-FR.ftl new file mode 100644 index 0000000..9255bf9 --- /dev/null +++ b/src/uu/test/locales/fr-FR.ftl @@ -0,0 +1,160 @@ +test-about = Vérifier les types de fichiers et comparer les valeurs. +test-usage = test EXPRESSION + test + {"[ EXPRESSION ]"} + {"[ ]"} + {"[ OPTION"} +test-after-help = Quitter avec le statut déterminé par EXPRESSION. + + Une EXPRESSION omise vaut false par défaut. 
+ Sinon, EXPRESSION est true ou false et définit le statut de sortie. + Il peut s'agir de : + + ( EXPRESSION ) + EXPRESSION est vraie + + ! EXPRESSION + EXPRESSION est fausse + + EXPRESSION1 -a EXPRESSION2 + EXPRESSION1 et EXPRESSION2 sont toutes deux vraies + + EXPRESSION1 -o EXPRESSION2 + EXPRESSION1 ou EXPRESSION2 est vraie + + Opérations sur les chaînes : + + -n STRING + la longueur de STRING est non nulle + + STRING + équivalent à -n STRING + + -z STRING + la longueur de STRING est nulle + + STRING1 = STRING2 + les chaînes sont égales + + STRING1 != STRING2 + les chaînes ne sont pas égales + + STRING1 > STRING2 + STRING1 est plus grande que STRING2 dans les paramètres régionaux actuels + + STRING1 < STRING2 + STRING1 est plus petite que STRING2 dans les paramètres régionaux actuels + + Comparaisons d'entiers : + + INTEGER1 -eq INTEGER2 + INTEGER1 est égal à INTEGER2 + + INTEGER1 -ge INTEGER2 + INTEGER1 est supérieur ou égal à INTEGER2 + + INTEGER1 -gt INTEGER2 + INTEGER1 est supérieur à INTEGER2 + + INTEGER1 -le INTEGER2 + INTEGER1 est inférieur ou égal à INTEGER2 + + INTEGER1 -lt INTEGER2 + INTEGER1 est inférieur à INTEGER2 + + INTEGER1 -ne INTEGER2 + INTEGER1 n'est pas égal à INTEGER2 + + Opérations sur les fichiers : + + FILE1 -ef FILE2 + FILE1 et FILE2 ont les mêmes numéros de périphérique et d'inode + + FILE1 -nt FILE2 + FILE1 est plus récent (date de modification) que FILE2 + + FILE1 -ot FILE2 + FILE1 est plus ancien que FILE2 + + -b FILE + FILE existe et est un fichier spécial de type bloc + + -c FILE + FILE existe et est un fichier spécial de type caractère + + -d FILE + FILE existe et est un répertoire + + -e FILE + FILE existe + + -f FILE + FILE existe et est un fichier régulier + + -g FILE + FILE existe et a le bit set-group-ID + + -G FILE + FILE existe et appartient à l'ID de groupe effectif + + -h FILE + FILE existe et est un lien symbolique (identique à -L) + + -k FILE + FILE existe et a son bit sticky défini + + -L FILE + FILE existe et est un 
lien symbolique (identique à -h) + + -N FILE + FILE existe et a été modifié depuis sa dernière lecture + + -O FILE + FILE existe et appartient à l'ID utilisateur effectif + + -p FILE + FILE existe et est un tube nommé + + -r FILE + FILE existe et la permission de lecture est accordée + + -s FILE + FILE existe et a une taille supérieure à zéro + + -S FILE + FILE existe et est un socket + + -t FD + le descripteur de fichier FD est ouvert sur un terminal + + -u FILE + FILE existe et son bit set-user-ID est défini + + -w FILE + FILE existe et la permission d'écriture est accordée + + -x FILE + FILE existe et la permission d'exécution (ou de recherche) est accordée + + À l'exception de -h et -L, tous les tests liés aux FILE déréférencent (suivent) les liens symboliques. + Attention : les parenthèses doivent être échappées (par exemple, par des barres obliques inverses) pour les shells. + INTEGER peut aussi être -l STRING, qui évalue la longueur de STRING. + + NOTE : Les -a et -o binaires sont intrinsèquement ambigus. + Utilisez test EXPR1 && test EXPR2 ou test EXPR1 || test EXPR2 à la place. + + NOTE : {"["} honore les options --help et --version, mais test ne le fait pas. + test traite chacune de celles-ci comme il traite toute autre STRING non vide. + + NOTE : votre shell peut avoir sa propre version de test et/ou {"["}, qui remplace généralement la version décrite ici. + Veuillez vous référer à la documentation de votre shell pour les détails sur les options qu'il prend en charge. 
+ +# Messages d'erreur +test-error-missing-closing-bracket = '{"]"}' manquant +test-error-expected = { $value } attendu +test-error-expected-value = valeur attendue +test-error-missing-argument = argument manquant après { $argument } +test-error-extra-argument = argument supplémentaire { $argument } +test-error-unknown-operator = opérateur inconnu { $operator } +test-error-invalid-integer = entier invalide { $value } +test-error-unary-operator-expected = { $operator } : opérateur unaire attendu diff --git a/src/uu/test/src/error.rs b/src/uu/test/src/error.rs new file mode 100644 index 0000000..cd9c071 --- /dev/null +++ b/src/uu/test/src/error.rs @@ -0,0 +1,36 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +use thiserror::Error; +use uucore::translate; + +/// Represents an error encountered while parsing a test expression +#[derive(Error, Debug)] +pub enum ParseError { + #[error("{}", translate!("test-error-expected-value"))] + ExpectedValue, + #[error("{}", translate!("test-error-expected", "value" => .0))] + Expected(String), + #[error("{}", translate!("test-error-extra-argument", "argument" => .0))] + ExtraArgument(String), + #[error("{}", translate!("test-error-missing-argument", "argument" => .0))] + MissingArgument(String), + #[error("{}", translate!("test-error-unknown-operator", "operator" => .0))] + UnknownOperator(String), + #[error("{}", translate!("test-error-invalid-integer", "value" => .0))] + InvalidInteger(String), + #[error("{}", translate!("test-error-unary-operator-expected", "operator" => .0))] + UnaryOperatorExpected(String), +} + +/// A Result type for parsing test expressions +pub type ParseResult = Result; + +/// Implement `UError` trait for `ParseError` to make it easier to return useful error codes from `main()`. 
+impl uucore::error::UError for ParseError { + fn code(&self) -> i32 { + 2 + } +} diff --git a/src/uu/test/src/main.rs b/src/uu/test/src/main.rs new file mode 100644 index 0000000..f594ecd --- /dev/null +++ b/src/uu/test/src/main.rs @@ -0,0 +1,10 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +// uucore::bin!(uu_test); + +pub fn main() { + // TODO uu_test +} diff --git a/src/uu/test/src/parser.rs b/src/uu/test/src/parser.rs new file mode 100644 index 0000000..f391752 --- /dev/null +++ b/src/uu/test/src/parser.rs @@ -0,0 +1,444 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +// spell-checker:ignore (grammar) BOOLOP STRLEN FILETEST FILEOP INTOP STRINGOP ; (vars) LParen StrlenOp + +use std::ffi::{OsStr, OsString}; +use std::iter::Peekable; + +use super::error::{ParseError, ParseResult}; + +use uucore::display::Quotable; + +/// Represents one of the binary comparison operators for strings, integers, or files +#[derive(Debug, PartialEq, Eq)] +pub enum Operator { + String(OsString), + Int(OsString), + File(OsString), +} + +/// Represents one of the unary test operators for strings or files +#[derive(Debug, PartialEq, Eq)] +pub enum UnaryOperator { + StrlenOp(OsString), + FiletestOp(OsString), +} + +/// Represents a parsed token from a test expression +#[derive(Debug, PartialEq, Eq)] +pub enum Symbol { + LParen, + Bang, + BoolOp(OsString), + Literal(OsString), + Op(Operator), + UnaryOp(UnaryOperator), + None, +} + +impl Symbol { + /// Create a new Symbol from an [`OsString`]. + /// + /// Returns `Symbol::None` in place of None + fn new(token: Option) -> Self { + match token { + Some(s) => match s.to_str() { + Some(t) => match t { + "(" => Self::LParen, + "!" 
=> Self::Bang, + "-a" | "-o" => Self::BoolOp(s), + "=" | "==" | "!=" | "<" | ">" => Self::Op(Operator::String(s)), + "-eq" | "-ge" | "-gt" | "-le" | "-lt" | "-ne" => Self::Op(Operator::Int(s)), + "-ef" | "-nt" | "-ot" => Self::Op(Operator::File(s)), + "-n" | "-z" => Self::UnaryOp(UnaryOperator::StrlenOp(s)), + "-b" | "-c" | "-d" | "-e" | "-f" | "-g" | "-G" | "-h" | "-k" | "-L" | "-N" + | "-O" | "-p" | "-r" | "-s" | "-S" | "-t" | "-u" | "-w" | "-x" => { + Self::UnaryOp(UnaryOperator::FiletestOp(s)) + } + _ => Self::Literal(s), + }, + None => Self::Literal(s), + }, + None => Self::None, + } + } + + /// Convert this Symbol into a [`Symbol::Literal`], useful for cases where + /// test treats an operator as a string operand (test has no reserved + /// words). + /// + /// # Panics + /// + /// Panics if `self` is [`Symbol::None`] + fn into_literal(self) -> Self { + Self::Literal(match self { + Self::LParen => OsString::from("("), + Self::Bang => OsString::from("!"), + Self::BoolOp(s) + | Self::Literal(s) + | Self::Op(Operator::String(s) | Operator::Int(s) | Operator::File(s)) + | Self::UnaryOp(UnaryOperator::StrlenOp(s) | UnaryOperator::FiletestOp(s)) => s, + Self::None => panic!(), + }) + } +} + +/// Implement Display trait for Symbol to make it easier to print useful errors. +/// We will try to match the format in which the symbol appears in the input. 
+impl std::fmt::Display for Symbol { + /// Format a Symbol for printing + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let s = match &self { + Self::LParen => OsStr::new("("), + Self::Bang => OsStr::new("!"), + Self::BoolOp(s) + | Self::Literal(s) + | Self::Op(Operator::String(s) | Operator::Int(s) | Operator::File(s)) + | Self::UnaryOp(UnaryOperator::StrlenOp(s) | UnaryOperator::FiletestOp(s)) => { + OsStr::new(s) + } + Self::None => OsStr::new("None"), + }; + write!(f, "{}", s.quote()) + } +} + +/// Recursive descent parser for test, which converts a list of [`OsString`]s +/// (typically command line arguments) into a stack of Symbols in postfix +/// order. +/// +/// Grammar: +/// +/// EXPR → TERM | EXPR BOOLOP EXPR +/// TERM → ( EXPR ) +/// TERM → ! EXPR +/// TERM → UOP str +/// UOP → STRLEN | FILETEST +/// TERM → str OP str +/// TERM → str | 𝜖 +/// OP → STRINGOP | INTOP | FILEOP +/// STRINGOP → = | == | != +/// INTOP → -eq | -ge | -gt | -le | -lt | -ne +/// FILEOP → -ef | -nt | -ot +/// STRLEN → -n | -z +/// FILETEST → -b | -c | -d | -e | -f | -g | -G | -h | -k | -L | -N | -O | -p | +/// -r | -s | -S | -t | -u | -w | -x +/// BOOLOP → -a | -o +/// +#[derive(Debug)] +struct Parser { + tokens: Peekable>, + pub stack: Vec, +} + +impl Parser { + /// Construct a new Parser from a `Vec` of tokens. + fn new(tokens: Vec) -> Self { + Self { + tokens: tokens.into_iter().peekable(), + stack: vec![], + } + } + + /// Fetch the next token from the input stream as a Symbol. + fn next_token(&mut self) -> Symbol { + Symbol::new(self.tokens.next()) + } + + /// Consume the next token & verify that it matches the provided value. + fn expect(&mut self, value: &str) -> ParseResult<()> { + match self.next_token() { + Symbol::Literal(s) if s == value => Ok(()), + _ => Err(ParseError::Expected(value.quote().to_string())), + } + } + + /// Peek at the next token from the input stream, returning it as a Symbol. 
+ /// The stream is unchanged and will return the same Symbol on subsequent + /// calls to `next()` or `peek()`. + fn peek(&mut self) -> Symbol { + Symbol::new(self.tokens.peek().cloned()) + } + + /// Test if the next token in the stream is a BOOLOP (-a or -o), without + /// removing the token from the stream. + fn peek_is_boolop(&mut self) -> bool { + matches!(self.peek(), Symbol::BoolOp(_)) + } + + /// Parse an expression. + /// + /// EXPR → TERM | EXPR BOOLOP EXPR + fn expr(&mut self) -> ParseResult<()> { + if !self.peek_is_boolop() { + self.term()?; + } + self.maybe_boolop()?; + Ok(()) + } + + /// Parse a term token and possible subsequent symbols: "(", "!", UOP, + /// literal, or None. + fn term(&mut self) -> ParseResult<()> { + let symbol = self.next_token(); + + match symbol { + Symbol::LParen => self.lparen()?, + Symbol::Bang => self.bang()?, + Symbol::UnaryOp(_) => { + // Three-argument string comparison: `-f = a` means "-f" = "a", not file test + let is_string_cmp = matches!(self.peek(), Symbol::Op(Operator::String(_))) + && !matches!(Symbol::new(self.tokens.clone().nth(1)), Symbol::None); + if is_string_cmp { + self.literal(symbol.into_literal())?; + } else { + self.uop(symbol); + } + } + Symbol::None => self.stack.push(symbol), + literal => self.literal(literal)?, + } + Ok(()) + } + + /// Parse a (possibly) parenthesized expression. 
+ /// + /// test has no reserved keywords, so "(" will be interpreted as a literal + /// in certain cases: + /// + /// * when found at the end of the token stream + /// * when followed by a binary operator that is not _itself_ interpreted + /// as a literal + /// + fn lparen(&mut self) -> ParseResult<()> { + // Look ahead up to 3 tokens to determine if the lparen is being used + // as a grouping operator or should be treated as a literal string + let peek3: Vec = self + .tokens + .clone() + .take(3) + .map(|token| Symbol::new(Some(token))) + .collect(); + + match peek3.as_slice() { + // case 1: lparen is a literal when followed by nothing + [] => { + self.literal(Symbol::LParen.into_literal())?; + Ok(()) + } + + // case 2: error if end of stream is `( ` + [symbol] => Err(ParseError::MissingArgument(format!("{symbol}"))), + + // case 3: `( uop )` → parenthesized unary operation; + // this case ensures we don’t get confused by `( -f ) )` + // or `( -f ( )`, for example + [Symbol::UnaryOp(_), _, Symbol::Literal(s)] if s == ")" => { + let symbol = self.next_token(); + self.uop(symbol); + self.expect(")")?; + Ok(()) + } + + // case 4: binary comparison of literal lparen, e.g. `( != )` + [Symbol::Op(_), Symbol::Literal(s)] | [Symbol::Op(_), Symbol::Literal(s), _] + if s == ")" => + { + self.literal(Symbol::LParen.into_literal())?; + Ok(()) + } + + // case 5: after handling the prior cases, any single token inside + // parentheses is a literal, e.g. 
`( -f )` + [_, Symbol::Literal(s)] | [_, Symbol::Literal(s), _] if s == ")" => { + let symbol = self.next_token(); + self.literal(symbol)?; + self.expect(")")?; + Ok(()) + } + + // case 6: two binary ops in a row, treat the first op as a literal + [Symbol::Op(_), Symbol::Op(_), _] => { + let symbol = self.next_token(); + self.literal(symbol)?; + self.expect(")")?; + Ok(()) + } + + // case 7: if earlier cases didn’t match, `( op …` + // indicates binary comparison of literal lparen with + // anything _except_ ")" (case 4) + [Symbol::Op(_), _] | [Symbol::Op(_), _, _] => { + self.literal(Symbol::LParen.into_literal())?; + Ok(()) + } + + // Otherwise, lparen indicates the start of a parenthesized + // expression + _ => { + self.expr()?; + self.expect(")")?; + Ok(()) + } + } + } + + /// Parse a (possibly) negated expression. + /// + /// Example cases: + /// + /// * `! =`: negate the result of the implicit string length test of `=` + /// * `! = foo`: compare the literal strings `!` and `foo` + /// * `! = = str`: negate comparison of literal `=` and `str` + /// * `!`: bang followed by nothing is literal + /// * `! EXPR`: negate the result of the expression + /// + /// Combined Boolean & negation: + /// + /// * `! ( EXPR ) [BOOLOP EXPR]`: negate the parenthesized expression only + /// * `! UOP str BOOLOP EXPR`: negate the unary subexpression + /// * `! str BOOLOP str`: negate the entire Boolean expression + /// * `! str BOOLOP EXPR BOOLOP EXPR`: negate the value of the first `str` term + /// + fn bang(&mut self) -> ParseResult<()> { + match self.peek() { + Symbol::Op(_) | Symbol::BoolOp(_) => { + // we need to peek ahead one more token to disambiguate the first + // three cases listed above + let peek2 = Symbol::new(self.tokens.clone().nth(1)); + + match peek2 { + // case 1: `! ` + // case 3: `! 
= OP str` + Symbol::Op(_) | Symbol::None => { + // op is literal + let op = self.next_token().into_literal(); + self.literal(op)?; + self.stack.push(Symbol::Bang); + } + // case 2: ` OP str [BOOLOP EXPR]`. + _ => { + // bang is literal; parsing continues with op + self.literal(Symbol::Bang.into_literal())?; + self.maybe_boolop()?; + } + } + } + + // bang followed by nothing is literal + Symbol::None => self.stack.push(Symbol::Bang.into_literal()), + + _ => { + // peek ahead up to 4 tokens to determine if we need to negate + // the entire expression or just the first term + let peek4: Vec = self + .tokens + .clone() + .take(4) + .map(|token| Symbol::new(Some(token))) + .collect(); + + if let [Symbol::Literal(_), Symbol::BoolOp(_), Symbol::Literal(_)] = + peek4.as_slice() + { + // we peeked ahead 4 but there were only 3 tokens left + self.expr()?; + self.stack.push(Symbol::Bang); + } else { + self.term()?; + self.stack.push(Symbol::Bang); + } + } + } + Ok(()) + } + + /// Peek at the next token and parse it as a BOOLOP or string literal, + /// as appropriate. + fn maybe_boolop(&mut self) -> ParseResult<()> { + if self.peek_is_boolop() { + let symbol = self.next_token(); + + // BoolOp by itself interpreted as Literal + if let Symbol::None = self.peek() { + self.literal(symbol.into_literal())?; + } else { + self.boolop(symbol)?; + self.maybe_boolop()?; + } + } + Ok(()) + } + + /// Parse a Boolean expression. + /// + /// Logical and (-a) has higher precedence than or (-o), so in an + /// expression like `foo -o '' -a ''`, the and subexpression is evaluated + /// first. + fn boolop(&mut self, op: Symbol) -> ParseResult<()> { + if op == Symbol::BoolOp(OsString::from("-a")) { + self.term()?; + } else { + self.expr()?; + } + self.stack.push(op); + Ok(()) + } + + /// Parse a (possible) unary argument test (string length or file + /// attribute check). + /// + /// If a UOP is followed by nothing it is interpreted as a literal string. 
+ fn uop(&mut self, op: Symbol) { + match self.next_token() { + Symbol::None => self.stack.push(op.into_literal()), + symbol => { + self.stack.push(symbol.into_literal()); + self.stack.push(op); + } + } + } + + /// Parse a string literal, optionally followed by a comparison operator + /// and a second string literal. + fn literal(&mut self, token: Symbol) -> ParseResult<()> { + self.stack.push(token.into_literal()); + + // EXPR → str OP str + if let Symbol::Op(_) = self.peek() { + let op = self.next_token(); + + match self.next_token() { + Symbol::None => { + return Err(ParseError::MissingArgument(format!("{op}"))); + } + token => self.stack.push(token.into_literal()), + } + + self.stack.push(op); + } + Ok(()) + } + + /// Parser entry point: parse the token stream `self.tokens`, storing the + /// resulting `Symbol` stack in `self.stack`. + fn parse(&mut self) -> ParseResult<()> { + self.expr()?; + + match self.tokens.next() { + Some(token) => Err(ParseError::ExtraArgument(token.quote().to_string())), + None => Ok(()), + } + } +} + +/// Parse the token stream `args`, returning a `Symbol` stack representing the +/// operations to perform in postfix order. +pub fn parse(args: Vec) -> ParseResult> { + let mut p = Parser::new(args); + p.parse()?; + Ok(p.stack) +} diff --git a/src/uu/test/src/test.rs b/src/uu/test/src/test.rs new file mode 100644 index 0000000..92b1dcd --- /dev/null +++ b/src/uu/test/src/test.rs @@ -0,0 +1,452 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. 
+ +// spell-checker:ignore (vars) egid euid FiletestOp StrlenOp + +pub(crate) mod error; +mod parser; + +use clap::Command; +use error::{ParseError, ParseResult}; +use parser::{Operator, Symbol, UnaryOperator, parse}; +use std::ffi::{OsStr, OsString}; +use std::fs; +#[cfg(unix)] +use std::os::unix::fs::MetadataExt; +use uucore::display::Quotable; +use uucore::error::{UResult, USimpleError}; +use uucore::format_usage; +#[cfg(not(windows))] +use uucore::process::{getegid, geteuid}; + +use uucore::translate; + +// The help_usage method replaces util name (the first word) with {}. +// And, The format_usage method replaces {} with execution_phrase ( e.g. test or [ ). +// However, This test command has two util names. +// So, we use test or [ instead of {} so that the usage string is correct. + +// We use after_help so that this comes after the usage string (it would come before if we used about) + +pub fn uu_app() -> Command { + // Disable printing of -h and -v as valid alternatives for --help and --version, + // since we don't recognize -h and -v as help/version flags. 
+ Command::new(uucore::util_name()) + .version(uucore::crate_version!()) + .help_template(uucore::localized_help_template(uucore::util_name())) + .about(translate!("test-about")) + .override_usage(format_usage(&translate!("test-usage"))) + .after_help(translate!("test-after-help")) +} + +#[uucore::main(no_signals)] +pub fn uumain(mut args: impl uucore::Args) -> UResult<()> { + let program = args.next().unwrap_or_else(|| OsString::from("test")); + let binary_name = uucore::util_name(); + let mut args: Vec<_> = args.collect(); + + if binary_name.ends_with('[') { + // If invoked as [ we should recognize --help and --version (but not -h or -v) + if args.len() == 1 && (args[0] == "--help" || args[0] == "--version") { + uucore::clap_localization::handle_clap_result( + uu_app(), + std::iter::once(program).chain(args.into_iter()), + )?; + return Ok(()); + } + // If invoked via name '[', matching ']' must be in the last arg + let last = args.pop(); + if last.as_deref() != Some(OsStr::new("]")) { + return Err(USimpleError::new( + 2, + translate!("test-error-missing-closing-bracket"), + )); + } + } + + let result = parse(args).map(|mut stack| eval(&mut stack))??; + + if result { Ok(()) } else { Err(1.into()) } +} + +/// Evaluate a stack of Symbols, returning the result of the evaluation or +/// an error message if evaluation failed. +fn eval(stack: &mut Vec) -> ParseResult { + macro_rules! pop_literal { + () => { + match stack.pop() { + Some(Symbol::Literal(s)) => s, + _ => panic!(), + } + }; + } + + let s = stack.pop(); + + match s { + Some(Symbol::Bang) => { + let result = eval(stack)?; + + Ok(!result) + } + Some(Symbol::Op(Operator::String(op))) => { + let b = pop_literal!(); + let a = pop_literal!(); + match op.to_string_lossy().as_ref() { + "!=" => Ok(a != b), + "<" => Ok(a < b), + ">" => Ok(a > b), + _ => Ok(a == b), + } + } + Some(Symbol::Op(Operator::Int(op))) => { + let b = pop_literal!(); + let a = pop_literal!(); + + Ok(integers(&a, &b, &op)?) 
+ } + Some(Symbol::Op(Operator::File(op))) => { + let b = pop_literal!(); + let a = pop_literal!(); + Ok(files(&a, &b, &op)?) + } + Some(Symbol::UnaryOp(UnaryOperator::StrlenOp(op))) => { + let s = match stack.pop() { + Some(Symbol::Literal(s)) => s, + Some(Symbol::None) => OsString::from(""), + None => { + return Ok(true); + } + _ => { + return Err(ParseError::MissingArgument(op.quote().to_string())); + } + }; + + Ok(if op == "-z" { + s.is_empty() + } else { + !s.is_empty() + }) + } + Some(Symbol::UnaryOp(UnaryOperator::FiletestOp(op))) => { + let op = op.to_str().unwrap(); + + let f = pop_literal!(); + + Ok(match op { + "-b" => path(&f, &PathCondition::BlockSpecial), + "-c" => path(&f, &PathCondition::CharacterSpecial), + "-d" => path(&f, &PathCondition::Directory), + "-e" => path(&f, &PathCondition::Exists), + "-f" => path(&f, &PathCondition::Regular), + "-g" => path(&f, &PathCondition::GroupIdFlag), + "-G" => path(&f, &PathCondition::GroupOwns), + "-h" => path(&f, &PathCondition::SymLink), + "-k" => path(&f, &PathCondition::Sticky), + "-L" => path(&f, &PathCondition::SymLink), + "-N" => path(&f, &PathCondition::ExistsModifiedLastRead), + "-O" => path(&f, &PathCondition::UserOwns), + "-p" => path(&f, &PathCondition::Fifo), + "-r" => path(&f, &PathCondition::Readable), + "-S" => path(&f, &PathCondition::Socket), + "-s" => path(&f, &PathCondition::NonEmpty), + "-t" => isatty(&f)?, + "-u" => path(&f, &PathCondition::UserIdFlag), + "-w" => path(&f, &PathCondition::Writable), + "-x" => path(&f, &PathCondition::Executable), + _ => panic!(), + }) + } + Some(Symbol::Literal(s)) => Ok(!s.is_empty()), + Some(Symbol::None) | None => Ok(false), + Some(Symbol::BoolOp(op)) => { + if (op == "-a" || op == "-o") && stack.len() < 2 { + return Err(ParseError::UnaryOperatorExpected(op.quote().to_string())); + } + + let b = eval(stack)?; + let a = eval(stack)?; + + Ok(if op == "-a" { a && b } else { a || b }) + } + _ => Err(ParseError::ExpectedValue), + } +} + +/// Operations to 
compare integers +/// `a` is the left hand side +/// `b` is the right hand side +/// `op` the operation (ex: -eq, -lt, etc) +fn integers(a: &OsStr, b: &OsStr, op: &OsStr) -> ParseResult { + // Parse the two inputs + let a: i128 = a + .to_str() + .map(str::trim) + .and_then(|s| s.parse().ok()) + .ok_or_else(|| ParseError::InvalidInteger(a.quote().to_string()))?; + + let b: i128 = b + .to_str() + .map(str::trim) + .and_then(|s| s.parse().ok()) + .ok_or_else(|| ParseError::InvalidInteger(b.quote().to_string()))?; + + // Do the maths + Ok(match op.to_str() { + Some("-eq") => a == b, + Some("-ne") => a != b, + Some("-gt") => a > b, + Some("-ge") => a >= b, + Some("-lt") => a < b, + Some("-le") => a <= b, + _ => return Err(ParseError::UnknownOperator(op.quote().to_string())), + }) +} + +/// Operations to compare files metadata +/// `a` is the left hand side +/// `b` is the right hand side +/// `op` the operation (ex: -ef, -nt, etc) +fn files(a: &OsStr, b: &OsStr, op: &OsStr) -> ParseResult { + let f_a = fs::metadata(a); + let f_b = fs::metadata(b); + + let result = match (op.to_str(), f_a, f_b) { + #[cfg(unix)] + (Some("-ef"), Ok(f_a), Ok(f_b)) => f_a.ino() == f_b.ino() && f_a.dev() == f_b.dev(), + #[cfg(not(unix))] + (Some("-ef"), Ok(_), Ok(_)) => unimplemented!(), + (Some("-nt"), Ok(f_a), Ok(f_b)) => f_a.modified().unwrap() > f_b.modified().unwrap(), + (Some("-nt"), Ok(_), _) => true, + (Some("-ot"), Ok(f_a), Ok(f_b)) => f_a.modified().unwrap() < f_b.modified().unwrap(), + (Some("-ot"), _, Ok(_)) => true, + (Some("-ef" | "-nt" | "-ot"), _, _) => false, + (_, _, _) => return Err(ParseError::UnknownOperator(op.quote().to_string())), + }; + + Ok(result) +} + +/// Parse `fd` (trimmed) as an integer and report whether `libc::isatty` returns 1 for it. +/// Returns `ParseError::InvalidInteger` when `fd` is not valid UTF-8 or does not parse. +fn isatty(fd: &OsStr) -> ParseResult { + fd.to_str() + .map(str::trim) + .and_then(|s| s.parse().ok()) + .ok_or_else(|| ParseError::InvalidInteger(fd.quote().to_string())) + .map(|i| unsafe { libc::isatty(i) == 1 }) +} + +#[derive(Eq, PartialEq)] +enum PathCondition { + BlockSpecial, + CharacterSpecial, + Directory, +
Exists, + ExistsModifiedLastRead, + Regular, + GroupIdFlag, + GroupOwns, + SymLink, + Sticky, + UserOwns, + Fifo, + Readable, + Socket, + NonEmpty, + UserIdFlag, + Writable, + Executable, +} + +#[cfg(not(windows))] +fn path(path: &OsStr, condition: &PathCondition) -> bool { + use std::fs::Metadata; + use std::os::unix::fs::FileTypeExt; + + const S_ISUID: u32 = 0o4000; + const S_ISGID: u32 = 0o2000; + const S_ISVTX: u32 = 0o1000; + + enum Permission { + Read = 0o4, + Write = 0o2, + Execute = 0o1, + } + + let perm = |metadata: Metadata, p: Permission| { + if geteuid() == metadata.uid() { + metadata.mode() & ((p as u32) << 6) != 0 + } else if getegid() == metadata.gid() { + metadata.mode() & ((p as u32) << 3) != 0 + } else { + metadata.mode() & (p as u32) != 0 + } + }; + + let metadata = if condition == &PathCondition::SymLink { + fs::symlink_metadata(path) + } else { + fs::metadata(path) + }; + + let Ok(metadata) = metadata else { + return false; + }; + + let file_type = metadata.file_type(); + + match condition { + PathCondition::BlockSpecial => file_type.is_block_device(), + PathCondition::CharacterSpecial => file_type.is_char_device(), + PathCondition::Directory => file_type.is_dir(), + PathCondition::Exists => true, + PathCondition::ExistsModifiedLastRead => { + metadata.accessed().unwrap() < metadata.modified().unwrap() + } + PathCondition::Regular => file_type.is_file(), + PathCondition::GroupIdFlag => metadata.mode() & S_ISGID != 0, + PathCondition::GroupOwns => metadata.gid() == getegid(), + PathCondition::SymLink => metadata.file_type().is_symlink(), + PathCondition::Sticky => metadata.mode() & S_ISVTX != 0, + PathCondition::UserOwns => metadata.uid() == geteuid(), + PathCondition::Fifo => file_type.is_fifo(), + PathCondition::Readable => perm(metadata, Permission::Read), + PathCondition::Socket => file_type.is_socket(), + PathCondition::NonEmpty => metadata.size() > 0, + PathCondition::UserIdFlag => metadata.mode() & S_ISUID != 0, + PathCondition::Writable 
=> perm(metadata, Permission::Write), + PathCondition::Executable => perm(metadata, Permission::Execute), + } +} + +#[cfg(windows)] +fn path(path: &OsStr, condition: &PathCondition) -> bool { + use std::fs::metadata; + + let Ok(stat) = metadata(path) else { + return false; + }; + + match condition { + PathCondition::BlockSpecial => false, + PathCondition::CharacterSpecial => false, + PathCondition::Directory => stat.is_dir(), + PathCondition::Exists => true, + PathCondition::ExistsModifiedLastRead => unimplemented!(), + PathCondition::Regular => stat.is_file(), + PathCondition::GroupIdFlag => false, + PathCondition::GroupOwns => unimplemented!(), + PathCondition::SymLink => false, + PathCondition::Sticky => false, + PathCondition::UserOwns => unimplemented!(), + PathCondition::Fifo => false, + PathCondition::Readable => true, + PathCondition::Socket => false, + PathCondition::NonEmpty => stat.len() > 0, + PathCondition::UserIdFlag => false, + PathCondition::Writable => !stat.permissions().readonly(), + PathCondition::Executable => std::path::Path::new(path) + .extension() + .and_then(|e| e.to_str()) + .is_some_and(|e| matches!(e, "exe" | "bat" | "cmd" | "com")), + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::{ffi::OsStr, time::UNIX_EPOCH}; + use tempfile::NamedTempFile; + + #[test] + fn test_files_with_unknown_op() { + let a = NamedTempFile::new().unwrap(); + let b = NamedTempFile::new().unwrap(); + let a = OsStr::new(a.path()); + let b = OsStr::new(b.path()); + let op = OsStr::new("unknown_op"); + + assert!(files(a, b, op).is_err()); + } + + #[test] + #[cfg(unix)] + fn test_files_with_ef_op() { + let a = NamedTempFile::new().unwrap(); + let b = NamedTempFile::new().unwrap(); + let a = OsStr::new(a.path()); + let b = OsStr::new(b.path()); + let op = OsStr::new("-ef"); + + assert!(files(a, a, op).unwrap()); + assert!(!files(a, b, op).unwrap()); + assert!(!files(b, a, op).unwrap()); + + let existing_file = a; + let non_existing_file = 
OsStr::new("non_existing_file"); + + assert!(!files(existing_file, non_existing_file, op).unwrap()); + assert!(!files(non_existing_file, existing_file, op).unwrap()); + assert!(!files(non_existing_file, non_existing_file, op).unwrap()); + } + + #[test] + fn test_files_with_nt_op() { + let older_file = NamedTempFile::new().unwrap(); + older_file.as_file().set_modified(UNIX_EPOCH).unwrap(); + let older_file = OsStr::new(older_file.path()); + let newer_file = NamedTempFile::new().unwrap(); + let newer_file = OsStr::new(newer_file.path()); + let op = OsStr::new("-nt"); + + assert!(files(newer_file, older_file, op).unwrap()); + assert!(!files(older_file, newer_file, op).unwrap()); + + let existing_file = newer_file; + let non_existing_file = OsStr::new("non_existing_file"); + + assert!(files(existing_file, non_existing_file, op).unwrap()); + assert!(!files(non_existing_file, existing_file, op).unwrap()); + assert!(!files(non_existing_file, non_existing_file, op).unwrap()); + } + + #[test] + fn test_files_with_ot_op() { + let older_file = NamedTempFile::new().unwrap(); + older_file.as_file().set_modified(UNIX_EPOCH).unwrap(); + let older_file = OsStr::new(older_file.path()); + let newer_file = NamedTempFile::new().unwrap(); + let newer_file = OsStr::new(newer_file.path()); + let op = OsStr::new("-ot"); + + assert!(!files(newer_file, older_file, op).unwrap()); + assert!(files(older_file, newer_file, op).unwrap()); + + let existing_file = newer_file; + let non_existing_file = OsStr::new("non_existing_file"); + + assert!(!files(existing_file, non_existing_file, op).unwrap()); + assert!(files(non_existing_file, existing_file, op).unwrap()); + assert!(!files(non_existing_file, non_existing_file, op).unwrap()); + } + + #[test] + fn test_integer_op() { + let a = OsStr::new("18446744073709551616"); + let b = OsStr::new("0"); + assert!(!integers(a, b, OsStr::new("-lt")).unwrap()); + let a = OsStr::new("18446744073709551616"); + let b = OsStr::new("0"); + assert!(integers(a, b, 
OsStr::new("-gt")).unwrap()); + let a = OsStr::new("-1"); + let b = OsStr::new("0"); + assert!(integers(a, b, OsStr::new("-lt")).unwrap()); + let a = OsStr::new("42"); + let b = OsStr::new("42"); + assert!(integers(a, b, OsStr::new("-eq")).unwrap()); + let a = OsStr::new("42"); + let b = OsStr::new("42"); + assert!(!integers(a, b, OsStr::new("-ne")).unwrap()); + } +} diff --git a/src/uudiff/Cargo.toml b/src/uudiff/Cargo.toml new file mode 100644 index 0000000..cce59e0 --- /dev/null +++ b/src/uudiff/Cargo.toml @@ -0,0 +1,63 @@ +# spell-checker:ignore (features) bigdecimal zerocopy extendedbigdecimal tzdb zoneinfo logind + +[package] +name = "uudiff" +description = "uutils ~ 'diff' uutils code library (cross-platform)" +repository = "https://github.com/uutils/diffutils/tree/main/src/uudiff" +authors.workspace = true +categories.workspace = true +edition.workspace = true +rust-version.workspace = true +homepage.workspace = true +keywords.workspace = true +license.workspace = true +version.workspace = true + +# [package.metadata.docs.rs] +# all-features = true + +[lints] +workspace = true + +[lib] +path = "src/lib/lib.rs" + +[features] +# TODO How are features centralized in this workspace file? +# instead of limiting to KiB, MiB, etc, one can write kib, mib, Mb or whatever case. 
+# TODO feat_allow_case_insensitive_number_units = [] + +[dependencies] +# bytecount.workspace = true +chrono.workspace = true +clap.workspace = true +# coreutils.workspace = true +regex.workspace = true +rand = { workspace = true } +same-file.workspace = true +tempfile = { workspace = true } +unicode-width.workspace = true +uucore.workspace = true +os_display = "0.1.3" + +# Fluent dependencies (always available for localization) +fluent = { workspace = true } +fluent-syntax = { workspace = true } +unic-langid = { workspace = true } +fluent-bundle = { workspace = true } +thiserror = { workspace = true } + +[target.'cfg(unix)'.dependencies] +nix = { workspace = true, features = [ + "dir", + "fs", + "poll", + "signal", + "uio", + "user", + "zerocopy", +] } + +[dev-dependencies] +pretty_assertions.workspace = true +tempfile.workspace = true diff --git a/src/uudiff/LICENSE b/src/uudiff/LICENSE new file mode 120000 index 0000000..30cff74 --- /dev/null +++ b/src/uudiff/LICENSE @@ -0,0 +1 @@ +../../LICENSE \ No newline at end of file diff --git a/src/uudiff/build.rs b/src/uudiff/build.rs new file mode 100644 index 0000000..e3889c1 --- /dev/null +++ b/src/uudiff/build.rs @@ -0,0 +1,532 @@ +// This file is part of the uutils diffutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. 
+ +use std::env; +use std::fs::File; +use std::io::Write; +use std::path::{Path, PathBuf}; + +pub fn main() -> Result<(), Box> { + let out_dir = env::var("OUT_DIR")?; + + let mut embedded_file = File::create(Path::new(&out_dir).join("embedded_locales.rs"))?; + writeln!(embedded_file, "// Generated at compile time - do not edit")?; + writeln!( + embedded_file, + "// This file contains embedded English locale files" + )?; + writeln!(embedded_file)?; + // No imports needed for match-based lookup + writeln!(embedded_file)?; + + // Generate optimized lookup function instead of HashMap + writeln!( + embedded_file, + "pub fn get_embedded_locale(key: &str) -> Option<&'static str> {{" + )?; + writeln!(embedded_file, " match key {{")?; + + // Try to detect if we're building for a specific utility by checking build configuration + // This attempts to identify individual utility builds vs multicall binary builds + let target_utility = detect_target_utility(); + let locales_to_embed = get_locales_to_embed(); + + match target_utility { + Some(util_name) => { + // Embed only the specific utility's locale (cat.ftl for cat for example) + embed_single_utility_locale( + &mut embedded_file, + &project_root()?, + &util_name, + &locales_to_embed, + )?; + } + None => { + // Embed all utility locales (multicall binary or fallback) + embed_all_utility_locales(&mut embedded_file, &project_root()?, &locales_to_embed)?; + } + } + + writeln!(embedded_file, " _ => None,")?; + writeln!(embedded_file, " }}")?; + writeln!(embedded_file, "}}")?; + + embedded_file.flush()?; + Ok(()) +} + +/// Get the project root directory +/// +/// # Errors +/// +/// Returns an error if the `CARGO_MANIFEST_DIR` environment variable is not set +/// or if the current directory structure does not allow determining the project root. 
+fn project_root() -> Result> { + let manifest_dir = env::var("CARGO_MANIFEST_DIR")?; + let uudiff_path = Path::new(&manifest_dir); + + // Navigate from src/uudiff to project root + let project_root = uudiff_path + .parent() // src/ + .and_then(|p| p.parent()) // project root + .ok_or("Could not determine project root")?; + + Ok(project_root.to_path_buf()) +} + +/// Attempt to detect which specific utility is being built +fn detect_target_utility() -> Option { + use std::fs; + + // Tell Cargo to rerun if this environment variable changes + println!("cargo:rerun-if-env-changed=UUDIFF_TARGET_UTIL"); + + // First check if an explicit environment variable was set + if let Ok(target_util) = env::var("UUDIFF_TARGET_UTIL") { + if !target_util.is_empty() { + return Some(target_util); + } + } + + // Auto-detect utility name from CARGO_PKG_NAME if it's a uu_* package + if let Ok(pkg_name) = env::var("CARGO_PKG_NAME") { + if let Some(util_name) = pkg_name.strip_prefix("uu_") { + println!("cargo:warning=Auto-detected utility name: {util_name}"); + return Some(util_name.to_string()); + } + } + + // Check for a build configuration file in the target directory + if let Ok(target_dir) = env::var("CARGO_TARGET_DIR") { + let config_path = Path::new(&target_dir).join("uudiff_target_util.txt"); + if let Ok(content) = fs::read_to_string(&config_path) { + let util_name = content.trim(); + if !util_name.is_empty() && util_name != "multicall" { + return Some(util_name.to_string()); + } + } + } + + // Fallback: Check the default target directory + if let Ok(project_root) = project_root() { + let config_path = project_root.join("target/uudiff_target_util.txt"); + if let Ok(content) = fs::read_to_string(&config_path) { + let util_name = content.trim(); + if !util_name.is_empty() && util_name != "multicall" { + return Some(util_name.to_string()); + } + } + } + + // If no configuration found, assume multicall build + None +} + +/// Embed locale for a single specific utility +/// +/// # Errors 
+/// +/// Returns an error if the locales for `util_name` or `uudiff` cannot be found +/// or if writing to the `embedded_file` fails. +fn embed_single_utility_locale( + embedded_file: &mut File, + project_root: &Path, + util_name: &str, + locales_to_embed: &(String, Option), +) -> Result<(), Box> { + // Embed utility-specific locales + embed_component_locales(embedded_file, locales_to_embed, util_name, |locale| { + project_root + .join("src/uu") + .join(util_name) + .join(format!("locales/{locale}.ftl")) + })?; + + if util_name.ends_with("sum") { + embed_component_locales( + embedded_file, + locales_to_embed, + "checksum_common", + |locale| project_root.join(format!("src/uu/checksum_common/locales/{locale}.ftl")), + )?; + } + + // Always embed uudiff locale file if it exists + embed_component_locales(embedded_file, locales_to_embed, "uudiff", |locale| { + project_root.join(format!("src/uudiff/locales/{locale}.ftl")) + })?; + + Ok(()) +} + +/// Embed locale files for all utilities (multicall binary). +/// +/// # Errors +/// +/// Returns an error if the `src/uu` directory cannot be read, if any utility +/// locales cannot be embedded, or if flushing the `embedded_file` fails. +fn embed_all_utility_locales( + embedded_file: &mut File, + project_root: &Path, + locales_to_embed: &(String, Option), +) -> Result<(), Box> { + use std::fs; + + // Discover all uu_* directories + let src_uu_dir = project_root.join("src/uu"); + if !src_uu_dir.exists() { + // When src/uu doesn't exist (e.g., standalone uudiff from crates.io), + // embed a static list of utility locales that are commonly used + embed_static_utility_locales(embedded_file, locales_to_embed)?; + return Ok(()); + } + + let mut util_dirs = Vec::new(); + for entry in fs::read_dir(&src_uu_dir)? 
{ + let entry = entry?; + if entry.file_type()?.is_dir() { + if let Some(dir_name) = entry.file_name().to_str() { + util_dirs.push(dir_name.to_string()); + } + } + } + util_dirs.sort(); + + // Embed locale files for each utility + for util_name in &util_dirs { + embed_component_locales(embedded_file, locales_to_embed, util_name, |locale| { + src_uu_dir + .join(util_name) + .join(format!("locales/{locale}.ftl")) + })?; + } + + // Also embed uudiff locale file if it exists + embed_component_locales(embedded_file, locales_to_embed, "uudiff", |locale| { + project_root.join(format!("src/uudiff/locales/{locale}.ftl")) + })?; + + embedded_file.flush()?; + Ok(()) +} + +/// Embed static utility locales for crates.io builds. +/// +/// # Errors +/// +/// Returns an error if the directory containing the crate cannot be read or +/// if writing to the `embedded_file` fails. +fn embed_static_utility_locales( + embedded_file: &mut File, + locales_to_embed: &(String, Option), +) -> Result<(), Box> { + use std::env; + + writeln!( + embedded_file, + " // Static utility locales for crates.io builds" + )?; + + let manifest_dir = env::var("CARGO_MANIFEST_DIR").unwrap_or_default(); + let Some(registry_dir) = Path::new(&manifest_dir).parent() else { + return Ok(()); // nothing to scan + }; + + // First, try to embed uudiff locales - critical for common translations like "Usage:" + embed_component_locales(embedded_file, locales_to_embed, "uudiff", |locale| { + Path::new(&manifest_dir).join(format!("locales/{locale}.ftl")) + })?; + + // Collect and sort for deterministic builds + let mut entries: Vec<_> = std::fs::read_dir(registry_dir)? 
+ .filter_map(Result::ok) + .collect(); + entries.sort_by_key(std::fs::DirEntry::file_name); + + for entry in entries { + let file_name = entry.file_name(); + if let Some(dir_name) = file_name.to_str() { + // Match uu_- + if let Some((util_part, _)) = dir_name.split_once('-') { + if let Some(util_name) = util_part.strip_prefix("uu_") { + embed_component_locales( + embedded_file, + locales_to_embed, + util_name, + |locale| entry.path().join(format!("locales/{locale}.ftl")), + )?; + } + } + } + } + + Ok(()) +} + +/// Determines which locales to embed into the binary. +/// +/// To support localized messages in installed binaries (e.g., via `cargo install`), +/// this function identifies the user's current locale from the `LANG` environment +/// variable. +/// +/// It always includes "en-US" to ensure that a fallback is available if the +/// system locale's translation file is missing or if `LANG` is not set. +fn get_locales_to_embed() -> (String, Option) { + let system_locale = env::var("LANG").ok().and_then(|lang| { + let locale = lang.split('.').next()?.replace('_', "-"); + if locale != "en-US" && !locale.is_empty() { + Some(locale) + } else { + None + } + }); + ("en-US".to_string(), system_locale) +} + +/// Helper function to iterate over the locales to embed. +/// +/// # Errors +/// +/// Returns an error if the provided closure `f` returns an error when called +/// on either the primary or system locale. +fn for_each_locale( + locales: &(String, Option), + mut f: F, +) -> Result<(), Box> +where + F: FnMut(&str) -> Result<(), Box>, +{ + f(&locales.0)?; + if let Some(ref system_locale) = locales.1 { + f(system_locale)?; + } + Ok(()) +} + +/// Helper function to embed a single locale file. +/// +/// # Errors +/// +/// Returns an error if the file at `locale_path` cannot be read or if +/// writing to `embedded_file` fails. 
+fn embed_locale_file( + embedded_file: &mut File, + locale_path: &Path, + locale_key: &str, + locale: &str, + component: &str, +) -> Result<(), Box> { + use std::fs; + + if locale_path.exists() || locale_path.is_file() { + let content = fs::read_to_string(locale_path)?; + writeln!( + embedded_file, + " // Locale for {component} ({locale})" + )?; + // Determine if we need a hash. If content contains ", we need r#""# + let delimiter = if content.contains('"') { "#" } else { "" }; + writeln!( + embedded_file, + " \"{locale_key}\" => Some(r{delimiter}\"{content}\"{delimiter})," + )?; + + // Tell Cargo to rerun if this file changes + println!("cargo:rerun-if-changed={}", locale_path.display()); + } + Ok(()) +} + +/// Higher-level helper to embed locale files for a component with a path pattern. +/// +/// This eliminates the repetitive `for_each_locale` + `embed_locale_file` pattern. +/// +/// # Errors +/// +/// Returns an error if `for_each_locale` fails, which typically happens if +/// reading a locale file or writing to the `embedded_file` fails. 
+fn embed_component_locales( + embedded_file: &mut File, + locales: &(String, Option), + component_name: &str, + path_builder: F, +) -> Result<(), Box> +where + F: Fn(&str) -> PathBuf, +{ + for_each_locale(locales, |locale| { + let locale_path = path_builder(locale); + embed_locale_file( + embedded_file, + &locale_path, + &format!("{component_name}/{locale}.ftl"), + locale, + component_name, + ) + }) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn get_locales_to_embed_no_lang() { + unsafe { + env::remove_var("LANG"); + } + let (en_locale, system_locale) = get_locales_to_embed(); + assert_eq!(en_locale, "en-US"); + assert_eq!(system_locale, None); + + unsafe { + env::set_var("LANG", ""); + } + let (en_locale, system_locale) = get_locales_to_embed(); + assert_eq!(en_locale, "en-US"); + assert_eq!(system_locale, None); + unsafe { + env::remove_var("LANG"); + } + + unsafe { + env::set_var("LANG", "en_US.UTF-8"); + } + let (en_locale, system_locale) = get_locales_to_embed(); + assert_eq!(en_locale, "en-US"); + assert_eq!(system_locale, None); + unsafe { + env::remove_var("LANG"); + } + } + + #[test] + fn get_locales_to_embed_with_lang() { + unsafe { + env::set_var("LANG", "fr_FR.UTF-8"); + } + let (en_locale, system_locale) = get_locales_to_embed(); + assert_eq!(en_locale, "en-US"); + assert_eq!(system_locale, Some("fr-FR".to_string())); + unsafe { + env::remove_var("LANG"); + } + + unsafe { + env::set_var("LANG", "zh_CN.UTF-8"); + } + let (en_locale, system_locale) = get_locales_to_embed(); + assert_eq!(en_locale, "en-US"); + assert_eq!(system_locale, Some("zh-CN".to_string())); + unsafe { + env::remove_var("LANG"); + } + + unsafe { + env::set_var("LANG", "de"); + } + let (en_locale, system_locale) = get_locales_to_embed(); + assert_eq!(en_locale, "en-US"); + assert_eq!(system_locale, Some("de".to_string())); + unsafe { + env::remove_var("LANG"); + } + } + + #[test] + fn get_locales_to_embed_invalid_lang() { + // invalid locale format + unsafe { + 
env::set_var("LANG", "invalid"); + } + let (en_locale, system_locale) = get_locales_to_embed(); + assert_eq!(en_locale, "en-US"); + assert_eq!(system_locale, Some("invalid".to_string())); + unsafe { + env::remove_var("LANG"); + } + + // numeric values + unsafe { + env::set_var("LANG", "123"); + } + let (en_locale, system_locale) = get_locales_to_embed(); + assert_eq!(en_locale, "en-US"); + assert_eq!(system_locale, Some("123".to_string())); + unsafe { + env::remove_var("LANG"); + } + + // special characters + unsafe { + env::set_var("LANG", "@@@@"); + } + let (en_locale, system_locale) = get_locales_to_embed(); + assert_eq!(en_locale, "en-US"); + assert_eq!(system_locale, Some("@@@@".to_string())); + unsafe { + env::remove_var("LANG"); + } + + // malformed locale (no country code but with encoding) + unsafe { + env::set_var("LANG", "en.UTF-8"); + } + let (en_locale, system_locale) = get_locales_to_embed(); + assert_eq!(en_locale, "en-US"); + assert_eq!(system_locale, Some("en".to_string())); + unsafe { + env::remove_var("LANG"); + } + + // valid format but unusual locale + unsafe { + env::set_var("LANG", "XX_YY.UTF-8"); + } + let (en_locale, system_locale) = get_locales_to_embed(); + assert_eq!(en_locale, "en-US"); + assert_eq!(system_locale, Some("XX-YY".to_string())); + unsafe { + env::remove_var("LANG"); + } + } + + #[test] + fn for_each_locale_basic() { + let locales = ("en-US".to_string(), Some("fr-FR".to_string())); + let mut collected = Vec::new(); + + for_each_locale(&locales, |locale| { + collected.push(locale.to_string()); + Ok(()) + }) + .unwrap(); + + assert_eq!(collected, vec!["en-US", "fr-FR"]); + } + + #[test] + fn for_each_locale_no_system_locale() { + let locales = ("en-US".to_string(), None); + let mut collected = Vec::new(); + + for_each_locale(&locales, |locale| { + collected.push(locale.to_string()); + Ok(()) + }) + .unwrap(); + + assert_eq!(collected, vec!["en-US"]); + } + + #[test] + fn for_each_locale_error_handling() { + let locales = 
("en-US".to_string(), Some("fr-FR".to_string())); + + let result = for_each_locale(&locales, |_locale| Err("test error".into())); + + assert!(result.is_err()); + } +} diff --git a/src/uudiff/locales/en-US.ftl b/src/uudiff/locales/en-US.ftl new file mode 100644 index 0000000..8cb52ab --- /dev/null +++ b/src/uudiff/locales/en-US.ftl @@ -0,0 +1,98 @@ +# Error messages specific to DiffUtils +parse-error-conflicting-output-options = Conflicting output style options '--{ $opt1 }' and '--{ $opt2 }'. +parse-error-extra-operand = extra operand '{ $operand }' +parse-error-incompatible-options = options '--{ $opt1 }' and '--{ $opt2 }' are incompatible +parse-error-invalid-value = invalid value '{ $value }' for option '--{ $option }' +parse-error-invalid-context-length = invalid context length '{ $value }' +parse-error-invalid-unified-length = invalid unified length '{ $value }' +parse-error-invalid-value-overflow = invalid value '{ $value }' (too large) for option '--{ $option }' +parse-error-invalid-value-unit = invalid unit in '{ $value }' for option '--{ $option }' +parse-error-missing-operand = missing operand after '{ $after }' +parse-error-not-yet-implemented = the option '--{ $option}' is not yet implemented + +# *** This is a copy from coreutils *** +# Common strings shared across all uutils commands +# Mostly clap + +# Generic words +common-error = error +common-tip = tip +common-usage = Usage +common-help = help +common-version = version + +# Common clap error messages +clap-error-unexpected-argument = { $error_word }: unexpected option '{ $arg }' found +clap-error-unexpected-argument-simple = unexpected option +clap-error-similar-argument = { $tip_word }: a similar option exists: '{ $suggestion }' +clap-error-pass-as-value = { $tip_word }: to pass '{ $arg }' as a value, use '{ $tip_command }' +clap-error-invalid-value = { $error_word }: invalid value '{ $value }' for '{ $option }' +clap-error-value-required = { $error_word }: a value is required for '{ $option }' 
but none was supplied +clap-error-missing-required-arguments = { $error_word }: the following required options were not provided: +clap-error-possible-values = possible values +clap-error-help-suggestion = For more information, try '{ $command } --help'. +common-help-suggestion = For more information, try '--help'. +# For clap_localization +clap-error-ambiguous-argument=Error: Option '{ $arg }' is ambiguous. + Did you mean one of these? + +# Common help text patterns +help-flag-help = Print help information +help-flag-version = Print version information + +# Common error contexts +error-io = I/O error +error-permission-denied = Permission denied +error-file-not-found = No such file or directory +error-invalid-argument = Invalid argument +error-is-a-directory = { $file }: Is a directory + +# Common actions +action-copying = copying +action-moving = moving +action-removing = removing +action-creating = creating +action-reading = reading +action-writing = writing + +# SELinux error messages +selinux-error-not-enabled = SELinux is not enabled on this system +selinux-error-file-open-failure = failed to open the file: { $error } +selinux-error-context-retrieval-failure = failed to retrieve the security context: { $error } +selinux-error-context-set-failure = failed to set default file creation context to '{ $context }': { $error } +selinux-error-context-conversion-failure = failed to set default file creation context to '{ $context }': { $error } +selinux-error-operation-not-supported = operation not supported + +# SMACK error messages +smack-error-not-enabled = SMACK is not enabled on this system +smack-error-label-retrieval-failure = failed to get security context: { $error } +smack-error-label-set-failure = failed to set default file creation context to '{ $context }': { $error } +smack-error-no-label-set = no security context set + +# Safe traversal error messages +safe-traversal-error-path-contains-null = path contains null byte +safe-traversal-error-open-failed = 
failed to open { $path }: { $source } +safe-traversal-error-stat-failed = failed to stat { $path }: { $source } +safe-traversal-error-read-dir-failed = failed to read directory { $path }: { $source } +safe-traversal-error-unlink-failed = failed to unlink { $path }: { $source } +safe-traversal-error-invalid-fd = invalid file descriptor +safe-traversal-current-directory = +safe-traversal-directory = + +# checksum-related messages +checksum-no-properly-formatted = { $checksum_file }: no properly formatted checksum lines found +checksum-no-file-verified = { $checksum_file }: no file was verified +checksum-error-failed-to-read-input = failed to read input +checksum-bad-format = { $count -> + [1] { $count } line is improperly formatted + *[other] { $count } lines are improperly formatted +} +checksum-failed-cksum = { $count -> + [1] { $count } computed checksum did NOT match + *[other] { $count } computed checksums did NOT match +} +checksum-failed-open-file = { $count -> + [1] { $count } listed file could not be read + *[other] { $count } listed files could not be read +} +checksum-error-algo-bad-format = { $file }: { $line }: improperly formatted { $algo } checksum line diff --git a/src/uudiff/locales/fr-FR.ftl b/src/uudiff/locales/fr-FR.ftl new file mode 100644 index 0000000..f43896e --- /dev/null +++ b/src/uudiff/locales/fr-FR.ftl @@ -0,0 +1,89 @@ +# Error messages specific to DiffUtils +parse-error-conflicting-output-options = Options de formatage de sortie incompatibles '--{ $opt1 }' et '--{ $opt2 }'. 
+parse-error-extra-operand = opérande supplémentaire { $operand } +parse-error-incompatible-options = les options --{ $opt1 } et --{ $opt2 } sont incompatibles +parse-error-invalid-value = valeur invalide '{ $value }' pour '{ $option }' +parse-error-invalid-context-length = longueur de contexte non valide '{ $value }' +parse-error-invalid-unified-length = longueur unifiée non valide '{ $value }' +parse-error-invalid-value-overflow = valeur invalide '{ $value }' (trop grande) pour '{ $option }' +parse-error-invalid-value-unit = unité non valide dans '{ $value }' pour l'option '--{ $option }' +parse-error-missing-operand = opérande manquant après '{ $after }' +parse-error-not-yet-implemented = L'option '--{ $option}' n'est pas encore implémentée + +# *** This is a copy from coreutils *** +# Chaînes communes partagées entre toutes les commandes uutils +# Principalement pour clap + +# Mots génériques +common-error = erreur +common-tip = conseil +common-usage = Utilisation +common-help = aide +common-version = version + +# Messages d'erreur clap communs +clap-error-unexpected-argument = { $error_word } : argument inattendu '{ $arg }' trouvé +clap-error-unexpected-argument-simple = argument inattendu +clap-error-similar-argument = { $tip_word } : un argument similaire existe : '{ $suggestion }' +clap-error-pass-as-value = { $tip_word } : pour passer '{ $arg }' comme valeur, utilisez '{ $tip_command }' +clap-error-invalid-value = { $error_word } : valeur invalide '{ $value }' pour '{ $option }' +clap-error-value-required = { $error_word } : une valeur est requise pour '{ $option }' mais aucune n'a été fournie +clap-error-missing-required-arguments = { $error_word } : les arguments requis suivants n'ont pas été fournis : +clap-error-possible-values = valeurs possibles +clap-error-help-suggestion = Pour plus d'informations, essayez '{ $command } --help'. +common-help-suggestion = Pour plus d'informations, essayez '--help'. 
+ +# Modèles de texte d'aide communs +help-flag-help = Afficher les informations d'aide +help-flag-version = Afficher les informations de version + +# Contextes d'erreur communs +error-io = Erreur E/S +error-permission-denied = Permission refusée +error-file-not-found = Aucun fichier ou répertoire de ce type +error-invalid-argument = Argument invalide +error-is-a-directory = { $file }: Est un répertoire + +# Actions communes +action-copying = copie +action-moving = déplacement +action-removing = suppression +action-creating = création +action-reading = lecture +action-writing = écriture + +# Messages d'erreur SELinux +selinux-error-not-enabled = SELinux n'est pas activé sur ce système +selinux-error-file-open-failure = échec de l'ouverture du fichier : { $error } +selinux-error-context-retrieval-failure = échec de la récupération du contexte de sécurité : { $error } +selinux-error-context-set-failure = échec de la définition du contexte de création de fichier par défaut à '{ $context }' : { $error } +selinux-error-context-conversion-failure = échec de la définition du contexte de création de fichier par défaut à '{ $context }' : { $error } +selinux-error-operation-not-supported = opération non prise en charge + +# Messages d'erreur de traversée sécurisée +safe-traversal-error-path-contains-null = le chemin contient un octet null +safe-traversal-error-open-failed = échec de l'ouverture de { $path } : { $source } +safe-traversal-error-stat-failed = échec de l'analyse de { $path } : { $source } +safe-traversal-error-read-dir-failed = échec de la lecture du répertoire { $path } : { $source } +safe-traversal-error-unlink-failed = échec de la suppression de { $path } : { $source } +safe-traversal-error-invalid-fd = descripteur de fichier invalide +safe-traversal-current-directory = +safe-traversal-directory = + +# Messages relatifs au module checksum +checksum-no-properly-formatted = { $checksum_file }: aucune ligne correctement formatée n'a été trouvée 
+checksum-no-file-verified = { $checksum_file }: aucun fichier n'a été vérifié +checksum-error-failed-to-read-input = échec de la lecture de l'entrée +checksum-bad-format = { $count -> + [1] { $count } ligne invalide + *[other] { $count } lignes invalides +} +checksum-failed-cksum = { $count -> + [1] { $count } somme de hachage ne correspond PAS + *[other] { $count } sommes de hachage ne correspondent PAS +} +checksum-failed-open-file = { $count -> + [1] { $count } fichier passé n'a pas pu être lu + *[other] { $count } fichiers passés n'ont pas pu être lus +} +checksum-error-algo-bad-format = { $file }: { $line }: ligne invalide pour { $algo } diff --git a/src/uudiff/src/lib/features.rs b/src/uudiff/src/lib/features.rs new file mode 100644 index 0000000..75a11fa --- /dev/null +++ b/src/uudiff/src/lib/features.rs @@ -0,0 +1,2 @@ +// #[cfg(feature = "benchmark")] +pub mod benchmark; diff --git a/src/uudiff/src/lib/features/benchmark.rs b/src/uudiff/src/lib/features/benchmark.rs new file mode 100644 index 0000000..7311ad1 --- /dev/null +++ b/src/uudiff/src/lib/features/benchmark.rs @@ -0,0 +1,227 @@ +// This file is part of the uutils diffutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +//! Common benchmark utilities for uutils diffutils. +//! +//! This module provides shared functionality for benchmarking utilities, +//! including test data generation and binary execution helpers. + +use std::ffi::OsString; + +/// Converts a string to a `Vec<OsString>` which can be used as args \ +/// to pass to the utilities, e.g. "diff file_a file_b -w 150". +/// +/// # Returns +/// A `Vec<OsString>` which can be used instead of ArgsOs. 
+pub fn str_to_args(args: &str) -> Vec { + let s: Vec = args + .split(' ') + .filter(|s| !s.is_empty()) + .map(OsString::from) + .collect(); + + s +} + +pub mod prepare_bench { + use std::{ + fs::File, + io::{BufWriter, Write}, + path::Path, + }; + + use rand::RngExt; + use tempfile::TempDir; + + /// When a file is changed to be different, a char is inserted. + const CHANGE_INDICATION_CHAR: u8 = b'#'; + // const FILE_SIZES_IN_KILO_BYTES: [u64; 2] = [100, 1 * 1000]; + + // file lines and .txt will be added + const FROM_FILE: &str = "from_file"; + const TO_FILE: &str = "to_file"; + const LINE_LENGTH: usize = 60; + + #[derive(Debug, Default)] + pub struct FilePair { + pub from: String, + pub to: String, + pub size_bytes: u64, + } + + /// Contains test data (file names) which only needs to be created once. + #[derive(Debug, Default)] + pub struct BenchContext { + /// Optional TempDir directory. When set, the dir is of no relevance. + pub tmp_dir: Option, + /// Directory path if TempDir is not set. + pub dir: String, + /// list of files in different sizes + pub files_equal: Vec, + /// list of files in different sizes + pub files_different: Vec, + } + + impl BenchContext { + pub fn get_path(&self) -> &Path { + match &self.tmp_dir { + Some(tmp) => tmp.path(), + None => Path::new(&self.dir), + } + } + + pub fn get_files_equal_kb(&self, kb: u64) -> Option<&FilePair> { + self.get_files_equal(kb * 1000) + } + + pub fn get_files_equal(&self, bytes: u64) -> Option<&FilePair> { + let p = self.files_equal.iter().find(|f| f.size_bytes == bytes)?; + Some(p) + } + + pub fn get_files_different_kb(&self, kb: u64) -> Option<&FilePair> { + self.get_files_different(kb * 1000) + } + + pub fn get_files_different(&self, bytes: u64) -> Option<&FilePair> { + let p = self + .files_different + .iter() + .find(|f| f.size_bytes == bytes)?; + Some(p) + } + } + + /// Generates two test files for comparison with size. 
+ /// + /// # Params + /// * dir: the directory where the files are created (TempDir suggested) + /// * bytes: the number of bytes the files will be long (exactly) + /// * num_difference: the number of differences inserted in the diff file + /// * id: added to the file names to differentiate for different tests + /// + /// # Returns + /// (from_file_name, to_file_name): Two files of the specified size in bytes. + /// + /// Each line consists of 10 words with 5 letters, giving a line length of 60 bytes. + /// If num_differences is set, '#' will be inserted between the first two words of a line, + /// evenly spaced in the file. 1 will add the change in the last line, so the comparison takes longest. + pub fn generate_test_files_bytes( + dir: &Path, + bytes: u64, + num_differences: u64, + id: &str, + ) -> std::io::Result { + let id = if id.is_empty() { + String::new() + } else { + format!("{id}_") + }; + let f1 = format!("{id}{FROM_FILE}_{bytes}.txt"); + let f2 = format!("{id}{TO_FILE}_{bytes}.txt"); + let from_path = dir.join(f1); + let to_path = dir.join(f2); + + generate_file_bytes(&from_path, &to_path, bytes, num_differences)?; + + Ok(FilePair { + from: from_path.to_string_lossy().to_string(), + to: to_path.to_string_lossy().to_string(), + size_bytes: bytes, + }) + } + + /// Generates two test files for comparison with size. + /// + /// # Returns + /// Ok when the files were created. + /// + /// Like [generate_test_files_bytes] with specified file names. \ + /// The function must generate two files at once to quickly create + /// files with minimal differences. 
+ pub fn generate_file_bytes( + from_name: &Path, + to_name: &Path, + bytes: u64, + num_differences: u64, + ) -> std::io::Result<()> { + let file_from = File::create(from_name)?; + let file_to = File::create(to_name)?; + // for int division, lines will be smaller than requested bytes + let n_lines = bytes / LINE_LENGTH as u64; + let change_every_n_lines = if num_differences == 0 { + 0 + } else { + let c = n_lines / num_differences; + if c == 0 { 1 } else { c } + }; + // Use a larger 128KB buffer for massive files + let mut writer_from = BufWriter::with_capacity(128 * 1024, file_from); + let mut writer_to = BufWriter::with_capacity(128 * 1024, file_to); + let mut rng = rand::rng(); + + // Each line: (5 chars * 10 words) + 9 spaces + 1 newline = 60 bytes + let mut line_buffer = [b' '; 60]; + line_buffer[59] = b'\n'; // Set the newline once at the end + + for i in (0..n_lines).rev() { + // Fill only the letter positions, skipping spaces and the newline + for word_idx in 0..10 { + let start = word_idx * 6; // Each word + space block is 6 bytes + for i in 0..5 { + line_buffer[start + i] = rng.random_range(b'a'..b'z' + 1); + } + } + + // Write the raw bytes directly to both files + writer_from.write_all(&line_buffer)?; + // make changes in the file + if num_differences == 0 { + writer_to.write_all(&line_buffer)?; + } else { + if i % change_every_n_lines == 0 && n_lines - i > 2 { + line_buffer[5] = CHANGE_INDICATION_CHAR; + } + writer_to.write_all(&line_buffer)?; + line_buffer[5] = b' '; + } + } + + // create last line + let missing = (bytes - n_lines * LINE_LENGTH as u64) as usize; + if missing > 0 { + for word_idx in 0..10 { + let start = word_idx * 6; // Each word + space block is 6 bytes + for i in 0..5 { + line_buffer[start + i] = rng.random_range(b'a'..b'z' + 1); + } + } + line_buffer[missing - 1] = b'\n'; + writer_from.write_all(&line_buffer[0..missing])?; + writer_to.write_all(&line_buffer[0..missing])?; + } + + writer_from.flush()?; + writer_to.flush()?; + + 
Ok(()) + } +} + +/// Benchmark tools which are designed to call the compiled executable. +pub mod bench_binary { + use std::process::Command; + + use crate::benchmark::str_to_args; + + pub fn bench_binary(program: &str, cmd_args: &str) -> std::process::ExitStatus { + // TODO let mut cmd = cargo_bin_cmd!("diffutils"); + let args = str_to_args(cmd_args); + Command::new(program) + .args(args) + .status() + .expect("Failed to execute binary") + } +} diff --git a/src/uudiff/src/lib/lib.rs b/src/uudiff/src/lib/lib.rs new file mode 100644 index 0000000..e9df517 --- /dev/null +++ b/src/uudiff/src/lib/lib.rs @@ -0,0 +1,93 @@ +//## internal modules +mod features; // feature-gated code modules +mod macros; // crate macros (macro_rules-type; exported to `crate::...`) +mod mods; // core cross-platform modules + +pub use crate::mods::utils; + +// * cross-platform modules +pub use crate::mods::clap_localization; +pub use crate::mods::common_errors; +pub use crate::mods::error; +pub use crate::mods::locale; + +// * feature-gated modules +// #[cfg(feature = "benchmark")] +pub use crate::features::benchmark; + +/// Execute utility code for `util`. +/// +/// This macro expands to a main function that invokes the `uumain` function in `util` +/// Exits with code returned by `uumain`. +#[macro_export] +macro_rules! 
bin { + ($util:ident) => { + pub fn main() { + use std::io::Write; + use uudiff::locale; + + // Preserve inherited SIGPIPE settings (e.g., from env --default-signal=PIPE) + uucore::panic::preserve_inherited_sigpipe(); + + // suppress extraneous error output for SIGPIPE failures/panics + uucore::panic::mute_sigpipe_panic(); + locale::setup_localization(uucore::get_canonical_util_name(stringify!($util))) + .unwrap_or_else(|err| { + match err { + uudiff::locale::LocalizationError::ParseResource { + error: err_msg, + snippet, + } => eprintln!("Localization parse error at {snippet}: {err_msg:?}"), + other => eprintln!("Could not init the localization system: {other}"), + } + std::process::exit(99) + }); + + // execute utility code + let code = $util::uumain(uucore::args_os()); + // (defensively) flush stdout for utility prior to exit; see + if let Err(e) = std::io::stdout().flush() { + eprintln!("Error flushing stdout: {e}"); + } + + std::process::exit(code); + } + }; +} + +/// Create a localized help template with explicit color control +/// This ensures color detection consistency between clap and our template +pub fn localized_help_template_with_colors( + util_name: &str, + colors_enabled: bool, +) -> clap::builder::StyledStr { + use std::fmt::Write; + + // Ensure localization is initialized for this utility + let _ = locale::setup_localization(util_name); + + // Get the localized "Usage" label + let usage_label = crate::locale::translate!("common-usage"); + + // Create a styled template + let mut template = clap::builder::StyledStr::new(); + + // Add the basic template parts + writeln!(template, "{{before-help}}{{about-with-newline}}").unwrap(); + + // Add styled usage header (bold + underline like clap's default) + if colors_enabled { + write!( + template, + "\x1b[1m\x1b[4m{usage_label}:\x1b[0m {{usage}}\n\n" + ) + .unwrap(); + } else { + write!(template, "{usage_label}: {{usage}}\n\n").unwrap(); + } + + // Add the rest + write!(template, 
"{{all-args}}{{after-help}}").unwrap(); + + template +} diff --git a/src/macros.rs b/src/uudiff/src/lib/macros.rs similarity index 76% rename from src/macros.rs rename to src/uudiff/src/lib/macros.rs index 90a4eaa..81e0461 100644 --- a/src/macros.rs +++ b/src/uudiff/src/lib/macros.rs @@ -1,5 +1,12 @@ +// This file is part of the uutils diffutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +// spell-checker:ignore replacen + // asserts equality of the actual diff and expected diff -// considering datetime varitations +// considering datetime variations // // It replaces the modification time in the actual diff // with placeholder "TIMESTAMP" and then asserts the equality diff --git a/src/uudiff/src/lib/mods.rs b/src/uudiff/src/lib/mods.rs new file mode 100644 index 0000000..5192348 --- /dev/null +++ b/src/uudiff/src/lib/mods.rs @@ -0,0 +1,6 @@ +// pub mod arg_parser; +pub mod clap_localization; +pub mod common_errors; +pub mod error; +pub mod locale; +pub mod utils; diff --git a/src/uudiff/src/lib/mods/clap_localization.rs b/src/uudiff/src/lib/mods/clap_localization.rs new file mode 100644 index 0000000..5811054 --- /dev/null +++ b/src/uudiff/src/lib/mods/clap_localization.rs @@ -0,0 +1,744 @@ +// This file is part of the uutils diffutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. +// spell-checker:ignore (path) osrelease myutil + +//! Helper clap functions to localize error handling and options +//! +//! This module provides utilities for handling clap errors with localization support. +//! It uses clap's error context API to extract structured information from errors +//! instead of parsing error strings, providing a more robust solution. +//! 
+ +use crate::error::{UResult, USimpleError}; +use crate::locale::translate; + +use clap::error::{ContextKind, ErrorKind}; +use clap::{ArgMatches, Command, Error}; + +use std::error::Error as StdError; +use std::ffi::OsString; + +use std::io::Write as _; +use std::io::stderr; + +/// Color enum for consistent styling +#[derive(Debug, Clone, Copy)] +pub enum Color { + Red, + Yellow, + Green, +} + +impl Color { + fn code(self) -> &'static str { + match self { + Self::Red => "31", + Self::Yellow => "33", + Self::Green => "32", + } + } +} + +/// Determine color choice based on environment variables +fn get_color_choice() -> clap::ColorChoice { + if std::env::var("NO_COLOR").is_ok() { + clap::ColorChoice::Never + } else if std::env::var("CLICOLOR_FORCE").is_ok() || std::env::var("FORCE_COLOR").is_ok() { + clap::ColorChoice::Always + } else { + clap::ColorChoice::Auto + } +} + +/// Generic helper to check if colors should be enabled for a given stream +fn should_use_color_for_stream(stream: &S) -> bool { + match get_color_choice() { + clap::ColorChoice::Always => true, + clap::ColorChoice::Never => false, + clap::ColorChoice::Auto => { + stream.is_terminal() && std::env::var("TERM").unwrap_or_default() != "dumb" + } + } +} + +/// Manages color output based on environment settings +struct ColorManager(bool); + +impl ColorManager { + /// Create a new ColorManager based on environment variables + fn from_env() -> Self { + Self(should_use_color_for_stream(&stderr())) + } + + /// Apply color to text if colors are enabled + fn colorize(&self, text: &str, color: Color) -> String { + if self.0 { + format!("\x1b[{}m{text}\x1b[0m", color.code()) + } else { + text.to_string() + } + } +} + +/// Unified error formatter that handles all error types consistently +pub struct ErrorFormatter<'a> { + color_mgr: ColorManager, + util_name: &'a str, +} + +impl<'a> ErrorFormatter<'a> { + pub fn new(util_name: &'a str) -> Self { + Self { + color_mgr: ColorManager::from_env(), + util_name, + } + 
} + + /// Print error and exit with the specified code + fn print_error_and_exit(&self, err: &Error, exit_code: i32) -> ! { + self.print_error_and_exit_with_callback(err, exit_code, || {}) + } + + /// Print error with optional callback before exit + pub fn print_error_and_exit_with_callback( + &self, + err: &Error, + exit_code: i32, + callback: F, + ) -> ! + where + F: FnOnce(), + { + let code = self.print_error(err, exit_code); + callback(); + std::process::exit(code); + } + + /// Print error and return exit code (no exit call) + pub fn print_error(&self, err: &Error, exit_code: i32) -> i32 { + match err.kind() { + ErrorKind::DisplayHelp | ErrorKind::DisplayVersion => self.handle_display_errors(err), + ErrorKind::UnknownArgument => self.handle_unknown_argument(err, exit_code), + ErrorKind::InvalidValue | ErrorKind::ValueValidation => { + self.handle_invalid_value(err, exit_code) + } + ErrorKind::MissingRequiredArgument => self.handle_missing_required(err, exit_code), + ErrorKind::TooFewValues | ErrorKind::TooManyValues | ErrorKind::WrongNumberOfValues => { + // These need full clap formatting + let _ = write!(stderr(), "{}", err.render()); + exit_code + } + _ => self.handle_generic_error(err, exit_code), + } + } + + /// Handle help and version display + fn handle_display_errors(&self, err: &Error) -> i32 { + print!("{}", err.render()); + 0 + } + + /// Handle unknown argument errors + fn handle_unknown_argument(&self, err: &Error, exit_code: i32) -> i32 { + if let Some(invalid_arg) = err.get(ContextKind::InvalidArg) { + let arg_str = invalid_arg.to_string(); + let error_word = translate!("common-error"); + + // Print main error + let _ = write!( + stderr(), + "{}\n\n", + translate!( + "clap-error-unexpected-argument", + "arg" => self.color_mgr.colorize(&arg_str, Color::Yellow), + "error_word" => self.color_mgr.colorize(&error_word, Color::Red) + ) + ); + + // Show suggestion if available + if let Some(suggested_arg) = err.get(ContextKind::SuggestedArg) { + let 
tip_word = translate!("common-tip"); + let _ = write!( + stderr(), + "{}\n\n", + translate!( + "clap-error-similar-argument", + "tip_word" => self.color_mgr.colorize(&tip_word, Color::Green), + "suggestion" => self.color_mgr.colorize(&suggested_arg.to_string(), Color::Green) + ) + ); + } else { + // Look for other tips from clap + self.print_clap_tips(err); + } + + self.print_usage_and_help(); + } else { + self.print_simple_error_msg(&translate!("clap-error-unexpected-argument-simple")); + } + exit_code + } + + /// Handle invalid value errors + fn handle_invalid_value(&self, err: &Error, exit_code: i32) -> i32 { + let invalid_arg = err.get(ContextKind::InvalidArg); + let invalid_value = err.get(ContextKind::InvalidValue); + + if let (Some(arg), Some(value)) = (invalid_arg, invalid_value) { + let option = arg.to_string(); + let value = value.to_string(); + + if value.is_empty() { + // Value required but not provided + let error_word = translate!("common-error"); + let _ = writeln!( + stderr(), + "{}", + translate!("clap-error-value-required", + "error_word" => self.color_mgr.colorize(&error_word, Color::Red), + "option" => self.color_mgr.colorize(&option, Color::Green)) + ); + } else { + // Invalid value provided + let error_word = translate!("common-error"); + let error_msg = translate!( + "clap-error-invalid-value", + "error_word" => self.color_mgr.colorize(&error_word, Color::Red), + "value" => self.color_mgr.colorize(&value, Color::Yellow), + "option" => self.color_mgr.colorize(&option, Color::Green) + ); + // Include validation error if present + match err.source() { + Some(source) if matches!(err.kind(), ErrorKind::ValueValidation) => { + let _ = writeln!(stderr(), "{error_msg}: {source}"); + } + _ => eprintln!("{error_msg}"), + } + } + + // Show possible values for InvalidValue errors + if matches!(err.kind(), ErrorKind::InvalidValue) { + if let Some(valid_values) = err.get(ContextKind::ValidValue) { + if !valid_values.to_string().is_empty() { + let _ = 
writeln!( + stderr(), + "\n [{}: {valid_values}]", + translate!("clap-error-possible-values") + ); + } + } + } + let _ = writeln!(stderr(), "\n{}", translate!("common-help-suggestion")); + } else { + self.print_simple_error_msg(&err.render().to_string()); + } + + // InvalidValue errors traditionally use exit code 1 for backward compatibility + // But if a utility explicitly requests a high exit code (>= 125), respect it + // This allows utilities like runcon (125) to override the default while preserving + // the standard behavior for utilities using normal error codes (1, 2, etc.) + if matches!(err.kind(), ErrorKind::InvalidValue) && exit_code < 125 { + 1 // Force exit code 1 for InvalidValue unless using special exit codes + } else { + exit_code // Respect the requested exit code for special cases + } + } + + /// Handle missing required argument errors + fn handle_missing_required(&self, err: &Error, exit_code: i32) -> i32 { + let rendered_str = err.render().to_string(); + let lines: Vec<&str> = rendered_str.lines().collect(); + + match lines.first() { + Some(first_line) + if first_line + .starts_with("error: the following required arguments were not provided:") => + { + let error_word = translate!("common-error"); + let _ = writeln!( + stderr(), + "{}", + translate!( + "clap-error-missing-required-arguments", + "error_word" => self.color_mgr.colorize(&error_word, Color::Red) + ) + ); + + // Print the missing arguments + for line in lines.iter().skip(1) { + if line.starts_with(" ") { + let _ = writeln!(stderr(), "{line}"); + } else if line.starts_with("Usage:") || line.starts_with("For more information") + { + break; + } + } + let _ = writeln!(stderr()); + + // Print usage + lines + .iter() + .skip_while(|line| !line.starts_with("Usage:")) + .for_each(|line| { + if line.starts_with("For more information, try '--help'.") { + let _ = writeln!(stderr(), "{}", translate!("common-help-suggestion")); + } else { + let _ = writeln!(stderr(), "{line}"); + } + }); + } + _ 
=> eprint!("{}", err.render()), + } + exit_code + } + + /// Handle generic errors + fn handle_generic_error(&self, err: &Error, exit_code: i32) -> i32 { + let rendered_str = err.render().to_string(); + if let Some(main_error_line) = rendered_str.lines().next() { + self.print_localized_error_line(main_error_line); + let _ = writeln!(stderr(), "\n{}", translate!("common-help-suggestion")); + } else { + let _ = write!(stderr(), "{}", err.render()); + } + exit_code + } + + /// Print a simple error message (no exit) + fn print_simple_error_msg(&self, message: &str) { + let error_word = translate!("common-error"); + let _ = writeln!( + stderr(), + "{}: {message}", + self.color_mgr.colorize(&error_word, Color::Red) + ); + } + + /// Print error line with localized "error:" prefix + fn print_localized_error_line(&self, line: &str) { + let error_word = translate!("common-error"); + let colored_error = self.color_mgr.colorize(&error_word, Color::Red); + + if let Some(colon_pos) = line.find(':') { + let after_colon = &line[colon_pos..]; + let _ = writeln!(stderr(), "{colored_error}{after_colon}"); + } else { + let _ = writeln!(stderr(), "{line}"); + } + } + + /// Extract and print clap's built-in tips + fn print_clap_tips(&self, err: &Error) { + let rendered_str = err.render().to_string(); + for line in rendered_str.lines() { + let trimmed = line.trim_start(); + if trimmed.starts_with("tip:") && !line.contains("similar argument") { + let tip_word = translate!("common-tip"); + if let Some(colon_pos) = trimmed.find(':') { + let after_colon = &trimmed[colon_pos..]; + let _ = writeln!( + stderr(), + " {}{after_colon}", + self.color_mgr.colorize(&tip_word, Color::Green) + ); + } else { + let _ = writeln!(stderr(), "{line}"); + } + let _ = writeln!(stderr()); + } + } + } + + /// Print usage information and help suggestion + fn print_usage_and_help(&self) { + let usage_key = format!("{}-usage", self.util_name); + let usage_text = translate!(&usage_key); + let formatted_usage = 
uucore::format_usage(&usage_text); + let usage_label = translate!("common-usage"); + let _ = writeln!( + stderr(), + "{usage_label}: {formatted_usage}\n\n{}", + translate!("common-help-suggestion") + ); + } +} + +/// Handles clap command parsing results with proper localization support. +/// +/// This is the main entry point for processing command-line arguments with localized error messages. +/// It parses the provided arguments and returns either the parsed matches or handles errors with +/// localized messages. +/// +/// # Arguments +/// +/// * `cmd` - The clap `Command` to parse arguments against +/// * `itr` - An iterator of command-line arguments to parse +/// +/// # Returns +/// +/// * `Ok(ArgMatches)` - Successfully parsed command-line arguments +/// * `Err` - For help/version display (preserves original styling) +/// +/// # Examples +/// +/// ```no_run +/// use clap::Command; +/// use uudiff::clap_localization::handle_clap_result; +/// +/// let cmd = Command::new("myutil"); +/// let args = vec!["myutil", "--help"]; +/// let result = handle_clap_result(cmd, args); +/// ``` +pub fn handle_clap_result(cmd: Command, itr: I) -> UResult +where + I: IntoIterator, + T: Into + Clone, +{ + handle_clap_result_with_exit_code(cmd, itr, 1) +} + +/// Handles clap command parsing with a custom exit code for errors. +/// +/// Similar to `handle_clap_result` but allows specifying a custom exit code +/// for error conditions. This is useful for utilities that need specific +/// exit codes for different error types. 
+/// +/// # Arguments +/// +/// * `cmd` - The clap `Command` to parse arguments against +/// * `itr` - An iterator of command-line arguments to parse +/// * `exit_code` - The exit code to report for parsing errors +/// +/// # Returns +/// +/// * `Ok(ArgMatches)` - Successfully parsed command-line arguments +/// * `Err` - For help/version display (preserves original styling) and for parsing errors +/// +/// # Exit Behavior +/// +/// This function does not call `std::process::exit()` itself: parsing errors are returned as an +/// `Err` carrying the exit code to use (help/version errors are passed through unchanged). +/// +/// # Examples +/// +/// ```no_run +/// use clap::Command; +/// use uudiff::clap_localization::handle_clap_result_with_exit_code; +/// +/// let cmd = Command::new("myutil"); +/// let args = vec!["myutil", "--invalid"]; +/// let result = handle_clap_result_with_exit_code(cmd, args, 125); +/// ``` +pub fn handle_clap_result_with_exit_code( + mut cmd: Command, + itr: I, + exit_code: i32, +) -> UResult +where + I: IntoIterator, + T: Into + Clone, +{ + // cloning args for double use in error case + let args = itr.into_iter().collect::>(); + let itr = args.clone(); + + // using mut to avoid cloning cmd + cmd.try_get_matches_from_mut(itr).map_err(|e| { + if e.exit_code() == 0 { + e.into() // Preserve help/version + } else { + // find ambiguous options + if e.kind() == ErrorKind::UnknownArgument || e.kind() == ErrorKind::InvalidSubcommand { + // Find the string the user actually typed (e.g., "--de") + // for arg in &itr {} + let args_str: Vec = args + .into_iter() + .map(|t| { + let o: OsString = t.into(); + o.to_string_lossy().to_string() + }) + .collect(); + if let Some(provided) = args_str.iter().find(|a| a.starts_with("--")) { + let search_term = provided.trim_start_matches("--"); + + // Manually filter all defined long arguments + let mut matches: Vec<_> = cmd + .get_arguments() + .filter_map(|arg| arg.get_long()) + .filter(|l| l.starts_with(search_term)) + .collect(); + + if matches.len() > 1
{ + let mut msg = + translate!("clap-error-ambiguous-argument", "arg" => provided); + matches.sort_unstable(); + for m in matches { + msg.push_str(&format!("\n --{m}")); + } + return USimpleError::new(exit_code, msg); + } + } + } + + let formatter = ErrorFormatter::new(uucore::util_name()); + let code = formatter.print_error(&e, exit_code); + USimpleError::new(code, "") + } + }) +} + +/// Handles a clap error directly with a custom exit code. +/// +/// This function processes a clap error and exits the program with the specified +/// exit code. It formats error messages with proper localization and color support +/// based on environment variables. +/// +/// # Arguments +/// +/// * `err` - The clap `Error` to handle +/// * `exit_code` - The exit code to use when exiting (help/version display always exits with 0) +/// +/// # Exit Behavior +/// +/// This function never returns - it always calls `std::process::exit()`. +/// +/// # Examples +/// +/// ```no_run +/// use clap::Command; +/// use uudiff::clap_localization::handle_clap_error_with_exit_code; +/// +/// let cmd = Command::new("myutil"); +/// match cmd.try_get_matches() { +/// Ok(matches) => { /* handle matches */ }, +/// Err(e) => handle_clap_error_with_exit_code(e, 1), +/// } +/// ``` +pub fn handle_clap_error_with_exit_code(err: Error, exit_code: i32) -> ! { + let formatter = ErrorFormatter::new(uucore::util_name()); + formatter.print_error_and_exit(&err, exit_code); +} + +/// Configures a clap `Command` with proper localization and color settings. +/// +/// This function sets up a `Command` with: +/// - Appropriate color settings based on environment variables (`NO_COLOR`, `CLICOLOR_FORCE`, etc.) +/// - Localized help template with proper formatting +/// - TTY detection for automatic color enabling/disabling +/// +/// # Arguments +/// +/// * `cmd` - The clap `Command` to configure +/// +/// # Returns +/// +/// The configured `Command` with localization and color settings applied.
+/// +/// # Environment Variables +/// +/// The following environment variables affect color output: +/// - `NO_COLOR` - Disables all color output +/// - `CLICOLOR_FORCE` or `FORCE_COLOR` - Forces color output even when not in a TTY +/// - `TERM` - If set to "dumb", colors are disabled in auto mode +/// +/// # Examples +/// +/// ```no_run +/// use clap::Command; +/// use uudiff::clap_localization::configure_localized_command; +/// +/// let cmd = Command::new("myutil") +/// .arg(clap::Arg::new("input").short('i')); +/// let configured_cmd = configure_localized_command(cmd); +/// ``` +pub fn configure_localized_command(mut cmd: Command) -> Command { + let color_choice = get_color_choice(); + cmd = cmd.color(color_choice); + + // For help output (stdout), we check stdout TTY status + let colors_enabled = should_use_color_for_stream(&std::io::stdout()); + + cmd = cmd.help_template(crate::localized_help_template_with_colors( + uucore::util_name(), + colors_enabled, + )); + cmd +} + +/* spell-checker: disable */ +#[cfg(test)] +mod tests { + use super::*; + use clap::{Arg, Command}; + use std::ffi::OsString; + + #[test] + fn test_color_codes() { + assert_eq!(Color::Red.code(), "31"); + assert_eq!(Color::Yellow.code(), "33"); + assert_eq!(Color::Green.code(), "32"); + } + + #[test] + fn test_color_manager() { + let mgr = ColorManager(true); + let red_text = mgr.colorize("error", Color::Red); + assert_eq!(red_text, "\x1b[31merror\x1b[0m"); + + let mgr_disabled = ColorManager(false); + let plain_text = mgr_disabled.colorize("error", Color::Red); + assert_eq!(plain_text, "error"); + } + + fn create_test_command() -> Command { + Command::new("test") + .arg( + Arg::new("input") + .short('i') + .long("input") + .value_name("FILE") + .help("Input file"), + ) + .arg( + Arg::new("output") + .short('o') + .long("output") + .value_name("FILE") + .help("Output file"), + ) + .arg( + Arg::new("format") + .long("format") + .value_parser(["json", "xml", "csv"]) + .help("Output format"), + 
) + } + + #[test] + fn test_handle_clap_result_with_valid_args() { + let cmd = create_test_command(); + let result = handle_clap_result(cmd, vec!["test", "--input", "file.txt"]); + assert!(result.is_ok()); + let matches = result.unwrap(); + assert_eq!(matches.get_one::("input").unwrap(), "file.txt"); + } + + #[test] + fn test_handle_clap_result_with_osstring() { + let args: Vec = vec!["test".into(), "--output".into(), "out.txt".into()]; + let cmd = create_test_command(); + let result = handle_clap_result(cmd, args); + assert!(result.is_ok()); + let matches = result.unwrap(); + assert_eq!(matches.get_one::("output").unwrap(), "out.txt"); + } + + #[test] + fn test_configure_localized_command() { + let cmd = Command::new("test"); + let configured = configure_localized_command(cmd); + // The command should have color and help template configured + // We can't easily test the internal state, but we can verify it doesn't panic + assert_eq!(configured.get_name(), "test"); + } + + #[test] + fn test_color_environment_vars() { + use std::env; + + // Test NO_COLOR disables colors + unsafe { + env::set_var("NO_COLOR", "1"); + } + assert_eq!(get_color_choice(), clap::ColorChoice::Never); + assert!(!should_use_color_for_stream(&stderr())); + let mgr = ColorManager::from_env(); + assert!(!mgr.0); + unsafe { + env::remove_var("NO_COLOR"); + } + + // Test CLICOLOR_FORCE enables colors + unsafe { + env::set_var("CLICOLOR_FORCE", "1"); + } + assert_eq!(get_color_choice(), clap::ColorChoice::Always); + assert!(should_use_color_for_stream(&stderr())); + let mgr = ColorManager::from_env(); + assert!(mgr.0); + unsafe { + env::remove_var("CLICOLOR_FORCE"); + } + + // Test FORCE_COLOR also enables colors + unsafe { + env::set_var("FORCE_COLOR", "1"); + } + assert_eq!(get_color_choice(), clap::ColorChoice::Always); + assert!(should_use_color_for_stream(&stderr())); + unsafe { + env::remove_var("FORCE_COLOR"); + } + } + + #[test] + fn test_error_formatter_creation() { + let formatter = 
ErrorFormatter::new("test"); + assert_eq!(formatter.util_name, "test"); + // Color manager should be created based on environment + } + + #[test] + fn test_localization_keys_exist() { + use crate::locale::{get_message, setup_localization}; + + let _ = setup_localization("test"); + + let required_keys = [ + "common-error", + "common-usage", + "common-tip", + "common-help-suggestion", + "clap-error-unexpected-argument", + "clap-error-invalid-value", + "clap-error-missing-required-arguments", + "clap-error-similar-argument", + "clap-error-possible-values", + "clap-error-value-required", + ]; + + for key in &required_keys { + let message = get_message(key); + assert_ne!(message, *key, "Translation missing for key: {key}"); + } + } + + #[test] + fn test_french_localization() { + use crate::locale::{get_message, setup_localization}; + use std::env; + + let original_lang = env::var_os("LANG").unwrap_or_default(); + + unsafe { + env::set_var("LANG", "fr_FR.UTF-8"); + } + + if setup_localization("test").is_ok() { + assert_eq!(get_message("common-error"), "erreur"); + assert_eq!(get_message("common-usage"), "Utilisation"); + assert_eq!(get_message("common-tip"), "conseil"); + } + + unsafe { + if original_lang.is_empty() { + env::remove_var("LANG"); + } else { + env::set_var("LANG", original_lang); + } + } + } +} +/* spell-checker: enable */ diff --git a/src/uudiff/src/lib/mods/common_errors.rs b/src/uudiff/src/lib/mods/common_errors.rs new file mode 100644 index 0000000..b039800 --- /dev/null +++ b/src/uudiff/src/lib/mods/common_errors.rs @@ -0,0 +1,203 @@ +// This file is part of the uutils diffutils package. +// +// For the full copyright and license information, please view the LICENSE-* +// files that was distributed with this source code. + +//! Common errors for all diffutils utilities. 
+ +use std::ffi::OsString; + +use uucore::parser::parse_size::ParseSizeError; + +use crate::{error::UError, translate}; + +/// Contains common Core/DiffUtils errors and their text messages. +/// +/// Always reports exit code 2; if a different exit code is required, +/// use [UtilsErrorCode]. +/// +/// A typical way to return an std::io::Error as +/// Box (from [crate::error::UResult]) is: +/// Err => { +/// let io = error.map_err_context(|| path.to_string_lossy().to_string()); +/// return Err(UtilsError::Io(io).into()); +/// } +// Clone and PartialEq cannot be derived for Box. +#[derive(Debug)] +pub enum UtilsError { + /// When a util does not handle directories (e.g. cmp). + /// + /// Param: wrong operand (dir name) + DirectoryNotAllowed(OsString), + + /// Generic IO error, Display handled by [crate::error::UIoError] + Io(Box), + IoDouble(Box, Box), +} + +impl std::error::Error for UtilsError {} + +impl UError for UtilsError { + fn code(&self) -> i32 { + 2 + } +} + +impl From for UtilsError { + fn from(e: std::io::Error) -> Self { + Self::Io(e.into()) + } +} + +impl std::fmt::Display for UtilsError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let msg = match self { + Self::DirectoryNotAllowed(dir) => { + translate!("error-is-a-directory", "file" => dir.to_string_lossy()) + } + Self::Io(e) => { + // dbg!("Io"); + return e.fmt(f); + } + Self::IoDouble(e1, e2) => { + format!("{e1}\n{}: {e2}", uucore::util_name()) + } + }; + + write!(f, "{msg}") + } +} + +/// Like [UtilsError] with the option to specify the exit code.
+/// +/// A typical way to return an std::io::Error as +/// Box (from [crate::error::UResult]) is: +/// Err => { +/// let io = error.map_err_context(|| path.to_string_lossy().to_string()); +/// return Err(UtilsErrorCode::new(UtilsError::Io(io), 4).into()); +/// } +#[derive(Debug)] +pub struct UtilsErrorCode { + pub utils_error: UtilsError, + pub code: i32, +} + +impl UtilsErrorCode { + pub fn new(utils_error: UtilsError, code: i32) -> Self { + Self { utils_error, code } + } +} + +impl std::error::Error for UtilsErrorCode {} + +impl UError for UtilsErrorCode { + fn code(&self) -> i32 { + self.code + } +} + +impl std::fmt::Display for UtilsErrorCode { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.utils_error.fmt(f) + } +} + +/// Contains all parser errors and their text messages. +/// +/// All errors can be output easily using the normal Display functionality. +/// To format the error message for the typical diffutils output, use [format_error_text]. +#[derive(Debug, PartialEq, Eq)] +pub enum UParseError { + /// (Option, value, error) + ParseSizeError(&'static str, String, ParseSizeError), + + /// (Format options) + ConflictingOutputStyle(String, String), + + /// Having more operands than the four allowed (file_1, file_2, ign_1, ign_2) + /// + /// Params: (wrong operand) + ExtraOperand(OsString), + + InvalidContextLength(String), + InvalidUnifiedLength(String), + + /// Operand missing, e.g. diff without files + MissingOperand(String), + + /// Two options cannot be used together, e.g. cmp --silent and --verbose (output).
+ OptionsIncompatible(&'static str, &'static str), + + /// Error message for options available in GNU, but not yet here + NotYetImplemented(&'static str), +} + +impl std::error::Error for UParseError {} + +impl UError for UParseError { + fn code(&self) -> i32 { + 2 + } + + fn usage(&self) -> bool { + // TODO should not returns full path on try --help message + // Try '/home/gunnar/SynologyDrive/Development/diffutils_fork/target/debug/cmp --help' for more information. + true + } +} + +impl std::fmt::Display for UParseError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let msg = match self { + Self::ParseSizeError(option, value, e) => match e { + ParseSizeError::InvalidSuffix(_) => { + translate!( + "parse-error-invalid-value-unit", + "option" => option, + "value" => value + ) + } + ParseSizeError::ParseFailure(_) => { + translate!( + "parse-error-invalid-value", + "option" => option, + "value" => value + ) + } + ParseSizeError::SizeTooBig(_) => { + translate!( + "parse-error-invalid-value-overflow", + "option" => option, + "value" => value + ) + } + ParseSizeError::PhysicalMem(_value) => e.to_string(), + }, + + Self::ConflictingOutputStyle(opt_1, opt_2) => { + translate!("parse-error-conflicting-output-options", "opt1" => opt_1, "opt2" => opt_2) + } + Self::ExtraOperand(extra_operand) => { + translate!("parse-error-extra-operand", "operand" => extra_operand.to_string_lossy()) + } + Self::InvalidContextLength(value) => { + translate!("parse-error-invalid-context-length", "value" => value) + } + Self::InvalidUnifiedLength(value) => { + translate!("parse-error-invalid-unified-length", "value" => value) + } + Self::MissingOperand(after) => { + translate!("parse-error-missing-operand", "after" => after) + } + Self::OptionsIncompatible(option_1, option_2) => translate!( + "parse-error-incompatible-options", + "opt1" => option_1, + "opt2" => option_2, + ), + Self::NotYetImplemented(s) => { + translate!("parse-error-not-yet-implemented", 
"option" => s) + } + }; + write!(f, "{msg}") + } +} diff --git a/src/uudiff/src/lib/mods/error.rs b/src/uudiff/src/lib/mods/error.rs new file mode 100644 index 0000000..1ee38d8 --- /dev/null +++ b/src/uudiff/src/lib/mods/error.rs @@ -0,0 +1,805 @@ +// This file is part of the uutils diffutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. +//! All utils return exit with an exit code. Usually, the following scheme is used: +//! * `0`: succeeded +//! * `1`: minor problems +//! * `2`: major problems +//! +//! This module provides types to reconcile these exit codes with idiomatic Rust error +//! handling. This has a couple advantages over manually using [`std::process::exit`]: +//! 1. It enables the use of `?`, `map_err`, `unwrap_or`, etc. in `uumain`. +//! 1. It encourages the use of [`UResult`]/[`Result`] in functions in the utils. +//! 1. The error messages are largely standardized across utils. +//! 1. Standardized error messages can be created from external result types +//! (i.e. [`std::io::Result`] & `clap::ClapResult`). +//! 1. [`set_exit_code`] takes away the burden of manually tracking exit codes for non-fatal errors. +//! +//! # Usage +//! The signature of a typical util should be: +//! ```ignore +//! fn uumain(args: impl uucore::Args) -> UResult<()> { +//! ... +//! } +//! ``` +//! [`UResult`] is a simple wrapper around [`Result`] with a custom error trait: [`UError`]. The +//! most important difference with types implementing [`std::error::Error`] is that [`UError`]s +//! can specify the exit code of the program when they are returned from `uumain`: +//! * When `Ok` is returned, the code set with [`set_exit_code`] is used as exit code. If +//! [`set_exit_code`] was not used, then `0` is used. +//! * When `Err` is returned, the code corresponding with the error is used as exit code and the +//! error message is displayed. +//! +//! 
Additionally, the errors can be displayed manually with the [`crate::show`] and [`crate::show_if_err`] macros: +//! ```ignore +//! let res = Err(USimpleError::new(1, "Error!!")); +//! show_if_err!(res); +//! // or +//! if let Err(e) = res { +//! show!(e); +//! } +//! ``` +//! +//! **Note**: The [`crate::show`] and [`crate::show_if_err`] macros set the exit code of the program using +//! [`set_exit_code`]. See the documentation on that function for more information. +//! +//! # Guidelines +//! * Use error types from `uucore` where possible. +//! * Add error types to `uucore` if an error appears in multiple utils. +//! * Prefer proper custom error types over [`ExitCode`] and [`USimpleError`]. +//! * [`USimpleError`] may be used in small utils with simple error handling. +//! * Using [`ExitCode`] is not recommended but can be useful for converting utils to use +//! [`UResult`]. + +// spell-checker:ignore uioerror rustdoc + +use std::{ + cell::Cell, + error::Error, + fmt::{Display, Formatter}, + io::Write, + sync::atomic::{AtomicI32, Ordering}, +}; + +static EXIT_CODE: AtomicI32 = AtomicI32::new(0); + +/// Get the last exit code set with [`set_exit_code`]. +/// The default value is `0`. +pub fn get_exit_code() -> i32 { + EXIT_CODE.load(Ordering::SeqCst) +} + +/// Set the exit code for the program if `uumain` returns `Ok(())`. +/// +/// This function is most useful for non-fatal errors, for example when applying an operation to +/// multiple files: +/// ```ignore +/// use uucore::error::{UResult, set_exit_code}; +/// +/// fn uumain(args: impl uucore::Args) -> UResult<()> { +/// ... +/// for file in files { +/// let res = some_operation_that_might_fail(file); +/// match res { +/// Ok(_) => {}, +/// Err(_) => set_exit_code(1), +/// } +/// } +/// Ok(()) // If any of the operations failed, 1 is returned. +/// } +/// ``` +pub fn set_exit_code(code: i32) { + EXIT_CODE.store(code, Ordering::SeqCst); +} + +/// Result type that should be returned by all utils.
+pub type UResult = Result>; + +/// Custom errors defined by the utils and `uucore`. +/// +/// All errors should implement [`std::error::Error`], [`std::fmt::Display`] and +/// [`std::fmt::Debug`] and have an additional `code` method that specifies the +/// exit code of the program if the error is returned from `uumain`. +/// +/// An example of a custom error from `ls`: +/// +/// ``` +/// use uucore::{ +/// display::Quotable, +/// error::{UError, UResult} +/// }; +/// use std::{ +/// error::Error, +/// fmt::{Display, Debug}, +/// path::PathBuf +/// }; +/// +/// #[derive(Debug)] +/// enum LsError { +/// InvalidLineWidth(String), +/// NoMetadata(PathBuf), +/// } +/// +/// impl UError for LsError { +/// fn code(&self) -> i32 { +/// match self { +/// LsError::InvalidLineWidth(_) => 2, +/// LsError::NoMetadata(_) => 1, +/// } +/// } +/// } +/// +/// impl Error for LsError {} +/// +/// impl Display for LsError { +/// fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { +/// match self { +/// LsError::InvalidLineWidth(s) => write!(f, "invalid line width: {}", s.quote()), +/// LsError::NoMetadata(p) => write!(f, "could not open file: {}", p.quote()), +/// } +/// } +/// } +/// ``` +/// +/// The main routine would look like this: +/// +/// ```ignore +/// #[uucore::main] +/// pub fn uumain(args: impl uucore::Args) -> UResult<()> { +/// // Perform computations here ... +/// return Err(LsError::InvalidLineWidth(String::from("test")).into()) +/// } +/// ``` +/// +/// The call to `into()` is required to convert the `LsError` to +/// [`Box`]. The implementation for `From` is provided automatically. +/// +/// A crate like [`quick_error`](https://crates.io/crates/quick-error) might +/// also be used, but will still require an `impl` for the `code` method. +pub trait UError: Error + Send { + /// Error code of a custom error. 
+ /// + /// Set a return value for each variant of an enum-type to associate an + /// error code (which is returned to the system shell) with an error + /// variant. + /// + /// # Example + /// + /// ``` + /// use uucore::{ + /// display::Quotable, + /// error::UError + /// }; + /// use std::{ + /// error::Error, + /// fmt::{Display, Debug}, + /// path::PathBuf + /// }; + /// + /// #[derive(Debug)] + /// enum MyError { + /// Foo(String), + /// Bar(PathBuf), + /// Bing(), + /// } + /// + /// impl UError for MyError { + /// fn code(&self) -> i32 { + /// match self { + /// MyError::Foo(_) => 2, + /// // All other errors yield the same error code, there's no + /// // need to list them explicitly. + /// _ => 1, + /// } + /// } + /// } + /// + /// impl Error for MyError {} + /// + /// impl Display for MyError { + /// fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + /// use MyError as ME; + /// match self { + /// ME::Foo(s) => write!(f, "Unknown Foo: {}", s.quote()), + /// ME::Bar(p) => write!(f, "Couldn't find Bar: {}", p.quote()), + /// ME::Bing() => write!(f, "Exterminate!"), + /// } + /// } + /// } + /// ``` + fn code(&self) -> i32 { + 1 + } + + /// Print usage help to a custom error. + /// + /// Return true or false to control whether a short usage help is printed + /// below the error message. The usage help is in the format: "Try `{name} + /// --help` for more information." and printed only if `true` is returned. 
+ /// + /// # Example + /// + /// ``` + /// use uucore::{ + /// display::Quotable, + /// error::UError + /// }; + /// use std::{ + /// error::Error, + /// fmt::{Display, Debug}, + /// path::PathBuf + /// }; + /// + /// #[derive(Debug)] + /// enum MyError { + /// Foo(String), + /// Bar(PathBuf), + /// Bing(), + /// } + /// + /// impl UError for MyError { + /// fn usage(&self) -> bool { + /// match self { + /// // This will have a short usage help appended + /// MyError::Bar(_) => true, + /// // These matches won't have a short usage help appended + /// _ => false, + /// } + /// } + /// } + /// + /// impl Error for MyError {} + /// + /// impl Display for MyError { + /// fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + /// use MyError as ME; + /// match self { + /// ME::Foo(s) => write!(f, "Unknown Foo: {}", s.quote()), + /// ME::Bar(p) => write!(f, "Couldn't find Bar: {}", p.quote()), + /// ME::Bing() => write!(f, "Exterminate!"), + /// } + /// } + /// } + /// ``` + fn usage(&self) -> bool { + false + } +} + +impl From for Box +where + T: UError + 'static, +{ + fn from(t: T) -> Self { + Box::new(t) + } +} + +/// A simple error type with an exit code and a message that implements [`UError`]. +/// +/// ``` +/// use uucore::error::{UResult, USimpleError}; +/// let err = USimpleError { code: 1, message: "error!".into()}; +/// let res: UResult<()> = Err(err.into()); +/// // or using the `new` method: +/// let res: UResult<()> = Err(USimpleError::new(1, "error!")); +/// ``` +#[derive(Debug)] +pub struct USimpleError { + /// Exit code of the error. + pub code: i32, + + /// Error message. + pub message: String, +} + +impl USimpleError { + /// Create a new `USimpleError` with a given exit code and message. 
#[allow(clippy::new_ret_no_self)]
    pub fn new<S: Into<String>>(code: i32, message: S) -> Box<dyn UError> {
        Box::new(Self {
            code,
            message: message.into(),
        })
    }
}

impl Error for USimpleError {}

impl Display for USimpleError {
    /// Writes the stored message verbatim.
    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), std::fmt::Error> {
        self.message.fmt(f)
    }
}

impl UError for USimpleError {
    /// Returns the exit code supplied at construction time.
    fn code(&self) -> i32 {
        self.code
    }
}

/// An error type with an exit code and a message whose [`UError::usage`]
/// returns `true`.
///
/// It stores the same data as [`USimpleError`]; the difference is the
/// `usage()` override in its [`UError`] implementation.
#[derive(Debug)]
pub struct UUsageError {
    /// Exit code of the error.
    pub code: i32,

    /// Error message.
    pub message: String,
}

impl UUsageError {
    /// Create a new `UUsageError` with a given exit code and message.
    ///
    /// The value is returned boxed as a `dyn UError` rather than as `Self`,
    /// hence the lint exemption below.
    #[allow(clippy::new_ret_no_self)]
    pub fn new<S: Into<String>>(code: i32, message: S) -> Box<dyn UError> {
        Box::new(Self {
            code,
            message: message.into(),
        })
    }
}

impl Error for UUsageError {}

impl Display for UUsageError {
    /// Writes the stored message verbatim.
    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), std::fmt::Error> {
        self.message.fmt(f)
    }
}

impl UError for UUsageError {
    /// Returns the exit code supplied at construction time.
    fn code(&self) -> i32 {
        self.code
    }

    /// Always `true` for this error type.
    fn usage(&self) -> bool {
        true
    }
}

/// Wrapper type around [`std::io::Error`].
///
/// The messages displayed by [`UIoError`] should match the error messages displayed by GNU
/// coreutils.
///
/// There are two ways to construct this type: with [`UIoError::new`] or by calling the
/// [`FromIo::map_err_context`] method on a [`std::io::Result`] or [`std::io::Error`].
/// ```
/// use uucore::{
///     display::Quotable,
///     error::{FromIo, UResult, UIoError, UError}
/// };
/// use std::fs::File;
/// use std::path::Path;
/// let path = Path::new("test.txt");
///
/// // Manual construction
/// let e: Box<dyn UError> = UIoError::new(
///     std::io::ErrorKind::NotFound,
///     format!("cannot access {}", path.quote())
/// );
/// let res: UResult<()> = Err(e.into());
///
/// // Converting from an `std::io::Error`.
+/// let res: UResult = File::open(path).map_err_context(|| format!("cannot access {}", path.quote())); +/// ``` +#[derive(Debug)] +pub struct UIoError { + context: Option, + inner: std::io::Error, +} + +impl UIoError { + #[allow(clippy::new_ret_no_self)] + /// Create a new `UIoError` with a given exit code and message. + pub fn new>(kind: std::io::ErrorKind, context: S) -> Box { + Box::new(Self { + context: Some(context.into()), + inner: kind.into(), + }) + } +} + +impl UError for UIoError {} + +impl Error for UIoError {} + +impl Display for UIoError { + fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), std::fmt::Error> { + use std::io::ErrorKind::*; + + let message; + let message = if self.inner.raw_os_error().is_some() { + // These are errors that come directly from the OS. + // We want to normalize their messages across systems, + // and we want to strip the "(os error X)" suffix. + match self.inner.kind() { + NotFound => "No such file or directory", + PermissionDenied => "Permission denied", + ConnectionRefused => "Connection refused", + ConnectionReset => "Connection reset", + ConnectionAborted => "Connection aborted", + NotConnected => "Not connected", + AddrInUse => "Address in use", + AddrNotAvailable => "Address not available", + BrokenPipe => "Broken pipe", + AlreadyExists => "Already exists", + WouldBlock => "Would block", + InvalidInput => "Invalid input", + InvalidData => "Invalid data", + TimedOut => "Timed out", + WriteZero => "Write zero", + Interrupted => "Interrupted", + UnexpectedEof => "Unexpected end of file", + _ => { + // TODO: When the new error variants + // (https://github.com/rust-lang/rust/issues/86442) + // are stabilized, we should add them to the match statement. + message = strip_errno(&self.inner); + &message + } + } + } else { + // These messages don't need as much normalization, and the above + // messages wouldn't always be a good substitute. 
+ // For example, ErrorKind::NotFound doesn't necessarily mean it was + // a file that was not found. + // There are also errors with entirely custom messages. + message = self.inner.to_string(); + &message + }; + if let Some(ctx) = &self.context { + write!(f, "{ctx}: {message}") + } else { + write!(f, "{message}") + } + } +} + +/// Strip the trailing " (os error XX)" from io error strings. +pub fn strip_errno(err: &std::io::Error) -> String { + let mut msg = err.to_string(); + if let Some(pos) = msg.find(" (os error ") { + msg.truncate(pos); + } + msg +} + +/// Enables the conversion from [`std::io::Error`] to [`UError`] and from [`std::io::Result`] to +/// [`UResult`]. +pub trait FromIo { + /// Map the error context of an [`std::io::Error`] or [`std::io::Result`] to a custom error + fn map_err_context(self, context: impl FnOnce() -> String) -> T; +} + +impl FromIo> for std::io::Error { + fn map_err_context(self, context: impl FnOnce() -> String) -> Box { + Box::new(UIoError { + context: Some(context()), + inner: self, + }) + } +} + +impl FromIo> for std::io::Result { + fn map_err_context(self, context: impl FnOnce() -> String) -> UResult { + self.map_err(|e| e.map_err_context(context) as Box) + } +} + +impl FromIo> for std::io::ErrorKind { + fn map_err_context(self, context: impl FnOnce() -> String) -> Box { + Box::new(UIoError { + context: Some(context()), + inner: std::io::Error::new(self, ""), + }) + } +} + +impl From for UIoError { + fn from(f: std::io::Error) -> Self { + Self { + context: None, + inner: f, + } + } +} + +impl From for Box { + fn from(f: std::io::Error) -> Self { + let u_error: UIoError = f.into(); + Box::new(u_error) as Self + } +} + +/// Enables the conversion from [`Result`] to [`UResult`]. 
+/// +/// # Examples +/// +/// ``` +/// use uudiff::error::FromIo; +/// use nix::errno::Errno; +/// +/// let nix_err = Err::<(), nix::Error>(Errno::EACCES); +/// let uio_result = nix_err.map_err_context(|| String::from("fix me please!")); +/// +/// // prints "fix me please!: Permission denied" +/// println!("{}", uio_result.unwrap_err()); +/// ``` +#[cfg(unix)] +impl FromIo> for Result { + fn map_err_context(self, context: impl FnOnce() -> String) -> UResult { + self.map_err(|e| { + Box::new(UIoError { + context: Some(context()), + inner: std::io::Error::from_raw_os_error(e as i32), + }) as Box + }) + } +} + +#[cfg(unix)] +impl FromIo> for nix::Error { + fn map_err_context(self, context: impl FnOnce() -> String) -> UResult { + Err(Box::new(UIoError { + context: Some(context()), + inner: std::io::Error::from_raw_os_error(self as i32), + }) as Box) + } +} + +#[cfg(unix)] +impl From for UIoError { + fn from(f: nix::Error) -> Self { + Self { + context: None, + inner: std::io::Error::from_raw_os_error(f as i32), + } + } +} + +#[cfg(unix)] +impl From for Box { + fn from(f: nix::Error) -> Self { + let u_error: UIoError = f.into(); + Box::new(u_error) as Self + } +} + +/// Shorthand to construct [`UIoError`]-instances. +/// +/// This macro serves as a convenience call to quickly construct instances of +/// [`UIoError`]. It takes: +/// +/// - An instance of [`std::io::Error`] +/// - A `format!`-compatible string and +/// - An arbitrary number of arguments to the format string +/// +/// In exactly this order. It is equivalent to the more verbose code seen in the +/// example. +/// +/// # Examples +/// +/// ``` +/// use uucore::error::UIoError; +/// use uucore::uio_error; +/// +/// let io_err = std::io::Error::new( +/// std::io::ErrorKind::PermissionDenied, "fix me please!" 
+/// ); +/// +/// let uio_err = UIoError::new( +/// io_err.kind(), +/// format!("Error code: {}", 2) +/// ); +/// +/// let other_uio_err = uio_error!(io_err, "Error code: {}", 2); +/// +/// // prints "fix me please!: Permission denied" +/// println!("{uio_err}"); +/// // prints "Error code: 2: Permission denied" +/// println!("{other_uio_err}"); +/// ``` +/// +/// The [`std::fmt::Display`] impl of [`UIoError`] will then ensure that an +/// appropriate error message relating to the actual error kind of the +/// [`std::io::Error`] is appended to whatever error message is defined in +/// addition (as secondary argument). +/// +/// If you want to show only the error message for the [`std::io::ErrorKind`] +/// that's contained in [`UIoError`], pass the second argument as empty string: +/// +/// ``` +/// use uucore::error::UIoError; +/// use uucore::uio_error; +/// +/// let io_err = std::io::Error::new( +/// std::io::ErrorKind::PermissionDenied, "fix me please!" +/// ); +/// +/// let other_uio_err = uio_error!(io_err, ""); +/// +/// // prints: ": Permission denied" +/// println!("{other_uio_err}"); +/// ``` +//#[macro_use] +#[macro_export] +macro_rules! uio_error( + ($err:expr, $($args:tt)+) => ({ + UIoError::new( + $err.kind(), + format!($($args)+) + ) + }) +); + +/// A special error type that does not print any message when returned from +/// `uumain`. Especially useful for porting utilities to using [`UResult`]. +/// +/// There are two ways to construct an [`ExitCode`]: +/// ``` +/// use uucore::error::{ExitCode, UResult}; +/// // Explicit +/// let res: UResult<()> = Err(ExitCode(1).into()); +/// +/// // Using into on `i32`: +/// let res: UResult<()> = Err(1.into()); +/// ``` +/// This type is especially useful for a trivial conversion from utils returning [`i32`] to +/// returning [`UResult`]. +#[derive(Debug)] +pub struct ExitCode(pub i32); + +impl ExitCode { + #[allow(clippy::new_ret_no_self)] + /// Create a new `ExitCode` with a given exit code. 
+ pub fn new(code: i32) -> Box { + Box::new(Self(code)) + } +} + +impl Error for ExitCode {} + +impl Display for ExitCode { + fn fmt(&self, _: &mut Formatter<'_>) -> Result<(), std::fmt::Error> { + Ok(()) + } +} + +impl UError for ExitCode { + fn code(&self) -> i32 { + self.0 + } +} + +impl From for Box { + fn from(i: i32) -> Self { + ExitCode::new(i) + } +} + +/// A wrapper for `clap::Error` that implements [`UError`] +/// +/// Contains a custom error code. When `Display::fmt` is called on this struct +/// the [`clap::Error`] will be printed _directly to `stdout` or `stderr`_. +/// This is because `clap` only supports colored output when it prints directly. +/// +/// [`ClapErrorWrapper`] is generally created by calling the +/// [`UClapError::with_exit_code`] method on [`clap::Error`] or using the [`From`] +/// implementation from [`clap::Error`] to `Box`, which constructs +/// a [`ClapErrorWrapper`] with an exit code of `1`. +/// +/// ```rust +/// use uucore::error::{ClapErrorWrapper, UError, UClapError}; +/// let command = clap::Command::new("test"); +/// let result: Result<_, ClapErrorWrapper> = command.try_get_matches().with_exit_code(125); +/// +/// let command = clap::Command::new("test"); +/// let result: Result<_, Box> = command.try_get_matches().map_err(Into::into); +/// ``` +#[derive(Debug)] +pub struct ClapErrorWrapper { + code: i32, + error: clap::Error, + print_failed: Cell, +} + +/// Extension trait for `clap::Error` to adjust the exit code. +pub trait UClapError { + /// Set the exit code for the program if `uumain` returns `Ok(())`. 
+ fn with_exit_code(self, code: i32) -> T; +} + +impl From for Box { + fn from(e: clap::Error) -> Self { + dbg!(&e); + Box::new(ClapErrorWrapper { + code: 1, + error: e, + print_failed: Cell::new(false), + }) + } +} + +impl UClapError for clap::Error { + fn with_exit_code(self, code: i32) -> ClapErrorWrapper { + ClapErrorWrapper { + code, + error: self, + print_failed: Cell::new(false), + } + } +} + +impl UClapError> + for Result +{ + fn with_exit_code(self, code: i32) -> Result { + self.map_err(|e| e.with_exit_code(code)) + } +} + +impl UError for ClapErrorWrapper { + fn code(&self) -> i32 { + // If the error is a DisplayHelp or DisplayVersion variant, + // check if printing failed. If it did, return 1, otherwise 0. + if let clap::error::ErrorKind::DisplayHelp | clap::error::ErrorKind::DisplayVersion = + self.error.kind() + { + i32::from(self.print_failed.get()) + } else { + self.code + } + } +} + +impl Error for ClapErrorWrapper {} + +// This is abuse of the Display trait +impl Display for ClapErrorWrapper { + fn fmt(&self, _f: &mut Formatter<'_>) -> Result<(), std::fmt::Error> { + // Check if printing succeeds. For DisplayHelp and DisplayVersion, + // error.print() writes to stdout, so we need to detect write failures + // (e.g., when stdout is /dev/full). + if let Err(print_fail) = self.error.print() { + // Mark that printing failed so code() can return the appropriate exit code + self.print_failed.set(true); + // Try to display this error to stderr, but ignore if that fails too + // since we're already in an error state. + let _ = writeln!(std::io::stderr(), "{}: {print_fail}", uucore::util_name()); + // Mirror GNU behavior: when failing to print help or version, exit with error code. + // This avoids silent failures when stdout is full or closed. 
+ set_exit_code(1); + } + // Always return Ok(()) to satisfy Display's contract and prevent panic + Ok(()) + } +} + +#[cfg(test)] +mod tests { + #[test] + #[cfg(unix)] + fn test_nix_error_conversion() { + use super::{FromIo, UIoError}; + use nix::errno::Errno; + use std::io::ErrorKind; + + for (nix_error, expected_error_kind) in [ + (Errno::EACCES, ErrorKind::PermissionDenied), + (Errno::ENOENT, ErrorKind::NotFound), + (Errno::EEXIST, ErrorKind::AlreadyExists), + ] { + let error = UIoError::from(nix_error); + assert_eq!(expected_error_kind, error.inner.kind()); + } + assert_eq!( + "test: Permission denied", + Err::<(), nix::Error>(Errno::EACCES) + .map_err_context(|| String::from("test")) + .unwrap_err() + .to_string() + ); + } +} diff --git a/src/uudiff/src/lib/mods/locale.rs b/src/uudiff/src/lib/mods/locale.rs new file mode 100644 index 0000000..39056f1 --- /dev/null +++ b/src/uudiff/src/lib/mods/locale.rs @@ -0,0 +1,1463 @@ +// This file is part of the uutils diffutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. 
+// spell-checker:disable + +use uucore::error::UError; + +use fluent::{self, FluentArgs, FluentBundle, FluentResource}; +use fluent_syntax::parser::ParserError; + +use std::cell::Cell; +use std::fs; +use std::path::{Path, PathBuf}; +use std::str::FromStr; +use std::sync::OnceLock; + +use os_display::Quotable; +use thiserror::Error; +use unic_langid::LanguageIdentifier; + +#[derive(Error, Debug)] +pub enum LocalizationError { + #[error("I/O error loading '{path}': {source}")] + Io { + source: std::io::Error, + path: PathBuf, + }, + #[error("Parse-locale error: {0}")] + ParseLocale(String), + #[error("Resource parse error at '{snippet}': {error:?}")] + ParseResource { + #[source] + error: ParserError, + snippet: String, + }, + #[error("Bundle error: {0}")] + Bundle(String), + #[error("Locales directory not found: {0}")] + LocalesDirNotFound(String), + #[error("Path resolution error: {0}")] + PathResolution(String), +} + +impl From for LocalizationError { + fn from(error: std::io::Error) -> Self { + Self::Io { + source: error, + path: PathBuf::from(""), + } + } +} + +// Add a generic way to convert LocalizationError to UError +impl UError for LocalizationError { + fn code(&self) -> i32 { + 1 + } +} + +pub const DEFAULT_LOCALE: &str = "en-US"; + +// Include embedded locale files as fallback +include!(concat!(env!("OUT_DIR"), "/embedded_locales.rs")); + +// A struct to handle localization with optional English fallback +struct Localizer { + primary_bundle: FluentBundle, + fallback_bundle: Option>, +} + +impl Localizer { + fn new(primary_bundle: FluentBundle) -> Self { + Self { + primary_bundle, + fallback_bundle: None, + } + } + + fn with_fallback(mut self, fallback_bundle: FluentBundle) -> Self { + self.fallback_bundle = Some(fallback_bundle); + self + } + + fn format(&self, id: &str, args: Option<&FluentArgs>) -> String { + // Try primary bundle first + if let Some(message) = self.primary_bundle.get_message(id).and_then(|m| m.value()) { + let mut errs = Vec::new(); + 
return self + .primary_bundle + .format_pattern(message, args, &mut errs) + .to_string(); + } + + // Fall back to English bundle if available + if let Some(ref fallback) = self.fallback_bundle { + if let Some(message) = fallback.get_message(id).and_then(|m| m.value()) { + let mut errs = Vec::new(); + return fallback + .format_pattern(message, args, &mut errs) + .to_string(); + } + } + + // Return the key ID if not found anywhere + id.to_string() + } +} + +// Global localizer stored in thread-local OnceLock +thread_local! { + static LOCALIZER: OnceLock = const { OnceLock::new() }; +} + +/// Helper function to find the uudiff locales directory from a utility's locales directory +fn find_uudiff_locales_dir(utility_locales_dir: &Path) -> Option { + // Normalize the path to get absolute path + let normalized_dir = utility_locales_dir + .canonicalize() + .unwrap_or_else(|_| utility_locales_dir.to_path_buf()); + + // Walk up: locales -> printenv -> uu -> src + let uudiff_locales = normalized_dir + .parent()? // printenv + .parent()? // uu + .parent()? 
// src + .join("uudiff") + .join("locales"); + + // Only return if the directory actually exists + uudiff_locales.exists().then_some(uudiff_locales) +} + +/// Create a bundle that combines common and utility-specific strings +fn create_bundle( + locale: &LanguageIdentifier, + locales_dir: &Path, + util_name: &str, +) -> Result, LocalizationError> { + let mut bundle = FluentBundle::new(vec![locale.clone()]); + + // Disable Unicode directional isolate characters + bundle.set_use_isolating(false); + + let mut try_add_resource_from = |dir_opt: Option| { + if let Some(resource) = dir_opt + .map(|dir| dir.join(format!("{locale}.ftl"))) + .and_then(|locale_path| fs::read_to_string(locale_path).ok()) + .and_then(|ftl| FluentResource::try_new(ftl).ok()) + { + bundle.add_resource_overriding(resource); + } + }; + + // Load common strings from uudiff locales directory + try_add_resource_from(find_uudiff_locales_dir(locales_dir)); + // Then, try to load utility-specific strings from the utility's locale directory + try_add_resource_from(get_locales_dir(util_name).ok()); + + // checksum binaries also require fluent files from the checksum_common crate + if [ + "cksum", + "b2sum", + "md5sum", + "sha1sum", + "sha224sum", + "sha256sum", + "sha384sum", + "sha512sum", + ] + .contains(&util_name) + { + try_add_resource_from(get_locales_dir("checksum_common").ok()); + } + + // If we have at least one resource, return the bundle + if bundle.has_message("common-error") || bundle.has_message(&format!("{util_name}-about")) { + Ok(bundle) + } else { + Err(LocalizationError::LocalesDirNotFound(format!( + "No localization strings found for {locale} and utility {util_name}" + ))) + } +} + +/// Initialize localization with common strings in addition to utility-specific strings +fn init_localization( + locale: &LanguageIdentifier, + locales_dir: &Path, + util_name: &str, +) -> Result<(), LocalizationError> { + let default_locale = LanguageIdentifier::from_str(DEFAULT_LOCALE) + .expect("Default 
locale should always be valid"); + + // Try to create a bundle that combines common and utility-specific strings + let english_bundle = create_bundle(&default_locale, locales_dir, util_name).or_else(|_| { + // Fallback to embedded utility-specific and common strings + create_english_bundle_from_embedded(&default_locale, util_name) + })?; + + let loc = if locale == &default_locale { + // If requesting English, just use English as primary (no fallback needed) + Localizer::new(english_bundle) + } else { + // Try to load the requested locale with common strings + if let Ok(primary_bundle) = create_bundle(locale, locales_dir, util_name) { + // Successfully loaded requested locale, load English as fallback + Localizer::new(primary_bundle).with_fallback(english_bundle) + } else { + // Failed to load requested locale, just use English as primary + Localizer::new(english_bundle) + } + }; + + LOCALIZER.with(|lock| { + lock.set(loc) + .map_err(|_| LocalizationError::Bundle("Localizer already initialized".into())) + })?; + Ok(()) +} + +/// Helper function to parse FluentResource from content string +fn parse_fluent_resource(content: &str) -> Result { + FluentResource::try_new(content.to_string()).map_err( + |(_partial_resource, errs): (FluentResource, Vec)| { + if let Some(first_err) = errs.into_iter().next() { + let snippet = first_err + .slice + .clone() + .and_then(|range| content.get(range)) + .unwrap_or("") + .to_string(); + LocalizationError::ParseResource { + error: first_err, + snippet, + } + } else { + LocalizationError::LocalesDirNotFound("Parse error without details".to_string()) + } + }, + ) +} + +/// Create a bundle from embedded English locale files with common uudiff strings +fn create_english_bundle_from_embedded( + locale: &LanguageIdentifier, + util_name: &str, +) -> Result, LocalizationError> { + // Only support English from embedded files + if *locale != "en-US" { + return Err(LocalizationError::LocalesDirNotFound( + "Embedded locales only support 
en-US".to_string(), + )); + } + + let mut bundle = FluentBundle::new(vec![locale.clone()]); + bundle.set_use_isolating(false); + + // First, try to load common uudiff strings + if let Some(uudiff_content) = get_embedded_locale("uudiff/en-US.ftl") { + let uudiff_resource = parse_fluent_resource(uudiff_content)?; + bundle.add_resource_overriding(uudiff_resource); + } + + // // Checksum algorithms need locale messages from checksum_common + // if util_name.ends_with("sum") { + // if let Some(uucore_content) = get_embedded_locale("checksum_common/en-US.ftl") { + // let uucore_resource = parse_fluent_resource(uucore_content)?; + // bundle.add_resource_overriding(uucore_resource); + // } + // } + + // Then, try to load utility-specific strings + let locale_key = format!("{util_name}/en-US.ftl"); + if let Some(ftl_content) = get_embedded_locale(&locale_key) { + let resource = parse_fluent_resource(ftl_content)?; + bundle.add_resource_overriding(resource); + } + + // Return the bundle if we have either common strings or utility-specific strings + if bundle.has_message("common-error") || bundle.has_message(&format!("{util_name}-about")) { + Ok(bundle) + } else { + Err(LocalizationError::LocalesDirNotFound(format!( + "No embedded locale found for {util_name} and no common strings found" + ))) + } +} + +fn get_message_internal(id: &str, args: Option) -> String { + LOCALIZER.with(|lock| { + lock.get() + .map_or_else(|| id.to_string(), |loc| loc.format(id, args.as_ref())) + // Return the key ID if localizer not initialized + }) +} + +/// Retrieves a localized message by its identifier. +/// +/// Looks up a message with the given ID in the current locale bundle and returns +/// the localized text. If the message ID is not found in the current locale, +/// it will fall back to English. If the message is not found in English either, +/// returns the message ID itself. 
+/// +/// # Arguments +/// +/// * `id` - The message identifier in the Fluent resources +/// +/// # Returns +/// +/// A `String` containing the localized message, or the message ID if not found +/// +/// # Examples +/// +/// ``` +/// use uudiff::locale::get_message; +/// +/// // Get a localized greeting (from .ftl files) +/// let greeting = get_message("greeting"); +/// println!("{greeting}"); +/// ``` +pub fn get_message(id: &str) -> String { + get_message_internal(id, None) +} + +/// Retrieves a localized message with variable substitution. +/// +/// Looks up a message with the given ID in the current locale bundle, +/// substitutes variables from the provided arguments map, and returns the +/// localized text. If the message ID is not found in the current locale, +/// it will fall back to English. If the message is not found in English either, +/// returns the message ID itself. +/// +/// # Arguments +/// +/// * `id` - The message identifier in the Fluent resources +/// * `ftl_args` - Key-value pairs for variable substitution in the message +/// +/// # Returns +/// +/// A `String` containing the localized message with variable substitution, or the message ID if not found +/// +/// # Examples +/// +/// ``` +/// use uudiff::locale::get_message_with_args; +/// use fluent::FluentArgs; +/// +/// // For a Fluent message like: "Hello, { $name }! You have { $count } notifications." 
+/// let mut args = FluentArgs::new(); +/// args.set("name".to_string(), "Alice".to_string()); +/// args.set("count".to_string(), 3); +/// +/// let message = get_message_with_args("notification", args); +/// println!("{message}"); +/// ``` +pub fn get_message_with_args(id: &str, ftl_args: FluentArgs) -> String { + get_message_internal(id, Some(ftl_args)) +} + +/// Function to detect system locale from environment variables +fn detect_system_locale() -> Result { + let locale_str = std::env::var("LANG") + .unwrap_or_else(|_| DEFAULT_LOCALE.to_string()) + .split('.') + .next() + .unwrap_or(DEFAULT_LOCALE) + .to_string(); + LanguageIdentifier::from_str(&locale_str).map_err(|_| { + LocalizationError::ParseLocale(format!("Failed to parse locale: {locale_str}")) + }) +} + +/// Sets up localization using the system locale with English fallback. +/// Always loads common strings in addition to utility-specific strings. +/// +/// This function initializes the localization system based on the system's locale +/// preferences (via the LANG environment variable) or falls back to English +/// if the system locale cannot be determined or the locale file doesn't exist. +/// English is always loaded as a fallback. 
+/// +/// # Arguments +/// +/// * `p` - Path to the directory containing localization (.ftl) files +/// +/// # Returns +/// +/// * `Ok(())` if initialization succeeds +/// * `Err(LocalizationError)` if initialization fails +/// +/// # Errors +/// +/// Returns a `LocalizationError` if: +/// * The en-US.ftl file cannot be read (English is required) +/// * The files contain invalid Fluent syntax +/// * The bundle cannot be initialized properly +/// +/// # Examples +/// +/// ``` +/// use uudiff::locale::setup_localization; +/// +/// // Initialize localization using files in the "locales" directory +/// // Make sure you have at least an "en-US.ftl" file in this directory +/// // Other locale files like "fr-FR.ftl" are optional +/// match setup_localization("./locales") { +/// Ok(_) => println!("Localization initialized successfully"), +/// Err(e) => eprintln!("Failed to initialize localization: {e}"), +/// } +/// ``` +pub fn setup_localization(p: &str) -> Result<(), LocalizationError> { + // Avoid duplicated and high-cost localizer setup + thread_local! 
{ + static LOCALIZER_IS_SET: Cell = const { Cell::new(false) }; + } + if LOCALIZER_IS_SET.with(Cell::get) { + return Ok(()); + } + + let locale = detect_system_locale().unwrap_or_else(|_| { + LanguageIdentifier::from_str(DEFAULT_LOCALE).expect("Default locale should always be valid") + }); + + // Load common strings along with utility-specific strings + if let Ok(locales_dir) = get_locales_dir(p) { + // Load both utility-specific and common strings + init_localization(&locale, &locales_dir, p)?; + } else { + // No locales directory found, use embedded English with common strings directly + let default_locale = LanguageIdentifier::from_str(DEFAULT_LOCALE) + .expect("Default locale should always be valid"); + let english_bundle = create_english_bundle_from_embedded(&default_locale, p)?; + let localizer = Localizer::new(english_bundle); + + LOCALIZER.with(|lock| { + lock.set(localizer) + .map_err(|_| LocalizationError::Bundle("Localizer already initialized".into())) + })?; + } + LOCALIZER_IS_SET.with(|f| f.set(true)); + Ok(()) +} + +#[cfg(not(debug_assertions))] +fn resolve_locales_dir_from_exe_dir(exe_dir: &Path, p: &str) -> Option { + // 1. /locales/ + let coreutils = exe_dir.join("locales").join(p); + if coreutils.exists() { + return Some(coreutils); + } + + // 2. /share/locales/ + if let Some(prefix) = exe_dir.parent() { + let fhs = prefix.join("share").join("locales").join(p); + if fhs.exists() { + return Some(fhs); + } + } + + // 3. 
/ (legacy fall-back) + let fallback = exe_dir.join(p); + if fallback.exists() { + return Some(fallback); + } + + None +} + +/// Helper function to get the locales directory based on the build configuration +fn get_locales_dir(p: &str) -> Result { + #[cfg(debug_assertions)] + { + // During development, use the project's locales directory + let manifest_dir = env!("CARGO_MANIFEST_DIR"); + // from uudiff path, load the locales directory from the program directory + let dev_path = PathBuf::from(manifest_dir) + .join("../uu") + .join(p) + .join("locales"); + + if dev_path.exists() { + return Ok(dev_path); + } + + // Fallback for development if the expected path doesn't exist + let fallback_dev_path = PathBuf::from(manifest_dir).join(p); + if fallback_dev_path.exists() { + return Ok(fallback_dev_path); + } + + Err(LocalizationError::LocalesDirNotFound(format!( + "Development locales directory not found at {} or {}", + dev_path.quote(), + fallback_dev_path.quote() + ))) + } + + #[cfg(not(debug_assertions))] + { + use std::env; + // In release builds, look relative to executable + let exe_path = env::current_exe().map_err(|e| { + LocalizationError::PathResolution(format!("Failed to get executable path: {e}")) + })?; + + let exe_dir = exe_path.parent().ok_or_else(|| { + LocalizationError::PathResolution("Failed to get executable directory".to_string()) + })?; + + if let Some(dir) = resolve_locales_dir_from_exe_dir(exe_dir, p) { + return Ok(dir); + } + + Err(LocalizationError::LocalesDirNotFound(format!( + "Release locales directory not found starting from {}", + exe_dir.quote() + ))) + } +} + +/// Macro for retrieving localized messages with optional arguments. +/// +/// This macro provides a unified interface for both simple message retrieval +/// and message retrieval with variable substitution. It accepts a message ID +/// and optionally key-value pairs using the `"key" => value` syntax. 
+/// +/// # Arguments +/// +/// * `$id` - The message identifier string +/// * Optional key-value pairs in the format `"key" => value` +/// +/// # Examples +/// +/// ``` +/// use uudiff::translate; +/// use fluent::FluentArgs; +/// +/// // Simple message without arguments +/// let greeting = translate!("greeting"); +/// +/// // Message with one argument +/// let welcome = translate!("welcome", "name" => "Alice"); +/// +/// // Message with multiple arguments +/// let username = "user name"; +/// let item_count = 2; +/// let notification = translate!( +/// "user-stats", +/// "name" => username, +/// "count" => item_count, +/// "status" => "active" +/// ); +/// ``` +#[macro_export] +macro_rules! translate { + // Case 1: Message ID only (no arguments) + ($id:expr) => { + $crate::locale::get_message($id) + }; + + // Case 2: Message ID with key-value arguments + ($id:expr, $($key:expr => $value:expr),+ $(,)?) => { + { + let mut args = fluent::FluentArgs::new(); + $( + let value_str = $value.to_string(); + match value_str.parse::() { + Ok(num_val) => args.set($key, num_val), + Err(e) => { + if *e.kind() == std::num::IntErrorKind::PosOverflow { + // Keep as string if not a number + // float will round the number which is not the input anymore + args.set($key, value_str); + } else if let Ok(float_val) = value_str.parse::() { + args.set($key, float_val); + } else { + // Keep as string if not a number + args.set($key, value_str); + } + }, + } + // if let Ok(num_val) = value_str.parse::() { + // args.set($key, num_val); + // } else if let Ok(float_val) = value_str.parse::() { + // args.set($key, float_val); + // } else { + // // Keep as string if not a number + // args.set($key, value_str); + // } + )+ + $crate::locale::get_message_with_args($id, args) + } + }; +} + +// Re-export the macro for easier access +pub use translate; + +#[cfg(test)] +mod tests { + use super::*; + use std::env; + use std::fs; + use std::path::PathBuf; + use tempfile::TempDir; + + /// Test-specific 
helper function to create a bundle from test directory only + #[cfg(test)] + fn create_test_bundle( + locale: &LanguageIdentifier, + test_locales_dir: &Path, + ) -> Result, LocalizationError> { + let mut bundle = FluentBundle::new(vec![locale.clone()]); + bundle.set_use_isolating(false); + + // Only load from the test directory - no common strings or utility-specific paths + let locale_path = test_locales_dir.join(format!("{locale}.ftl")); + if let Ok(ftl_content) = fs::read_to_string(&locale_path) { + let resource = parse_fluent_resource(&ftl_content)?; + bundle.add_resource_overriding(resource); + return Ok(bundle); + } + + Err(LocalizationError::LocalesDirNotFound(format!( + "No localization strings found for {locale} in {}", + test_locales_dir.quote() + ))) + } + + /// Test-specific initialization function for test directories + #[cfg(test)] + fn init_test_localization( + locale: &LanguageIdentifier, + test_locales_dir: &Path, + ) -> Result<(), LocalizationError> { + let default_locale = LanguageIdentifier::from_str(DEFAULT_LOCALE) + .expect("Default locale should always be valid"); + + // Create English bundle from test directory + let english_bundle = create_test_bundle(&default_locale, test_locales_dir)?; + + let loc = if locale == &default_locale { + // If requesting English, just use English as primary + Localizer::new(english_bundle) + } else { + // Try to load the requested locale from test directory + if let Ok(primary_bundle) = create_test_bundle(locale, test_locales_dir) { + // Successfully loaded requested locale, load English as fallback + Localizer::new(primary_bundle).with_fallback(english_bundle) + } else { + // Failed to load requested locale, just use English as primary + Localizer::new(english_bundle) + } + }; + + LOCALIZER.with(|lock| { + lock.set(loc) + .map_err(|_| LocalizationError::Bundle("Localizer already initialized".into())) + })?; + Ok(()) + } + + /// Helper function to create a temporary directory with test locale files + fn 
create_test_locales_dir() -> TempDir { + let temp_dir = TempDir::new().expect("Failed to create temp directory"); + + // Create en-US.ftl + let en_content = r" +greeting = Hello, world! +welcome = Welcome, { $name }! +count-items = You have { $count -> + [one] { $count } item + *[other] { $count } items +} +missing-in-other = This message only exists in English +"; + + // Create fr-FR.ftl + let fr_content = r" +greeting = Bonjour, le monde! +welcome = Bienvenue, { $name }! +count-items = Vous avez { $count -> + [one] { $count } élément + *[other] { $count } éléments +} +"; + + // Create ja-JP.ftl (Japanese) + let ja_content = r" +greeting = こんにちは、世界! +welcome = ようこそ、{ $name }さん! +count-items = { $count }個のアイテムがあります +"; + + // Create ar-SA.ftl (Arabic - Right-to-Left) + let ar_content = r" +greeting = أهلاً بالعالم! +welcome = أهلاً وسهلاً، { $name }! +count-items = لديك { $count -> + [zero] لا عناصر + [one] عنصر واحد + [two] عنصران + [few] { $count } عناصر + *[other] { $count } عنصر +} +"; + + // Create es-ES.ftl with invalid syntax + let es_invalid_content = r" +greeting = Hola, mundo! 
+invalid-syntax = This is { $missing +"; + + fs::write(temp_dir.path().join("en-US.ftl"), en_content) + .expect("Failed to write en-US.ftl"); + fs::write(temp_dir.path().join("fr-FR.ftl"), fr_content) + .expect("Failed to write fr-FR.ftl"); + fs::write(temp_dir.path().join("ja-JP.ftl"), ja_content) + .expect("Failed to write ja-JP.ftl"); + fs::write(temp_dir.path().join("ar-SA.ftl"), ar_content) + .expect("Failed to write ar-SA.ftl"); + fs::write(temp_dir.path().join("es-ES.ftl"), es_invalid_content) + .expect("Failed to write es-ES.ftl"); + + temp_dir + } + + #[test] + fn test_create_bundle_success() { + let temp_dir = create_test_locales_dir(); + let locale = LanguageIdentifier::from_str("en-US").unwrap(); + + let result = create_test_bundle(&locale, temp_dir.path()); + assert!(result.is_ok()); + + let bundle = result.unwrap(); + assert!(bundle.get_message("greeting").is_some()); + } + + #[test] + fn test_create_bundle_file_not_found() { + let temp_dir = TempDir::new().unwrap(); + let locale = LanguageIdentifier::from_str("de-DE").unwrap(); + + let result = create_test_bundle(&locale, temp_dir.path()); + assert!(result.is_err()); + + if let Err(LocalizationError::LocalesDirNotFound(_)) = result { + // Expected - no localization strings found + } else { + panic!("Expected LocalesDirNotFound error"); + } + } + + #[test] + fn test_create_bundle_invalid_syntax() { + let temp_dir = create_test_locales_dir(); + let locale = LanguageIdentifier::from_str("es-ES").unwrap(); + + let result = create_test_bundle(&locale, temp_dir.path()); + + // The result should be an error due to invalid syntax + match result { + Err(LocalizationError::ParseResource { + error: _parser_err, + snippet: _, + }) => { + // Expected ParseResource variant - test passes + } + Ok(_) => { + panic!("Expected ParseResource error, but bundle was created successfully"); + } + Err(other) => { + panic!("Expected ParseResource error, but got: {other:?}"); + } + } + } + + #[test] + fn 
test_localizer_format_primary_bundle() { + let temp_dir = create_test_locales_dir(); + let en_bundle = create_test_bundle( + &LanguageIdentifier::from_str("en-US").unwrap(), + temp_dir.path(), + ) + .unwrap(); + + let localizer = Localizer::new(en_bundle); + let result = localizer.format("greeting", None); + assert_eq!(result, "Hello, world!"); + } + + #[test] + fn test_localizer_format_with_args() { + use fluent::FluentArgs; + let temp_dir = create_test_locales_dir(); + let en_bundle = create_test_bundle( + &LanguageIdentifier::from_str("en-US").unwrap(), + temp_dir.path(), + ) + .unwrap(); + + let localizer = Localizer::new(en_bundle); + let mut args = FluentArgs::new(); + args.set("name", "Alice"); + + let result = localizer.format("welcome", Some(&args)); + assert_eq!(result, "Welcome, Alice!"); + } + + #[test] + fn test_localizer_fallback_to_english() { + let temp_dir = create_test_locales_dir(); + let fr_bundle = create_test_bundle( + &LanguageIdentifier::from_str("fr-FR").unwrap(), + temp_dir.path(), + ) + .unwrap(); + let en_bundle = create_test_bundle( + &LanguageIdentifier::from_str("en-US").unwrap(), + temp_dir.path(), + ) + .unwrap(); + + let localizer = Localizer::new(fr_bundle).with_fallback(en_bundle); + + // This message exists in French + let result1 = localizer.format("greeting", None); + assert_eq!(result1, "Bonjour, le monde!"); + + // This message only exists in English, should fallback + let result2 = localizer.format("missing-in-other", None); + assert_eq!(result2, "This message only exists in English"); + } + + #[test] + fn test_localizer_format_message_not_found() { + let temp_dir = create_test_locales_dir(); + let en_bundle = create_test_bundle( + &LanguageIdentifier::from_str("en-US").unwrap(), + temp_dir.path(), + ) + .unwrap(); + + let localizer = Localizer::new(en_bundle); + let result = localizer.format("nonexistent-message", None); + assert_eq!(result, "nonexistent-message"); + } + + #[test] + fn test_init_localization_english_only() 
{ + // Run in a separate thread to avoid conflicts with other tests + std::thread::spawn(|| { + let temp_dir = create_test_locales_dir(); + let locale = LanguageIdentifier::from_str("en-US").unwrap(); + + let result = init_test_localization(&locale, temp_dir.path()); + assert!(result.is_ok()); + + // Test that we can get messages + let message = get_message("greeting"); + assert_eq!(message, "Hello, world!"); + }) + .join() + .unwrap(); + } + + #[test] + fn test_init_localization_with_fallback() { + std::thread::spawn(|| { + let temp_dir = create_test_locales_dir(); + let locale = LanguageIdentifier::from_str("fr-FR").unwrap(); + + let result = init_test_localization(&locale, temp_dir.path()); + assert!(result.is_ok()); + + // Test French message + let message1 = get_message("greeting"); + assert_eq!(message1, "Bonjour, le monde!"); + + // Test fallback to English + let message2 = get_message("missing-in-other"); + assert_eq!(message2, "This message only exists in English"); + }) + .join() + .unwrap(); + } + + #[test] + fn test_init_localization_invalid_locale_falls_back_to_english() { + std::thread::spawn(|| { + let temp_dir = create_test_locales_dir(); + let locale = LanguageIdentifier::from_str("de-DE").unwrap(); // No German file + + let result = init_test_localization(&locale, temp_dir.path()); + assert!(result.is_ok()); + + // Should use English as primary since German failed to load + let message = get_message("greeting"); + assert_eq!(message, "Hello, world!"); + }) + .join() + .unwrap(); + } + + #[test] + fn test_init_localization_already_initialized() { + std::thread::spawn(|| { + let temp_dir = create_test_locales_dir(); + let locale = LanguageIdentifier::from_str("en-US").unwrap(); + + // Initialize once + let result1 = init_test_localization(&locale, temp_dir.path()); + assert!(result1.is_ok()); + + // Try to initialize again - should fail + let result2 = init_test_localization(&locale, temp_dir.path()); + assert!(result2.is_err()); + + match result2 { 
+ Err(LocalizationError::Bundle(msg)) => { + assert!(msg.contains("already initialized")); + } + _ => panic!("Expected Bundle error"), + } + }) + .join() + .unwrap(); + } + + #[test] + fn test_get_message() { + std::thread::spawn(|| { + let temp_dir = create_test_locales_dir(); + let locale = LanguageIdentifier::from_str("fr-FR").unwrap(); + + init_test_localization(&locale, temp_dir.path()).unwrap(); + + let message = get_message("greeting"); + assert_eq!(message, "Bonjour, le monde!"); + }) + .join() + .unwrap(); + } + + #[test] + fn test_get_message_with_args() { + use fluent::FluentArgs; + std::thread::spawn(|| { + let temp_dir = create_test_locales_dir(); + let locale = LanguageIdentifier::from_str("en-US").unwrap(); + + init_test_localization(&locale, temp_dir.path()).unwrap(); + + let mut args = FluentArgs::new(); + args.set("name".to_string(), "Bob".to_string()); + + let message = get_message_with_args("welcome", args); + assert_eq!(message, "Welcome, Bob!"); + }) + .join() + .unwrap(); + } + + #[test] + fn test_get_message_with_args_pluralization() { + use fluent::FluentArgs; + std::thread::spawn(|| { + let temp_dir = create_test_locales_dir(); + let locale = LanguageIdentifier::from_str("en-US").unwrap(); + + init_test_localization(&locale, temp_dir.path()).unwrap(); + + // Test singular + let mut args1 = FluentArgs::new(); + args1.set("count", 1); + let message1 = get_message_with_args("count-items", args1); + assert_eq!(message1, "You have 1 item"); + + // Test plural + let mut args2 = FluentArgs::new(); + args2.set("count", 5); + let message2 = get_message_with_args("count-items", args2); + assert_eq!(message2, "You have 5 items"); + }) + .join() + .unwrap(); + } + + #[test] + fn test_thread_local_isolation() { + use std::thread; + + let temp_dir = create_test_locales_dir(); + + // Initialize in main thread with French + let temp_path_main = temp_dir.path().to_path_buf(); + let main_handle = thread::spawn(move || { + let locale = 
LanguageIdentifier::from_str("fr-FR").unwrap(); + init_test_localization(&locale, &temp_path_main).unwrap(); + let main_message = get_message("greeting"); + assert_eq!(main_message, "Bonjour, le monde!"); + }); + main_handle.join().unwrap(); + + // Test in a different thread - should not be initialized + let temp_path = temp_dir.path().to_path_buf(); + let handle = thread::spawn(move || { + // This thread should have its own uninitialized LOCALIZER + let thread_message = get_message("greeting"); + assert_eq!(thread_message, "greeting"); // Returns ID since not initialized + + // Initialize in this thread with English + let en_locale = LanguageIdentifier::from_str("en-US").unwrap(); + init_test_localization(&en_locale, &temp_path).unwrap(); + let thread_message_after_init = get_message("greeting"); + assert_eq!(thread_message_after_init, "Hello, world!"); + }); + + handle.join().unwrap(); + + // Test another thread to verify French doesn't persist across threads + let final_handle = thread::spawn(move || { + // Should be uninitialized again + let final_message = get_message("greeting"); + assert_eq!(final_message, "greeting"); + }); + final_handle.join().unwrap(); + } + + #[test] + fn test_japanese_localization() { + use fluent::FluentArgs; + std::thread::spawn(|| { + let temp_dir = create_test_locales_dir(); + let locale = LanguageIdentifier::from_str("ja-JP").unwrap(); + + let result = init_test_localization(&locale, temp_dir.path()); + assert!(result.is_ok()); + + // Test Japanese greeting + let message = get_message("greeting"); + assert_eq!(message, "こんにちは、世界!"); + + // Test Japanese with arguments + let mut args = FluentArgs::new(); + args.set("name".to_string(), "田中".to_string()); + let welcome = get_message_with_args("welcome", args); + assert_eq!(welcome, "ようこそ、田中さん!"); + + // Test Japanese count (no pluralization) + let mut count_args = FluentArgs::new(); + count_args.set("count".to_string(), "5".to_string()); + let count_message = 
get_message_with_args("count-items", count_args); + assert_eq!(count_message, "5個のアイテムがあります"); + }) + .join() + .unwrap(); + } + + #[test] + fn test_arabic_localization() { + use fluent::FluentArgs; + std::thread::spawn(|| { + let temp_dir = create_test_locales_dir(); + let locale = LanguageIdentifier::from_str("ar-SA").unwrap(); + + let result = init_test_localization(&locale, temp_dir.path()); + assert!(result.is_ok()); + + // Test Arabic greeting (RTL text) + let message = get_message("greeting"); + assert_eq!(message, "أهلاً بالعالم!"); + + // Test Arabic with arguments + let mut args = FluentArgs::new(); + args.set("name", "أحمد".to_string()); + let welcome = get_message_with_args("welcome", args); + assert_eq!(welcome, "أهلاً وسهلاً، أحمد!"); + + // Test Arabic pluralization (zero case) + let mut args_zero = FluentArgs::new(); + args_zero.set("count", 0); + let message_zero = get_message_with_args("count-items", args_zero); + assert_eq!(message_zero, "لديك لا عناصر"); + + // Test Arabic pluralization (one case) + let mut args_one = FluentArgs::new(); + args_one.set("count", 1); + let message_one = get_message_with_args("count-items", args_one); + assert_eq!(message_one, "لديك عنصر واحد"); + + // Test Arabic pluralization (two case) + let mut args_two = FluentArgs::new(); + args_two.set("count", 2); + let message_two = get_message_with_args("count-items", args_two); + assert_eq!(message_two, "لديك عنصران"); + + // Test Arabic pluralization (few case - 3-10) + let mut args_few = FluentArgs::new(); + args_few.set("count", 5); + let message_few = get_message_with_args("count-items", args_few); + assert_eq!(message_few, "لديك 5 عناصر"); + + // Test Arabic pluralization (other case - 11+) + let mut args_many = FluentArgs::new(); + args_many.set("count", 15); + let message_many = get_message_with_args("count-items", args_many); + assert_eq!(message_many, "لديك 15 عنصر"); + }) + .join() + .unwrap(); + } + + #[test] + fn test_arabic_localization_with_macro() { + 
std::thread::spawn(|| { + let temp_dir = create_test_locales_dir(); + let locale = LanguageIdentifier::from_str("ar-SA").unwrap(); + + let result = init_test_localization(&locale, temp_dir.path()); + assert!(result.is_ok()); + + // Test Arabic greeting (RTL text) + let message = translate!("greeting"); + assert_eq!(message, "أهلاً بالعالم!"); + + // Test Arabic with arguments + let welcome = translate!("welcome", "name" => "أحمد"); + assert_eq!(welcome, "أهلاً وسهلاً، أحمد!"); + + // Test Arabic pluralization (zero case) + let message_zero = translate!("count-items", "count" => 0); + assert_eq!(message_zero, "لديك لا عناصر"); + + // Test Arabic pluralization (one case) + let message_one = translate!("count-items", "count" => 1); + assert_eq!(message_one, "لديك عنصر واحد"); + + // Test Arabic pluralization (two case) + let message_two = translate!("count-items", "count" => 2); + assert_eq!(message_two, "لديك عنصران"); + + // Test Arabic pluralization (few case - 3-10) + let message_few = translate!("count-items", "count" => 5); + assert_eq!(message_few, "لديك 5 عناصر"); + + // Test Arabic pluralization (other case - 11+) + let message_many = translate!("count-items", "count" => 15); + assert_eq!(message_many, "لديك 15 عنصر"); + }) + .join() + .unwrap(); + } + + #[test] + fn test_mixed_script_fallback() { + std::thread::spawn(|| { + let temp_dir = create_test_locales_dir(); + let locale = LanguageIdentifier::from_str("ar-SA").unwrap(); + + let result = init_test_localization(&locale, temp_dir.path()); + assert!(result.is_ok()); + + // Test Arabic message exists + let arabic_message = get_message("greeting"); + assert_eq!(arabic_message, "أهلاً بالعالم!"); + + // Test fallback to English for missing message + let fallback_message = get_message("missing-in-other"); + assert_eq!(fallback_message, "This message only exists in English"); + }) + .join() + .unwrap(); + } + + #[test] + fn test_unicode_directional_isolation_disabled() { + use fluent::FluentArgs; + 
std::thread::spawn(|| { + let temp_dir = create_test_locales_dir(); + let locale = LanguageIdentifier::from_str("ar-SA").unwrap(); + + init_test_localization(&locale, temp_dir.path()).unwrap(); + + // Test that Latin script names are NOT isolated in RTL context + // since we disabled Unicode directional isolation + let mut args = FluentArgs::new(); + args.set("name".to_string(), "John Smith".to_string()); + let message = get_message_with_args("welcome", args); + + // The Latin name should NOT be wrapped in directional isolate characters + assert!(!message.contains("\u{2068}John Smith\u{2069}")); + assert_eq!(message, "أهلاً وسهلاً، John Smith!"); + }) + .join() + .unwrap(); + } + + #[test] + fn test_parse_resource_error_includes_snippet() { + let temp_dir = create_test_locales_dir(); + let locale = LanguageIdentifier::from_str("es-ES").unwrap(); + + let result = create_test_bundle(&locale, temp_dir.path()); + assert!(result.is_err()); + + if let Err(LocalizationError::ParseResource { + error: _err, + snippet, + }) = result + { + // The snippet should contain exactly the invalid text from es-ES.ftl + assert!( + snippet.contains("This is { $missing"), + "snippet was `{snippet}` but did not include the invalid text" + ); + } else { + panic!("Expected LocalizationError::ParseResource with snippet"); + } + } + + #[test] + fn test_localization_error_from_io_error() { + let io_error = std::io::Error::new(std::io::ErrorKind::NotFound, "File not found"); + let loc_error = LocalizationError::from(io_error); + + match loc_error { + LocalizationError::Io { source: _, path } => { + assert_eq!(path, PathBuf::from("")); + } + _ => panic!("Expected IO error variant"), + } + } + + #[test] + fn test_localization_error_uerror_impl() { + let error = LocalizationError::Bundle("some error".to_string()); + assert_eq!(error.code(), 1); + } + + #[test] + fn test_get_message_not_initialized() { + std::thread::spawn(|| { + let message = get_message("greeting"); + assert_eq!(message, 
"greeting"); // Should return the ID itself + }) + .join() + .unwrap(); + } + + #[test] + fn test_detect_system_locale_from_lang_env() { + // Test locale parsing logic directly instead of relying on environment variables + // which can have race conditions in multi-threaded test environments + + // Test parsing logic with UTF-8 encoding + let locale_with_encoding = "fr-FR.UTF-8"; + let parsed = locale_with_encoding.split('.').next().unwrap(); + let lang_id = LanguageIdentifier::from_str(parsed).unwrap(); + assert_eq!(lang_id.to_string(), "fr-FR"); + + // Test parsing logic without encoding + let locale_without_encoding = "es-ES"; + let lang_id = LanguageIdentifier::from_str(locale_without_encoding).unwrap(); + assert_eq!(lang_id.to_string(), "es-ES"); + + // Test that DEFAULT_LOCALE is valid + let default_lang_id = LanguageIdentifier::from_str(DEFAULT_LOCALE).unwrap(); + assert_eq!(default_lang_id.to_string(), "en-US"); + } + + #[test] + fn test_detect_system_locale_no_lang_env() { + // Save current LANG value + let original_lang = env::var("LANG").ok(); + + // Remove LANG environment variable + unsafe { + env::remove_var("LANG"); + } + + let result = detect_system_locale(); + assert!(result.is_ok()); + assert_eq!(result.unwrap().to_string(), "en-US"); + + // Restore original LANG value + if let Some(val) = original_lang { + unsafe { + env::set_var("LANG", val); + } + } else { + {} // Was already unset + } + } + + #[test] + fn test_setup_localization_success() { + std::thread::spawn(|| { + // Save current LANG value + let original_lang = env::var("LANG").ok(); + unsafe { + env::set_var("LANG", "en-US.UTF-8"); // Use English since we have embedded resources for "test" + } + + let result = setup_localization("test"); + assert!(result.is_ok()); + + // Test that we can get messages (should use embedded English for "test" utility) + let message = get_message("test-about"); + // Since we're using embedded resources, we should get the expected message + 
assert!(!message.is_empty()); + + // Restore original LANG value + if let Some(val) = original_lang { + unsafe { + env::set_var("LANG", val); + } + } else { + unsafe { + env::remove_var("LANG"); + } + } + }) + .join() + .unwrap(); + } + + #[test] + fn test_setup_localization_falls_back_to_english() { + std::thread::spawn(|| { + // Save current LANG value + let original_lang = env::var("LANG").ok(); + unsafe { + env::set_var("LANG", "de-DE.UTF-8"); // German file doesn't exist, should fallback + } + + let result = setup_localization("test"); + assert!(result.is_ok()); + + // Should fall back to English embedded resources + let message = get_message("test-about"); + assert!(!message.is_empty()); // Should get something, not just the key + + // Restore original LANG value + if let Some(val) = original_lang { + unsafe { + env::set_var("LANG", val); + } + } else { + unsafe { + env::remove_var("LANG"); + } + } + }) + .join() + .unwrap(); + } + + #[test] + fn test_setup_localization_fallback_to_embedded() { + std::thread::spawn(|| { + // Force English locale for this test + unsafe { + env::set_var("LANG", "en-US"); + } + + // Test with a utility name that has embedded locales + // This should fall back to embedded English when filesystem files aren't found + let result = setup_localization("test"); + if let Err(e) = &result { + eprintln!("Setup localization failed: {e}"); + } + assert!(result.is_ok()); + + // Verify we can get messages (using embedded English) + let message = get_message("test-about"); + assert_eq!(message, "Check file types and compare values."); // Should use embedded English + }) + .join() + .unwrap(); + } + + #[test] + fn test_error_display() { + let io_error = LocalizationError::Io { + source: std::io::Error::new(std::io::ErrorKind::NotFound, "File not found"), + path: PathBuf::from("/test/path.ftl"), + }; + let error_string = format!("{io_error}"); + assert!(error_string.contains("I/O error loading")); + 
assert!(error_string.contains("/test/path.ftl")); + + let bundle_error = LocalizationError::Bundle("Bundle creation failed".to_string()); + let bundle_string = format!("{bundle_error}"); + assert!(bundle_string.contains("Bundle error: Bundle creation failed")); + } + + #[test] + fn test_clap_localization_fallbacks() { + std::thread::spawn(|| { + // Test the scenario where localization isn't properly initialized + // and we need fallbacks for clap error handling + + // First, test when localizer is not initialized + let error_msg = get_message("common-error"); + assert_eq!(error_msg, "common-error"); // Should return key when not initialized + + let tip_msg = get_message("common-tip"); + assert_eq!(tip_msg, "common-tip"); // Should return key when not initialized + + // Now initialize with setup_localization + let result = setup_localization("comm"); + if result.is_err() { + // If setup fails (e.g., no embedded locales for comm), try with a known utility + let _ = setup_localization("test"); + } + + // Test that common strings are available after initialization + let error_after_init = get_message("common-error"); + // Should either be translated or return the key (but not panic) + assert!(!error_after_init.is_empty()); + + let tip_after_init = get_message("common-tip"); + assert!(!tip_after_init.is_empty()); + + // Test that clap error keys work with fallbacks + let unknown_arg_key = get_message("clap-error-unexpected-argument"); + assert!(!unknown_arg_key.is_empty()); + + // Test usage key fallback + let usage_key = get_message("common-usage"); + assert!(!usage_key.is_empty()); + }) + .join() + .unwrap(); + } +} + +#[cfg(all(test, not(debug_assertions)))] +mod fhs_tests { + use super::*; + use tempfile::TempDir; + + #[test] + fn resolves_fhs_share_locales_layout() { + // 1. Set up a fake installation prefix in a temp directory + let prefix = TempDir::new().unwrap(); // e.g. 
/tmp/xyz + let bin_dir = prefix.path().join("bin"); // /tmp/xyz/bin + let share_dir = prefix.path().join("share").join("locales").join("cut"); // /tmp/xyz/share/locales/cut + std::fs::create_dir_all(&share_dir).unwrap(); + std::fs::create_dir_all(&bin_dir).unwrap(); + + // 2. Pretend the executable lives in /bin + let exe_dir = bin_dir.as_path(); + + // 3. Ask the helper to resolve the locales dir + let result = resolve_locales_dir_from_exe_dir(exe_dir, "cut") + .expect("should find locales via FHS path"); + + assert_eq!(result, share_dir); + } +} diff --git a/src/utils.rs b/src/uudiff/src/lib/mods/utils.rs similarity index 85% rename from src/utils.rs rename to src/uudiff/src/lib/mods/utils.rs index daca18d..8290451 100644 --- a/src/utils.rs +++ b/src/uudiff/src/lib/mods/utils.rs @@ -1,23 +1,31 @@ // This file is part of the uutils diffutils package. // -// For the full copyright and license information, please view the LICENSE-* -// files that was distributed with this source code. +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. -use regex::Regex; -use std::{ffi::OsString, io::Write}; +use std::{ffi::OsStr, io::Write}; use unicode_width::UnicodeWidthStr; +/// Return of compare function if no error occurred. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum CompareOk { + Equal, + Different, +} + /// Replace tabs by spaces in the input line. /// Correctly handle multi-bytes characters. /// This assumes that line does not contain any line breaks (if it does, the result is undefined). +// TODO This function does not seem to be used. 
#[must_use] +#[allow(clippy::naive_bytecount)] pub fn do_expand_tabs(line: &[u8], tabsize: usize) -> Vec { let tab = b'\t'; - let ntabs = line.iter().filter(|c| **c == tab).count(); - if ntabs == 0 { + let n_tabs = line.iter().filter(|c| **c == tab).count(); + if n_tabs == 0 { return line.to_vec(); } - let mut result = Vec::with_capacity(line.len() + ntabs * (tabsize - 1)); + let mut result = Vec::with_capacity(line.len() + n_tabs * (tabsize - 1)); let mut offset = 0; let mut iter = line.split(|c| *c == tab).peekable(); @@ -71,31 +79,9 @@ pub fn get_modification_time(file_path: &str) -> String { modification_time } -pub fn format_failure_to_read_input_file( - executable: &OsString, - filepath: &OsString, - error: &std::io::Error, -) -> String { - // std::io::Error's display trait outputs "{detail} (os error {code})" - // but we want only the {detail} (error string) part - let error_code_re = Regex::new(r"\ \(os\ error\ \d+\)$").unwrap(); - format!( - "{}: {}: {}", - executable.to_string_lossy(), - filepath.to_string_lossy(), - error_code_re.replace(error.to_string().as_str(), ""), - ) -} - -pub fn report_failure_to_read_input_file( - executable: &OsString, - filepath: &OsString, - error: &std::io::Error, -) { - eprintln!( - "{}", - format_failure_to_read_input_file(executable, filepath, error) - ); +/// Checks if files are the same (same file link), which must return 'equal'. +pub fn is_same_file(from: &OsStr, to: &OsStr) -> bool { + (from == "-" && to == "-") || same_file::is_same_file(from, to).unwrap_or(false) } #[cfg(test)] diff --git a/tests/by-util/test_cmp.rs b/tests/by-util/test_cmp.rs new file mode 100644 index 0000000..b6a804f --- /dev/null +++ b/tests/by-util/test_cmp.rs @@ -0,0 +1,991 @@ +// This file is part of the uutils diffutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. 
+// #[cfg(target_os = "linux")] + +// spell-checker:ignore ijkl ndefg + +use ::cmp::params_cmp::{Params, SkipU64, uu_app}; +use assert_cmd::cargo::cargo_bin_cmd; +use predicates::prelude::predicate; +use std::{ffi::OsString, fs::File, io::Write}; +use tempfile::tempdir; +use uudiff::error::UResult; +use uutests::{at_and_ucmd, new_ucmd}; + +mod cmp { + + use super::*; + + #[test] + fn test_files_equal() { + new_ucmd!() + .arg("lorem_ipsum.txt") + .arg("lorem_ipsum_equal.txt") + .succeeds() + .no_output(); + } + + #[test] + #[cfg(not(windows))] + fn test_files_different() { + new_ucmd!() + .arg("lorem_ipsum.txt") + .arg("lorem_ipsum_diff.txt") + .fails_with_code(1) + .stdout_is("lorem_ipsum.txt lorem_ipsum_diff.txt differ: char 190, line 4\n"); + } + + #[test] + #[cfg(windows)] + fn test_files_different() { + new_ucmd!() + .arg("lorem_ipsum.txt") + .arg("lorem_ipsum_diff.txt") + .fails_with_code(1) + .stdout_is("lorem_ipsum.txt lorem_ipsum_diff.txt differ: char 193, line 4\n"); + } + + #[test] + fn test_files_different_immediate() -> Result<(), Box> { + let tmp_dir = tempdir()?; + + let a_path = tmp_dir.path().join("a"); + let mut a = File::create(&a_path).unwrap(); + a.write_all(b"abc\n").unwrap(); + + let b_path = tmp_dir.path().join("b"); + let mut b = File::create(&b_path).unwrap(); + b.write_all(b"bcd\n").unwrap(); + + let mut cmd = cargo_bin_cmd!("diffutils"); + cmd.env("LC_ALL", "C"); + cmd.arg("cmp"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stdout(predicate::str::ends_with(" differ: char 1, line 1\n")); + + let mut cmd = cargo_bin_cmd!("diffutils"); + cmd.env("LC_ALL", "C"); + cmd.arg("cmp"); + cmd.arg("-b"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::ends_with( + " differ: byte 1, line 1 is 141 a 142 b\n", + )); + + let mut cmd = cargo_bin_cmd!("diffutils"); + cmd.env("LC_ALL", "C"); + cmd.arg("cmp"); + 
cmd.arg("-l"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stderr(predicate::str::is_empty()) + .stdout(predicate::eq("1 141 142\n2 142 143\n3 143 144\n")); + + let mut cmd = cargo_bin_cmd!("diffutils"); + cmd.env("LC_ALL", "C"); + cmd.arg("cmp"); + cmd.arg("-l"); + cmd.arg("-b"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stderr(predicate::str::is_empty()) + .stdout(predicate::eq( + "1 141 a 142 b\n2 142 b 143 c\n3 143 c 144 d\n", + )); + + Ok(()) + } + + #[test] + fn test_stdin() -> Result<(), Box> { + let tmp_dir = tempfile::tempdir()?; + + let a_path = tmp_dir.path().join("a"); + let mut a = File::create(&a_path).unwrap(); + a.write_all(b"a\n").unwrap(); + + // TODO cmp is not yet compiled automatically + let mut cmd = cargo_bin_cmd!("diffutils"); + cmd.arg("cmp"); + cmd.arg(&a_path); + cmd.write_stdin("a\n"); + cmd.assert() + .code(predicate::eq(0)) + .success() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::is_empty()); + + let mut cmd = cargo_bin_cmd!("diffutils"); + cmd.env("LC_ALL", "C"); + cmd.arg("cmp"); + cmd.arg(&a_path); + cmd.write_stdin("b\n"); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::ends_with(" - differ: char 1, line 1\n")); + + Ok(()) + } + + #[test] + fn test_invalid_file_is_dir() { + let (at, mut ucmd) = at_and_ucmd!(); + + at.mkdir("a_dir"); + + ucmd.arg("a_dir") + .fails_with_code(2) + .stderr_is("cmp: a_dir: Is a directory\n"); + } + + #[test] + fn cmp_one_file_empty() -> Result<(), Box> { + let tmp_dir = tempdir()?; + + let a_path = tmp_dir.path().join("a"); + let mut a = File::create(&a_path).unwrap(); + a.write_all(b"a\n").unwrap(); + + let b_path = tmp_dir.path().join("b"); + let _ = File::create(&b_path).unwrap(); + + let mut cmd = cargo_bin_cmd!("diffutils"); + cmd.arg("cmp"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(1)) + 
.failure() + .stderr(predicate::str::contains(" EOF on ")) + .stderr(predicate::str::ends_with(" which is empty\n")); + + Ok(()) + } + + #[test] + fn cmp_newline_difference() -> Result<(), Box> { + let tmp_dir = tempdir()?; + + let a_path = tmp_dir.path().join("a"); + let mut a = File::create(&a_path).unwrap(); + a.write_all(b"abc\ndefg").unwrap(); + + let b_path = tmp_dir.path().join("b"); + let mut b = File::create(&b_path).unwrap(); + b.write_all(b"abc\ndef\ng").unwrap(); + + let mut cmd = cargo_bin_cmd!("diffutils"); + cmd.env("LC_ALL", "C"); + cmd.arg("cmp"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::ends_with(" differ: char 8, line 2\n")); + + let mut cmd = cargo_bin_cmd!("diffutils"); + cmd.env("LC_ALL", "C"); + cmd.arg("cmp"); + cmd.arg("-b"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::ends_with( + " differ: byte 8, line 2 is 147 g 12 ^J\n", + )); + + let mut cmd = cargo_bin_cmd!("diffutils"); + cmd.env("LC_ALL", "C"); + cmd.arg("cmp"); + cmd.arg("-l"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stdout(predicate::str::starts_with("8 147 12\n")) + .stderr(predicate::str::contains(" EOF on")) + .stderr(predicate::str::ends_with(" after byte 8\n")); + + let mut cmd = cargo_bin_cmd!("diffutils"); + cmd.env("LC_ALL", "C"); + cmd.arg("cmp"); + cmd.arg("-b"); + cmd.arg("-l"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stdout(predicate::str::starts_with("8 147 g 12 ^J\n")) + .stderr(predicate::str::contains(" EOF on")) + .stderr(predicate::str::ends_with(" after byte 8\n")); + + Ok(()) + } + + #[test] + fn cmp_max_bytes() -> Result<(), Box> { + let tmp_dir = tempdir()?; + + let a_path = tmp_dir.path().join("a"); + let mut a = File::create(&a_path).unwrap(); + 
a.write_all(b"abc efg ijkl\n").unwrap(); + + let b_path = tmp_dir.path().join("b"); + let mut b = File::create(&b_path).unwrap(); + b.write_all(b"abcdefghijkl\n").unwrap(); + + let mut cmd = cargo_bin_cmd!("diffutils"); + cmd.arg("cmp"); + cmd.arg("-l"); + cmd.arg("-b"); + cmd.arg("-n"); + cmd.arg("3"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(0)) + .success() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::is_empty()); + + let mut cmd = cargo_bin_cmd!("diffutils"); + cmd.arg("cmp"); + cmd.arg("-l"); + cmd.arg("-b"); + cmd.arg("-n"); + cmd.arg("4"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stderr(predicate::str::is_empty()) + .stdout(predicate::eq("4 40 144 d\n")); + + let mut cmd = cargo_bin_cmd!("diffutils"); + cmd.arg("cmp"); + cmd.arg("-l"); + cmd.arg("-b"); + cmd.arg("-n"); + cmd.arg("13"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stderr(predicate::str::is_empty()) + .stdout(predicate::eq(" 4 40 144 d\n 8 40 150 h\n")); + Ok(()) + } + + #[test] + fn cmp_skip_args_parsing() -> Result<(), Box> { + let tmp_dir = tempdir()?; + + let a_path = tmp_dir.path().join("a"); + let mut a = File::create(&a_path).unwrap(); + a.write_all(b"---abc\n").unwrap(); + + let b_path = tmp_dir.path().join("b"); + let mut b = File::create(&b_path).unwrap(); + b.write_all(b"###abc\n").unwrap(); + + let mut cmd = cargo_bin_cmd!("diffutils"); + cmd.env("LC_ALL", "C"); + cmd.arg("cmp"); + cmd.arg("-i"); + cmd.arg("3"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(0)) + .success() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::is_empty()); + + // Positional skips should be ignored + let mut cmd = cargo_bin_cmd!("diffutils"); + cmd.env("LC_ALL", "C"); + cmd.arg("cmp"); + cmd.arg("-i"); + cmd.arg("3"); + cmd.arg(&a_path).arg(&b_path); + cmd.arg("1").arg("1"); + cmd.assert() + .code(predicate::eq(0)) + 
.success() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::is_empty()); + + // Single positional argument should only affect first file. + let mut cmd = cargo_bin_cmd!("diffutils"); + cmd.env("LC_ALL", "C"); + cmd.arg("cmp"); + cmd.arg(&a_path).arg(&b_path); + cmd.arg("3"); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::ends_with(" differ: char 1, line 1\n")); + + let mut cmd = cargo_bin_cmd!("diffutils"); + cmd.env("LC_ALL", "C"); + cmd.arg("cmp"); + cmd.arg(&a_path).arg(&b_path); + cmd.arg("3"); + cmd.arg("3"); + cmd.assert() + .code(predicate::eq(0)) + .success() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::is_empty()); + + Ok(()) + } + + #[test] + fn cmp_skip_suffix_parsing() -> Result<(), Box> { + let tmp_dir = tempdir()?; + + let a_path = tmp_dir.path().join("a"); + let mut a = File::create(&a_path).unwrap(); + writeln!(a, "{}c", "a".repeat(1024)).unwrap(); + a.flush().unwrap(); + + let b_path = tmp_dir.path().join("b"); + let mut b = File::create(&b_path).unwrap(); + writeln!(b, "{}c", "b".repeat(1024)).unwrap(); + b.flush().unwrap(); + + let mut cmd = cargo_bin_cmd!("diffutils"); + cmd.arg("cmp"); + cmd.arg("--ignore-initial=1K"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(0)) + .success() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::is_empty()); + + Ok(()) + } + + #[test] + fn cmp_skip() -> Result<(), Box> { + let tmp_dir = tempdir()?; + + let a_path = tmp_dir.path().join("a"); + let mut a = File::create(&a_path).unwrap(); + a.write_all(b"abc efg ijkl\n").unwrap(); + + let b_path = tmp_dir.path().join("b"); + let mut b = File::create(&b_path).unwrap(); + b.write_all(b"abcdefghijkl\n").unwrap(); + + let mut cmd = cargo_bin_cmd!("diffutils"); + cmd.arg("cmp"); + cmd.arg("-l"); + cmd.arg("-b"); + cmd.arg("-i"); + cmd.arg("8"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(0)) + 
.success() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::is_empty()); + + let mut cmd = cargo_bin_cmd!("diffutils"); + cmd.arg("cmp"); + cmd.arg("-b"); + cmd.arg("-i"); + cmd.arg("4"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::ends_with( + " differ: byte 4, line 1 is 40 150 h\n", + )); + + Ok(()) + } + + #[test] + fn cmp_binary() -> Result<(), Box> { + let tmp_dir = tempdir()?; + + let mut bytes = vec![0, 15, 31, 32, 33, 40, 64, 126, 127, 128, 129, 200, 254, 255]; + + let a_path = tmp_dir.path().join("a"); + let mut a = File::create(&a_path).unwrap(); + a.write_all(&bytes).unwrap(); + + bytes.reverse(); + + let b_path = tmp_dir.path().join("b"); + let mut b = File::create(&b_path).unwrap(); + b.write_all(&bytes).unwrap(); + + let mut cmd = cargo_bin_cmd!("diffutils"); + cmd.arg("cmp"); + cmd.arg("-l"); + cmd.arg("-b"); + cmd.arg(&a_path).arg(&b_path); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stdout(predicate::eq(concat!( + " 1 0 ^@ 377 M-^?\n", + " 2 17 ^O 376 M-~\n", + " 3 37 ^_ 310 M-H\n", + " 4 40 201 M-^A\n", + " 5 41 ! 200 M-^@\n", + " 6 50 ( 177 ^?\n", + " 7 100 @ 176 ~\n", + " 8 176 ~ 100 @\n", + " 9 177 ^? 50 (\n", + "10 200 M-^@ 41 !\n", + "11 201 M-^A 40 \n", + "12 310 M-H 37 ^_\n", + "13 376 M-~ 17 ^O\n", + "14 377 M-^? 0 ^@\n" + ))); + + Ok(()) + } + + #[test] + #[cfg(not(windows))] + fn cmp_fast_paths() -> Result<(), Box> { + let tmp_dir = tempdir()?; + + // This test mimics one found in the GNU cmp test suite. It is used for + // validating the /dev/null optimization. 
+ let a_path = tmp_dir.path().join("a"); + let a = File::create(&a_path).unwrap(); + a.set_len(14 * 1024 * 1024 * 1024 * 1024).unwrap(); + + let b_path = tmp_dir.path().join("b"); + let b = File::create(&b_path).unwrap(); + b.set_len(15 * 1024 * 1024 * 1024 * 1024).unwrap(); + + let dev_null = std::fs::OpenOptions::new() + .write(true) + .open("/dev/null") + .unwrap(); + + let mut child = std::process::Command::new(assert_cmd::cargo::cargo_bin!("diffutils")) + .arg("cmp") + .arg(&a_path) + .arg(&b_path) + .stdout(dev_null) + .spawn() + .unwrap(); + + // Bound the runtime to a very short time that still allows for some resource + // constraint to slow it down while also allowing very fast systems to exit as + // early as possible. + const MAX_TRIES: u8 = 50; + for tries in 0..=MAX_TRIES { + assert!( + tries != MAX_TRIES, + "cmp took too long to run, /dev/null optimization probably not working" + ); + match child.try_wait() { + Ok(Some(status)) => { + assert_eq!(status.code(), Some(1)); + break; + } + Ok(None) => (), + Err(e) => panic!("{e:#?}"), + } + std::thread::sleep(std::time::Duration::from_millis(10)); + } + + // Two standard inputs should be equal + let mut cmd = cargo_bin_cmd!("diffutils"); + cmd.arg("cmp"); + cmd.arg("-"); + cmd.arg("-"); + cmd.assert() + .code(predicate::eq(0)) + .success() + .stdout(predicate::str::is_empty()) + .stderr(predicate::str::is_empty()); + + // Files with longer than block size equal segments should still report + // the correct line number for the difference. Assumes 8KB block size (see + // https://github.com/rust-lang/rust/blob/master/library/std/src/sys_common/io.rs), + // create a 24KB equality. 
+ let mut bytes = " ".repeat(4095); + bytes.push('\n'); + bytes.push_str(&" ".repeat(4096)); + + let bytes = bytes.repeat(3); + let bytes = bytes.as_bytes(); + + let a_path = tmp_dir.path().join("a"); + let mut a = File::create(&a_path).unwrap(); + a.write_all(bytes).unwrap(); + a.write_all(b"A").unwrap(); + + let b_path = tmp_dir.path().join("b"); + let mut b = File::create(&b_path).unwrap(); + b.write_all(bytes).unwrap(); + b.write_all(b"B").unwrap(); + + let mut cmd = cargo_bin_cmd!("diffutils"); + cmd.arg("cmp"); + cmd.arg(&a_path).arg(&b_path); + cmd.env("LC_ALL", "en_US"); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stdout(predicate::str::ends_with(" differ: byte 24577, line 4\n")); + + Ok(()) + } +} + +mod parser { + + use super::*; + + fn os(s: &str) -> OsString { + OsString::from(s) + } + + /// Simplify call of parser, just pass a normal string like in the terminal. + fn parse(args: &str) -> UResult { + let opts = args + .split(' ') + .filter(|arg| !arg.is_empty()) + .map(OsString::from); + + let matches = uudiff::clap_localization::handle_clap_result(uu_app(), opts)?; + let params: Params = matches.try_into()?; + + Ok(params) + } + + #[test] + fn test_invalid_arg() { + new_ucmd!() + .arg("--definitely-invalid") + .fails_with_code(2) + .stderr_contains("unexpected option '--definitely-invalid' found"); + } + + #[test] + fn test_parser_no_arg() { + new_ucmd!() + .fails_with_code(2) + .stderr_contains("cmp: missing operand after 'cmp'"); + } + + #[test] + /// --ver ambiguous --version --verbose + fn test_parser_ambiguous() { + new_ucmd!() + .arg("--ver") + .fails_with_code(2) + .stderr_contains("--verbose") + .stderr_contains("--version"); + } + + #[test] + // multiple tests in one for historical reasons + fn test_parser_positional() { + // file_1 only + assert_eq!( + parse("cmp foo").unwrap(), + Params { + from: os("foo"), + to: os("-"), + ..Default::default() + } + ); + + // double dash without operand: following is interpreted as file + 
assert_eq!( + parse("cmp foo -- --help").unwrap(), + Params { + from: os("foo"), + to: os("--help"), + ..Default::default() + } + ); + + // --ignore-initial for file_1 as operand + assert_eq!( + parse("cmp foo bar 1K").unwrap(), + Params { + from: os("foo"), + to: os("bar"), + skip_bytes_from: Some(1024), + skip_bytes_to: None, + ..Default::default() + } + ); + } + + #[test] + /// --bytes with value greater than u64 + fn test_parser_bytes_value_too_large() { + new_ucmd!() + .arg("lorem_ipsum.txt") + .arg("lorem_ipsum_diff.txt") + .arg("--bytes") + .arg("1ZB") + .fails_with_code(2) + .stderr_contains("cmp: invalid unit in '1ZB' for option '--bytes'"); + + new_ucmd!() + .arg("lorem_ipsum.txt") + .arg("lorem_ipsum_diff.txt") + .arg("--bytes") + .arg("99999999999999999999999999999999999999999999999999999999999") + .fails_with_code(2) + .stderr_contains("cmp: invalid value '99999999999999999999999999999999999999999999999999999999999' (too large) for option '--bytes'"); + } + + #[test] + /// --bytes with value negative + fn test_parser_bytes_negative() { + new_ucmd!() + .arg("lorem_ipsum.txt") + .arg("lorem_ipsum_diff.txt") + .arg("--bytes=-1") + .fails_with_code(2) + .stderr_contains("cmp: invalid value '-1' for option '--bytes'"); + } + + #[test] + /// --ignore-initial with value greater than u64 + fn test_parser_ignore_initial_value_too_large() { + new_ucmd!() + .arg("lorem_ipsum.txt") + .arg("lorem_ipsum_diff.txt") + .arg("1") + .arg("2Y") + .fails_with_code(2) + .stderr_contains("cmp: invalid unit in '2Y' for option '--ignore-initial'"); + + new_ucmd!() + .arg("lorem_ipsum.txt") + .arg("lorem_ipsum_diff.txt") + .arg("--ignore-initial") + .arg("99999999999999999999999999999999999999999999999999999999999") + .fails_with_code(2) + .stderr_contains("cmp: invalid value '99999999999999999999999999999999999999999999999999999999999' (too large) for option '--ignore-initial'"); + } + + #[test] + /// --ignore-initial with too many colon-separated values (1:2:3) + fn 
test_parser_ignore_initial_too_many_values() { + new_ucmd!() + .arg("lorem_ipsum.txt") + .arg("lorem_ipsum_diff.txt") + .arg("--ignore-initial") + .arg("1:2:3") + .fails_with_code(2) + .stderr_contains("cmp: invalid unit in '2:3' for option '--ignore-initial'"); + } + + #[test] + fn test_parser_too_many_operands() { + new_ucmd!() + .arg("lorem_ipsum.txt") + .arg("lorem_ipsum_diff.txt") + .arg("1") + .arg("2") + .arg("3") + .fails_with_code(2) + .stderr_contains("cmp: extra operand '3'"); + } + + #[test] + fn test_parser_incompatible_silent_and_verbose() { + new_ucmd!() + .arg("--silent") + .arg("--verbose") + .arg("lorem_ipsum.txt") + .arg("lorem_ipsum_diff.txt") + .fails_with_code(2) + .stderr_contains("cmp: options '--verbose' and '--silent' are incompatible"); + } + + #[test] + fn test_parser_incompatible_quiet_and_verbose() { + new_ucmd!() + .arg("--quiet") + .arg("--verbose") + .arg("lorem_ipsum.txt") + .arg("lorem_ipsum_diff.txt") + .fails_with_code(2) + .stderr_contains("cmp: options '--verbose' and '--silent' are incompatible"); + } + + #[test] + // This is not a GNU error, but should be + fn test_parser_incompatible_silent_and_print_bytes() { + new_ucmd!() + .arg("--silent") + .arg("--print-bytes") + .arg("lorem_ipsum.txt") + .arg("lorem_ipsum_diff.txt") + .fails_with_code(2) + .stderr_contains("cmp: options '--print-bytes' and '--silent' are incompatible"); + } + + #[test] + fn test_execution_modes() { + // --print-bytes + let print_bytes = Params { + from: os("foo"), + to: os("bar"), + print_bytes: true, + ..Default::default() + }; + assert_eq!(parse("cmp -b foo bar").unwrap(), print_bytes.clone()); + assert_eq!( + parse("cmp --print-bytes foo bar").unwrap(), + (print_bytes.clone()) + ); + assert_eq!(parse("cmp --pr foo bar").unwrap(), print_bytes); + + // --verbose + let verbose = Params { + from: os("foo"), + to: os("bar"), + verbose: true, + ..Default::default() + }; + assert_eq!(parse("cmp -l foo bar").unwrap(), verbose.clone()); + 
assert_eq!(parse("cmp --verbose foo bar").unwrap(), verbose.clone()); + assert_eq!(parse("cmp --verb foo bar").unwrap(), verbose.clone()); + + // --verbose & --print-bytes + let verbose_and_print_bytes = Params { + from: os("foo"), + to: os("bar"), + print_bytes: true, + verbose: true, + ..Default::default() + }; + assert_eq!( + parse("cmp -l -b foo bar").unwrap(), + verbose_and_print_bytes.clone() + ); + assert_eq!( + parse("cmp -lb foo bar").unwrap(), + verbose_and_print_bytes.clone() + ); + assert_eq!( + parse("cmp -bl foo bar").unwrap(), + verbose_and_print_bytes.clone() + ); + assert_eq!( + parse("cmp --verbose --print-bytes foo bar").unwrap(), + verbose_and_print_bytes.clone() + ); + assert_eq!( + parse("cmp --verb --p foo bar").unwrap(), + verbose_and_print_bytes.clone() + ); + + // --silent --quiet + let silent = Params { + from: os("foo"), + to: os("bar"), + silent: true, + ..Default::default() + }; + assert_eq!(parse("cmp -s foo bar").unwrap(), silent.clone()); + assert_eq!(parse("cmp --silent foo bar").unwrap(), (silent.clone())); + assert_eq!(parse("cmp --quiet foo bar").unwrap(), (silent.clone())); + } + + #[test] + /// These are all identical: + /// - cmp file_1 file_2 -bl -n 1024 + /// - cmp file_1 file_2 -bl -n 1k + /// - cmp file_1 file_2 -bl -n 1K + /// - cmp file_1 file_2 -bl -n 1KiB + /// - cmp file_1 file_2 -bl -n 1kiB + /// - cmp file_1 file_2 -bl -n1kiB + /// - cmp file_1 file_2 -bln1kiB + fn test_bytes_limit() { + let mut bytes_limit = Params { + from: os("foo"), + to: os("bar"), + bytes_limit: Some(1000), + ..Default::default() + }; + assert_eq!(parse("cmp -n 1000 foo bar").unwrap(), (bytes_limit.clone())); + assert_eq!(parse("cmp -n1000 foo bar").unwrap(), (bytes_limit.clone())); + assert_eq!(parse("cmp -n 1kB foo bar").unwrap(), (bytes_limit.clone())); + assert_eq!(parse("cmp -n 1KB foo bar").unwrap(), (bytes_limit.clone())); + + bytes_limit.bytes_limit = Some(1024); + assert_eq!(parse("cmp -n 1024 foo bar").unwrap(), 
(bytes_limit.clone())); + assert_eq!(parse("cmp -n 1k foo bar").unwrap(), (bytes_limit.clone())); + assert_eq!(parse("cmp -n 1K foo bar").unwrap(), (bytes_limit.clone())); + assert_eq!(parse("cmp -n 1KiB foo bar").unwrap(), (bytes_limit.clone())); + assert_eq!(parse("cmp -n 1kiB foo bar").unwrap(), (bytes_limit.clone())); + assert_eq!(parse("cmp -n1024 foo bar").unwrap(), (bytes_limit.clone())); + assert_eq!(parse("cmp -n1k foo bar").unwrap(), (bytes_limit.clone())); + assert_eq!(parse("cmp -n1K foo bar").unwrap(), (bytes_limit.clone())); + assert_eq!( + parse("cmp --bytes=1024 foo bar").unwrap(), + bytes_limit.clone() + ); + assert_eq!( + parse("cmp --bytes=1K foo bar").unwrap(), + (bytes_limit.clone()) + ); + assert_eq!( + parse("cmp --bytes 1024 foo bar").unwrap(), + bytes_limit.clone() + ); + assert_eq!( + parse("cmp --bytes 1K foo bar").unwrap(), + (bytes_limit.clone()) + ); + bytes_limit.print_bytes = true; + bytes_limit.verbose = true; + assert_eq!( + parse("cmp -bln1kiB foo bar").unwrap(), + (bytes_limit.clone()) + ); + bytes_limit.print_bytes = false; + bytes_limit.verbose = false; + + // Test large numbers + // Most modern Linux distributions (like Debian, Ubuntu, or CentOS) compile their core utilities (GNU diffutils) with Large File Support (LFS). + // This uses the _FILE_OFFSET_BITS=64 flag, which forces the system to use a 64-bit integer (`off_t`) for file offsets and sizes. + // Even on a 32-bit processor, cmp can handle files much larger than the system's memory or 4 GB address space. The limit: + // Technically 9,223,372,036,854,775,807 bytes (2^63 - 1). + // This is a problematic topic. File sizes can be larger than u64. Should the new cmp allow larger numbers (u128)? 
+ bytes_limit.bytes_limit = Some(1_000_000); + assert_eq!(parse("cmp -n 1MB foo bar").unwrap(), (bytes_limit.clone())); + bytes_limit.bytes_limit = Some(1_048_576); + assert_eq!(parse("cmp -n 1M foo bar").unwrap(), (bytes_limit.clone())); + assert_eq!(parse("cmp -n 1MiB foo bar").unwrap(), (bytes_limit.clone())); + bytes_limit.bytes_limit = Some(1_000_000_000); + assert_eq!(parse("cmp -n 1GB foo bar").unwrap(), (bytes_limit.clone())); + bytes_limit.bytes_limit = Some(1_073_741_824); + assert_eq!(parse("cmp -n 1G foo bar").unwrap(), (bytes_limit.clone())); + assert_eq!(parse("cmp -n 1GiB foo bar").unwrap(), (bytes_limit.clone())); + bytes_limit.bytes_limit = Some(1_000_000_000_000); + assert_eq!(parse("cmp -n 1TB foo bar").unwrap(), (bytes_limit.clone())); + bytes_limit.bytes_limit = Some(1_099_511_627_776); + assert_eq!(parse("cmp -n 1T foo bar").unwrap(), (bytes_limit.clone())); + assert_eq!(parse("cmp -n 1TiB foo bar").unwrap(), (bytes_limit.clone())); + bytes_limit.bytes_limit = Some(1_000_000_000_000_000); + assert_eq!(parse("cmp -n 1PB foo bar").unwrap(), (bytes_limit.clone())); + bytes_limit.bytes_limit = Some(1_125_899_906_842_624); + assert_eq!(parse("cmp -n 1P foo bar").unwrap(), (bytes_limit.clone())); + assert_eq!(parse("cmp -n 1PiB foo bar").unwrap(), (bytes_limit.clone())); + bytes_limit.bytes_limit = Some(1_000_000_000_000_000_000); + assert_eq!(parse("cmp -n 1EB foo bar").unwrap(), (bytes_limit.clone())); + bytes_limit.bytes_limit = Some(1_152_921_504_606_846_976); + assert_eq!(parse("cmp -n 1E foo bar").unwrap(), (bytes_limit.clone())); + assert_eq!(parse("cmp -n 1EiB foo bar").unwrap(), (bytes_limit.clone())); + } + + #[test] + fn test_ignore_initial() { + let mut skips = Params { + from: os("foo"), + to: os("bar"), + skip_bytes_from: Some(1), + skip_bytes_to: Some(1), + ..Default::default() + }; + assert_eq!(parse("cmp -i 1 foo bar").unwrap(), skips.clone()); + assert_eq!( + parse("cmp --ignore-initial 1 foo bar").unwrap(), + skips.clone() + ); + 
assert_eq!(parse("cmp --ig 1 foo bar").unwrap(), skips.clone()); + + // 2nd value different + skips.skip_bytes_to = Some(2); + assert_eq!( + parse("cmp --ignore-initial=1:2 foo bar").unwrap(), + skips.clone() + ); + + // uses higher positional arguments when operand and options are both provided + skips.skip_bytes_from = Some(3); + skips.skip_bytes_to = Some(4); + assert_eq!( + parse("cmp --ignore-initial=1:2 foo bar 3 4").unwrap(), + skips.clone() + ); + + // large numbers + skips.skip_bytes_from = Some(1_000_000_000); + skips.skip_bytes_to = Some(2 * 1_152_921_504_606_846_976); + assert_eq!( + parse("cmp --ignore-initial=1GB:2E foo bar").unwrap(), + skips.clone() + ); + + // All special suffixes for ignore-initial. + for (i, suffixes) in [ + ["kB", "K"], + ["MB", "M"], + ["GB", "G"], + ["TB", "T"], + ["PB", "P"], + ["EB", "E"], + // These values give an error in GNU cmp + // #[cfg(feature = "cmp_bytes_limit_128_bit")] + // ["ZB", "Z"], + // #[cfg(feature = "cmp_bytes_limit_128_bit")] + // ["YB", "Y"], + ] + .iter() + .enumerate() + { + let values = [ + (1_000 as SkipU64) + .checked_pow((i + 1) as u32) + .expect(&format!("number too large for suffix {suffixes:?}")), + (1024 as SkipU64) + .checked_pow((i + 1) as u32) + .expect(&format!("number too large for suffix {suffixes:?}")), + ]; + for (j, v) in values.iter().enumerate() { + assert_eq!( + parse(&format!("cmp -i 1{}:2 foo bar", suffixes[j])).unwrap(), + Params { + from: os("foo"), + to: os("bar"), + skip_bytes_from: Some(*v), + skip_bytes_to: Some(2), + ..Default::default() + } + ); + } + } + } +} diff --git a/tests/by-util/test_diff.rs b/tests/by-util/test_diff.rs new file mode 100644 index 0000000..cda4787 --- /dev/null +++ b/tests/by-util/test_diff.rs @@ -0,0 +1,642 @@ +// This file is part of the uutils diffutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. 
+// #[cfg(target_os = "linux")] + +// spell-checker:ignore alef alefr alefx betr betx nodiff + +use assert_cmd::cargo::cargo_bin_cmd; +use predicates::prelude::{PredicateBooleanExt, predicate}; +use std::{fs::File, io::Write}; +use tempfile::{NamedTempFile, tempdir}; +use uudiff::assert_diff_eq; +use uutests::new_ucmd; + +mod diff { + + use super::*; + + #[test] + fn no_differences() -> Result<(), Box> { + let file = NamedTempFile::new()?; + for option in ["", "-u", "-c", "-e"] { + let mut cmd = cargo_bin_cmd!("diffutils"); + cmd.arg("diff"); + if !option.is_empty() { + cmd.arg(option); + } + cmd.arg(file.path()).arg(file.path()); + cmd.assert() + .code(predicate::eq(0)) + .success() + .stdout(predicate::str::is_empty()); + } + Ok(()) + } + + #[test] + fn no_differences_report_identical_files() -> Result<(), Box> { + // same file + let mut file1 = NamedTempFile::new()?; + file1.write_all("foo\n".as_bytes())?; + for option in ["", "-u", "-c", "-e"] { + let mut cmd = cargo_bin_cmd!("diffutils"); + cmd.arg("diff"); + if !option.is_empty() { + cmd.arg(option); + } + cmd.arg("-s").arg(file1.path()).arg(file1.path()); + cmd.assert() + .code(predicate::eq(0)) + .success() + .stdout(predicate::eq(format!( + "Files {} and {} are identical\n", + file1.path().to_string_lossy(), + file1.path().to_string_lossy(), + ))); + } + // two files with the same content + let mut file2 = NamedTempFile::new()?; + file2.write_all("foo\n".as_bytes())?; + for option in ["", "-u", "-c", "-e"] { + let mut cmd = cargo_bin_cmd!("diffutils"); + cmd.arg("diff"); + if !option.is_empty() { + cmd.arg(option); + } + cmd.arg("-s").arg(file1.path()).arg(file2.path()); + cmd.assert() + .code(predicate::eq(0)) + .success() + .stdout(predicate::eq(format!( + "Files {} and {} are identical\n", + file1.path().to_string_lossy(), + file2.path().to_string_lossy(), + ))); + } + Ok(()) + } + + #[test] + fn differences() -> Result<(), Box> { + let mut file1 = NamedTempFile::new()?; + 
file1.write_all("foo\n".as_bytes())?; + let mut file2 = NamedTempFile::new()?; + file2.write_all("bar\n".as_bytes())?; + for option in ["", "-u", "-c", "-e"] { + let mut cmd = cargo_bin_cmd!("diffutils"); + cmd.arg("diff"); + if !option.is_empty() { + cmd.arg(option); + } + cmd.arg(file1.path()).arg(file2.path()); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stdout(predicate::str::is_empty().not()); + } + Ok(()) + } + + #[test] + fn differences_brief() -> Result<(), Box> { + let mut file1 = NamedTempFile::new()?; + file1.write_all("foo\n".as_bytes())?; + let mut file2 = NamedTempFile::new()?; + file2.write_all("bar\n".as_bytes())?; + for option in ["", "-u", "-c", "-e"] { + let mut cmd = cargo_bin_cmd!("diffutils"); + cmd.arg("diff"); + if !option.is_empty() { + cmd.arg(option); + } + cmd.arg("-q").arg(file1.path()).arg(file2.path()); + cmd.assert() + .code(predicate::eq(1)) + .failure() + .stdout(predicate::eq(format!( + "Files {} and {} differ\n", + file1.path().to_string_lossy(), + file2.path().to_string_lossy() + ))); + } + Ok(()) + } + + #[test] + fn missing_newline() -> Result<(), Box> { + let mut file1 = NamedTempFile::new()?; + file1.write_all("foo".as_bytes())?; + let mut file2 = NamedTempFile::new()?; + file2.write_all("bar".as_bytes())?; + new_ucmd!() + .arg("-e") + .arg(file1.path()) + .arg(file2.path()) + .fails_with_code(2) + // Assert on stderr: the bool from a bare `stderr_str().starts_with(..)` was silently discarded. + .stderr_contains("No newline at end of file"); + Ok(()) + } + + #[test] + fn read_from_stdin() -> Result<(), Box> { + let mut file1 = NamedTempFile::new()?; + file1.write_all("foo\n".as_bytes())?; + let mut file2 = NamedTempFile::new()?; + file2.write_all("bar\n".as_bytes())?; + + let mut cmd = cargo_bin_cmd!("diffutils"); + cmd.arg("diff"); + cmd.arg("-u") + .arg(file1.path()) + .arg("-") + .write_stdin("bar\n"); + cmd.assert().code(predicate::eq(1)).failure(); + + let output = cmd.output().unwrap().stdout; + assert_diff_eq!( + output, + format!( + "--- {}\tTIMESTAMP\n+++ -\tTIMESTAMP\n@@ -1 +1 
@@\n-foo\n+bar\n", + file1.path().to_string_lossy() + ) + ); + + let mut cmd = cargo_bin_cmd!("diffutils"); + cmd.arg("diff"); + cmd.arg("-u") + .arg("-") + .arg(file2.path()) + .write_stdin("foo\n"); + cmd.assert().code(predicate::eq(1)).failure(); + + let output = cmd.output().unwrap().stdout; + assert_diff_eq!( + output, + format!( + "--- -\tTIMESTAMP\n+++ {}\tTIMESTAMP\n@@ -1 +1 @@\n-foo\n+bar\n", + file2.path().to_string_lossy() + ) + ); + + let mut cmd = cargo_bin_cmd!("diffutils"); + cmd.arg("diff"); + cmd.arg("-u").arg("-").arg("-"); + cmd.assert() + .code(predicate::eq(0)) + .success() + .stdout(predicate::str::is_empty()); + + #[cfg(unix)] + { + let mut cmd = cargo_bin_cmd!("diffutils"); + cmd.arg("diff"); + cmd.arg("-u") + .arg(file1.path()) + .arg("/dev/stdin") + .write_stdin("bar\n"); + cmd.assert().code(predicate::eq(1)).failure(); + + let output = cmd.output().unwrap().stdout; + assert_diff_eq!( + output, + format!( + "--- {}\tTIMESTAMP\n+++ /dev/stdin\tTIMESTAMP\n@@ -1 +1 @@\n-foo\n+bar\n", + file1.path().to_string_lossy() + ) + ); + } + + Ok(()) + } + + #[test] + fn compare_file_to_directory() -> Result<(), Box> { + let tmp_dir = tempdir()?; + + let directory = tmp_dir.path().join("d"); + let _ = std::fs::create_dir(&directory); + + let a_path = tmp_dir.path().join("a"); + let mut a = File::create(&a_path).unwrap(); + a.write_all(b"a\n").unwrap(); + + let da_path = directory.join("a"); + let mut da = File::create(&da_path).unwrap(); + da.write_all(b"da\n").unwrap(); + + let mut cmd = cargo_bin_cmd!("diffutils"); + cmd.arg("diff"); + cmd.arg("-u").arg(&directory).arg(&a_path); + cmd.assert().code(predicate::eq(1)).failure(); + + let output = cmd.output().unwrap().stdout; + assert_diff_eq!( + output, + format!( + "--- {}\tTIMESTAMP\n+++ {}\tTIMESTAMP\n@@ -1 +1 @@\n-da\n+a\n", + da_path.display(), + a_path.display() + ) + ); + + let mut cmd = cargo_bin_cmd!("diffutils"); + cmd.arg("diff"); + cmd.arg("-u").arg(&a_path).arg(&directory); + 
cmd.assert().code(predicate::eq(1)).failure(); + + let output = cmd.output().unwrap().stdout; + assert_diff_eq!( + output, + format!( + "--- {}\tTIMESTAMP\n+++ {}\tTIMESTAMP\n@@ -1 +1 @@\n-a\n+da\n", + a_path.display(), + da_path.display() + ) + ); + + Ok(()) + } +} + +#[cfg(test)] +mod parser { + use ::diff::params_diff::Params; + use diff::{clap_preparation, params_diff::FormatOutput}; + use uudiff::error::UResult; + + // use super::*; + use std::ffi::OsString; + + fn os(s: &str) -> OsString { + OsString::from(s) + } + + /// Simplify call of parser, just pass a normal string like in the terminal. + fn parse(args: &str) -> UResult { + let opts = args + .split(' ') + .filter(|arg| !arg.is_empty()) + .map(OsString::from); + + let opts = clap_preparation(opts); + let matches = + uudiff::clap_localization::handle_clap_result(::diff::params_diff::uu_app(), opts)?; + let params: Params = matches.try_into()?; + + Ok(params) + } + + #[test] + fn test_param_basics() { + let params = Params { + from: os("foo"), + to: os("bar"), + ..Default::default() + }; + assert_eq!(params, parse("diff foo bar").unwrap()); + assert_eq!(params, parse("diff --normal foo bar").unwrap()); + } + + #[test] + fn test_param_ed() { + for arg in ["-e", "--ed"] { + assert_eq!( + Params { + from: os("foo"), + to: os("bar"), + format_out: FormatOutput::Ed, + ..Default::default() + }, + parse(&format!("diff {arg} foo bar")).unwrap() + ); + } + } + + #[test] + fn test_conflicting_output_styles() { + for arg in [ + "-u -c", + "-u -e", + "-c -u", + "-c -U42", + "-u --normal", + "--normal -e", + "--context --normal", + ] { + assert!(parse(&format!("diff {arg} foo bar")).is_err()); + } + } + + #[test] + fn context_valid() { + for arg in ["-c", "--context", "--context="] { + assert_eq!( + Params { + from: os("foo"), + to: os("bar"), + format_out: FormatOutput::Context, + ..Default::default() + }, + parse(&format!("diff {arg} foo bar")).unwrap() + ); + } + + for arg in ["-c=42", "-C42", "-C 42", 
"--context=42"] { + assert_eq!( + Params { + from: os("foo"), + to: os("bar"), + format_out: FormatOutput::Context, + n_output_lines: 42, + ..Default::default() + }, + parse(&format!("diff {arg} foo bar")).unwrap() + ); + } + } + + /// These tests are failing as clap cannot be configured to read this + /// possibly able to handle with: .allow_external_subcommands(true) + #[test] + fn context_valid_clap_limitation() { + for arg in ["-c42", "-42c"] { + dbg!(arg); + assert_eq!( + Params { + from: os("foo"), + to: os("bar"), + format_out: FormatOutput::Context, + n_output_lines: 42, + ..Default::default() + }, + parse(&format!("diff {arg} foo bar")).unwrap() + ); + } + } + + #[test] + fn context_invalid() { + for arg in [ + "-c 42", + // TODO allowed? "-c=42", works here + // "-c=", works here, default + "-C", + // "-C=42", works here + // "-C=", works here + "--context42", + "--context 42", + "-42C", + ] { + // dbg!(&arg); + assert!(parse(&format!("diff {arg} foo bar")).is_err()); + } + } + + #[test] + fn context_lines_count() { + // clap limitation requires pre-parsing + assert_eq!( + Params { + from: os("foo"), + to: os("bar"), + format_out: FormatOutput::Unified, + n_output_lines: 54, + ..Default::default() + }, + parse("diff -u54 foo bar").unwrap() + ); + + assert_eq!( + Params { + from: os("foo"), + to: os("bar"), + format_out: FormatOutput::Unified, + n_output_lines: 54, + ..Default::default() + }, + parse("diff -U54 foo bar").unwrap() + ); + + assert_eq!( + Params { + from: os("foo"), + to: os("bar"), + format_out: FormatOutput::Unified, + n_output_lines: 54, + ..Default::default() + }, + parse("diff -U 54 foo bar").unwrap() + ); + + // clap limitation requires pre-parsing + // https://github.com/clap-rs/clap/issues/6312 + assert_eq!( + Params { + from: os("foo"), + to: os("bar"), + format_out: FormatOutput::Context, + n_output_lines: 54, + ..Default::default() + }, + parse("diff -c54 foo bar").unwrap() + ); + } + + #[test] + fn unified_valid() { + for arg in 
["-u", "--unified", "--unified="] { + assert_eq!( + Params { + from: os("foo"), + to: os("bar"), + format_out: FormatOutput::Unified, + ..Default::default() + }, + parse(&format!("diff {arg} foo bar")).unwrap() + ); + } + + for arg in ["-U42", "-U 42", "--unified=42"] { + assert_eq!( + Params { + from: os("foo"), + to: os("bar"), + format_out: FormatOutput::Unified, + n_output_lines: 42, + ..Default::default() + }, + parse(&format!("diff {arg} foo bar")).unwrap() + ); + } + } + + /// These tests are failing as clap cannot be configured to read this + /// possibly able to handle with: .allow_external_subcommands(true) + #[test] + fn unified_valid_clap_limitation() { + for arg in ["-u42", "-42u"] { + dbg!(arg); + assert_eq!( + Params { + from: os("foo"), + to: os("bar"), + format_out: FormatOutput::Unified, + n_output_lines: 42, + ..Default::default() + }, + parse(&format!("diff {arg} foo bar")).unwrap() + ); + } + } + + #[test] + fn unified_invalid() { + for arg in [ + "-u 42", + // "-u=42", // works here + // "-u=", // works here + "-U", + // "-U=42", // works here + // "-U=", // works here + "--unified42", + "--unified 42", + "-42U", + ] { + // dbg!(&arg); + assert!(parse(&format!("diff {arg} foo bar")).is_err()); + } + } + + #[test] + fn test_param_brief() { + let params = Params { + from: os("foo"), + to: os("bar"), + brief: true, + ..Default::default() + }; + assert_eq!(params, parse("diff -q foo bar").unwrap()); + assert_eq!(params, parse("diff --brief foo bar").unwrap()); + } + + #[test] + fn test_param_expand_tabs() { + let params = Params { + from: os("foo"), + to: os("bar"), + expand_tabs: true, + ..Default::default() + }; + assert_eq!(params, parse("diff -t foo bar").unwrap()); + assert_eq!(params, parse("diff --expand-tabs foo bar").unwrap()); + } + + #[test] + fn test_param_report_identical_files() { + let params = Params { + from: os("foo"), + to: os("bar"), + report_identical_files: true, + ..Default::default() + }; + assert_eq!(params, parse("diff -s 
foo bar").unwrap()); + assert_eq!( + params, + parse("diff --report-identical-files foo bar").unwrap() + ); + } + + #[test] + fn test_param_tabsize() { + let mut params = Params { + from: os("foo"), + to: os("bar"), + tabsize: 1, + ..Default::default() + }; + assert_eq!(params, parse("diff --tabsize=1 foo bar").unwrap()); + params.tabsize = 42; + assert_eq!(params, parse("diff --tabsize=42 foo bar").unwrap()); + assert!(parse("diff --tabsize foo bar").is_err()); + assert!(parse("diff --tabsize= foo bar").is_err()); + assert!(parse("diff --tabsize=r2 foo bar").is_err()); + assert!(parse("diff --tabsize=-1 foo bar").is_err()); + assert!(parse("diff --tabsize=92233720368547758088 foo bar").is_err()); + } + + #[test] + fn test_param_width() { + let mut params = Params { + from: os("foo"), + to: os("bar"), + width: 130, + ..Default::default() + }; + assert_eq!(params, parse("diff foo bar").unwrap()); + params.width = 42; + assert_eq!(params, parse("diff -W42 foo bar").unwrap()); + assert_eq!(params, parse("diff -W 42 foo bar").unwrap()); + assert_eq!(params, parse("diff --width=42 foo bar").unwrap()); + assert_eq!(params, parse("diff --width 42 foo bar").unwrap()); + assert!(parse("diff --width foo bar").is_err()); + } + + #[test] + fn test_double_dash() { + let params = Params { + from: os("-g"), + to: os("-h"), + ..Default::default() + }; + assert_eq!(params, parse("diff -- -g -h").unwrap()); + } + + #[test] + fn test_default_to_stdin() { + let params = Params { + from: os("foo"), + to: os("-"), + ..Default::default() + }; + assert_eq!(params, parse("diff foo -").unwrap()); + assert_eq!( + Params { + from: os("-"), + to: os("bar"), + ..Default::default() + }, + parse("diff - bar").unwrap() + ); + assert_eq!( + Params { + from: os("-"), + to: os("-"), + ..Default::default() + }, + parse("diff - -").unwrap() + ); + assert!(parse("diff foo bar -").is_err()); + assert!(parse("diff - - -").is_err()); + } + + #[test] + fn test_missing_arguments() { + 
assert!(parse("diff").is_err()); + assert!(parse("diff foo").is_err()); + } + + #[test] + fn test_unknown_argument() { + assert!(parse("diff -g foo bar").is_err()); + assert!(parse("diff -g bar").is_err()); + assert!(parse("diff -g").is_err()); + } + + #[test] + fn test_no_arguments() { + assert!(parse("").is_err()); + } +} diff --git a/tests/fixtures/cmp/empty_file.txt b/tests/fixtures/cmp/empty_file.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/tests/fixtures/cmp/empty_file.txt @@ -0,0 +1 @@ + diff --git a/tests/fixtures/cmp/lorem_ipsum.txt b/tests/fixtures/cmp/lorem_ipsum.txt new file mode 100644 index 0000000..1675244 --- /dev/null +++ b/tests/fixtures/cmp/lorem_ipsum.txt @@ -0,0 +1,13 @@ +Lorem ipsum dolor sit amet, consectetur adipiscing +elit. Nunc interdum suscipit sem vel ornare. Proin euismod, justo +sed mollis dictum, eros urna ultricies augue, eu pharetra mi ex id +ante. Duis convallis porttitor aliquam. Nunc vitae tincidunt ex. +Suspendisse iaculis ligula ac diam consectetur lacinia. Donec vel +velit dui. Etiam fringilla, dolor quis tempor vehicula, lacus +turpis bibendum velit, et pellentesque elit odio a magna. Cras +vulputate tortor non libero vehicula euismod. Aliquam tincidunt +nisl eget enim cursus, viverra sagittis magna commodo. Cras rhoncus +egestas leo nec blandit. Suspendisse potenti. Etiam ullamcorper +leo vel lacus vestibulum, cursus semper eros efficitur. In hac +habitasse platea dictumst. Phasellus scelerisque vehicula +fringilla. diff --git a/tests/fixtures/cmp/lorem_ipsum_diff.txt b/tests/fixtures/cmp/lorem_ipsum_diff.txt new file mode 100644 index 0000000..bb89a7f --- /dev/null +++ b/tests/fixtures/cmp/lorem_ipsum_diff.txt @@ -0,0 +1,13 @@ +Lorem ipsum dolor sit amet, consectetur adipiscing +elit. Nunc interdum suscipit sem vel ornare. Proin euismod, justo +sed mollis dictum, eros urna ultricies augue, eu pharetra mi ex id +ante! Duis convallis porttitor aliquam. Nunc vitae tincidunt ex. 
+Suspendisse iaculis ligula ac diam consectetur lacinia. Donec vel +velit dui. Etiam fringilla, dolor quis tempor vehicula, lacus +turpis bibendum velit, et pellentesque elit odio a magna. Cras +vulputate tortor non libero vehicula euismod. Aliquam tincidunt +nisl eget enim cursus, viverra sagittis magna commodo. Cras rhoncus +egestas leo nec blandit. Suspendisse potenti. Etiam ullamcorper +leo vel lacus vestibulum, cursus semper eros efficitur. In hac +habitasse platea dictumst. Phasellus scelerisque vehicula +fringilla! diff --git a/tests/fixtures/cmp/lorem_ipsum_equal.txt b/tests/fixtures/cmp/lorem_ipsum_equal.txt new file mode 100644 index 0000000..1675244 --- /dev/null +++ b/tests/fixtures/cmp/lorem_ipsum_equal.txt @@ -0,0 +1,13 @@ +Lorem ipsum dolor sit amet, consectetur adipiscing +elit. Nunc interdum suscipit sem vel ornare. Proin euismod, justo +sed mollis dictum, eros urna ultricies augue, eu pharetra mi ex id +ante. Duis convallis porttitor aliquam. Nunc vitae tincidunt ex. +Suspendisse iaculis ligula ac diam consectetur lacinia. Donec vel +velit dui. Etiam fringilla, dolor quis tempor vehicula, lacus +turpis bibendum velit, et pellentesque elit odio a magna. Cras +vulputate tortor non libero vehicula euismod. Aliquam tincidunt +nisl eget enim cursus, viverra sagittis magna commodo. Cras rhoncus +egestas leo nec blandit. Suspendisse potenti. Etiam ullamcorper +leo vel lacus vestibulum, cursus semper eros efficitur. In hac +habitasse platea dictumst. Phasellus scelerisque vehicula +fringilla. diff --git a/tests/integration.rs b/tests/integration.rs deleted file mode 100644 index 0e8d21e..0000000 --- a/tests/integration.rs +++ /dev/null @@ -1,890 +0,0 @@ -// This file is part of the uutils diffutils package. -// -// For the full copyright and license information, please view the LICENSE-* -// files that was distributed with this source code. 
- -use assert_cmd::cargo::cargo_bin_cmd; -use predicates::prelude::*; -use std::fs::File; -#[cfg(not(windows))] -use std::fs::OpenOptions; -use std::io::Write; -use tempfile::{tempdir, NamedTempFile}; - -// Integration tests for the diffutils command -mod common { - use super::*; - - #[test] - fn unknown_param() -> Result<(), Box> { - let mut cmd = cargo_bin_cmd!("diffutils"); - cmd.arg("patch"); - cmd.assert() - .code(predicate::eq(2)) - .failure() - .stderr(predicate::eq("patch: utility not supported\n")); - - let mut cmd = cargo_bin_cmd!("diffutils"); - cmd.assert() - .code(predicate::eq(0)) - .success() - .stderr(predicate::str::starts_with( - "Expected utility name as second argument, got nothing.\n", - )); - - for subcmd in ["diff", "cmp"] { - let mut cmd = cargo_bin_cmd!("diffutils"); - cmd.arg(subcmd); - cmd.arg("--foobar"); - cmd.assert() - .code(predicate::eq(2)) - .failure() - .stderr(predicate::str::contains("unrecognized option '--foobar'")); - } - Ok(()) - } - - #[test] - fn cannot_read_files() -> Result<(), Box> { - let file = NamedTempFile::new()?; - - let nofile = NamedTempFile::new()?; - let nopath = nofile.into_temp_path(); - std::fs::remove_file(&nopath)?; - - #[cfg(not(windows))] - let error_message = "No such file or directory"; - #[cfg(windows)] - let error_message = "The system cannot find the file specified."; - - for subcmd in ["diff", "cmp"] { - let mut cmd = cargo_bin_cmd!("diffutils"); - cmd.arg(subcmd); - cmd.arg(&nopath).arg(file.path()); - cmd.assert() - .code(predicate::eq(2)) - .failure() - .stderr(predicate::str::ends_with(format!( - ": {}: {error_message}\n", - &nopath.as_os_str().to_string_lossy() - ))); - - let mut cmd = cargo_bin_cmd!("diffutils"); - cmd.arg(subcmd); - cmd.arg(file.path()).arg(&nopath); - cmd.assert() - .code(predicate::eq(2)) - .failure() - .stderr(predicate::str::ends_with(format!( - ": {}: {error_message}\n", - &nopath.as_os_str().to_string_lossy() - ))); - } - - let mut cmd = cargo_bin_cmd!("diffutils"); - 
cmd.arg("diff"); - cmd.arg(&nopath).arg(&nopath); - cmd.assert().code(predicate::eq(2)).failure().stderr( - predicate::str::contains(format!( - ": {}: {error_message}\n", - &nopath.as_os_str().to_string_lossy() - )) - .count(2), - ); - - Ok(()) - } -} - -mod diff { - use diffutilslib::assert_diff_eq; - - use super::*; - - #[test] - fn no_differences() -> Result<(), Box> { - let file = NamedTempFile::new()?; - for option in ["", "-u", "-c", "-e"] { - let mut cmd = cargo_bin_cmd!("diffutils"); - cmd.arg("diff"); - if !option.is_empty() { - cmd.arg(option); - } - cmd.arg(file.path()).arg(file.path()); - cmd.assert() - .code(predicate::eq(0)) - .success() - .stdout(predicate::str::is_empty()); - } - Ok(()) - } - - #[test] - fn no_differences_report_identical_files() -> Result<(), Box> { - // same file - let mut file1 = NamedTempFile::new()?; - file1.write_all("foo\n".as_bytes())?; - for option in ["", "-u", "-c", "-e"] { - let mut cmd = cargo_bin_cmd!("diffutils"); - cmd.arg("diff"); - if !option.is_empty() { - cmd.arg(option); - } - cmd.arg("-s").arg(file1.path()).arg(file1.path()); - cmd.assert() - .code(predicate::eq(0)) - .success() - .stdout(predicate::eq(format!( - "Files {} and {} are identical\n", - file1.path().to_string_lossy(), - file1.path().to_string_lossy(), - ))); - } - // two files with the same content - let mut file2 = NamedTempFile::new()?; - file2.write_all("foo\n".as_bytes())?; - for option in ["", "-u", "-c", "-e"] { - let mut cmd = cargo_bin_cmd!("diffutils"); - cmd.arg("diff"); - if !option.is_empty() { - cmd.arg(option); - } - cmd.arg("-s").arg(file1.path()).arg(file2.path()); - cmd.assert() - .code(predicate::eq(0)) - .success() - .stdout(predicate::eq(format!( - "Files {} and {} are identical\n", - file1.path().to_string_lossy(), - file2.path().to_string_lossy(), - ))); - } - Ok(()) - } - - #[test] - fn differences() -> Result<(), Box> { - let mut file1 = NamedTempFile::new()?; - file1.write_all("foo\n".as_bytes())?; - let mut file2 = 
NamedTempFile::new()?; - file2.write_all("bar\n".as_bytes())?; - for option in ["", "-u", "-c", "-e"] { - let mut cmd = cargo_bin_cmd!("diffutils"); - cmd.arg("diff"); - if !option.is_empty() { - cmd.arg(option); - } - cmd.arg(file1.path()).arg(file2.path()); - cmd.assert() - .code(predicate::eq(1)) - .failure() - .stdout(predicate::str::is_empty().not()); - } - Ok(()) - } - - #[test] - fn differences_brief() -> Result<(), Box> { - let mut file1 = NamedTempFile::new()?; - file1.write_all("foo\n".as_bytes())?; - let mut file2 = NamedTempFile::new()?; - file2.write_all("bar\n".as_bytes())?; - for option in ["", "-u", "-c", "-e"] { - let mut cmd = cargo_bin_cmd!("diffutils"); - cmd.arg("diff"); - if !option.is_empty() { - cmd.arg(option); - } - cmd.arg("-q").arg(file1.path()).arg(file2.path()); - cmd.assert() - .code(predicate::eq(1)) - .failure() - .stdout(predicate::eq(format!( - "Files {} and {} differ\n", - file1.path().to_string_lossy(), - file2.path().to_string_lossy() - ))); - } - Ok(()) - } - - #[test] - fn missing_newline() -> Result<(), Box> { - let mut file1 = NamedTempFile::new()?; - file1.write_all("foo".as_bytes())?; - let mut file2 = NamedTempFile::new()?; - file2.write_all("bar".as_bytes())?; - let mut cmd = cargo_bin_cmd!("diffutils"); - cmd.arg("diff"); - cmd.arg("-e").arg(file1.path()).arg(file2.path()); - cmd.assert() - .code(predicate::eq(2)) - .failure() - .stderr(predicate::str::starts_with("No newline at end of file")); - Ok(()) - } - - #[test] - fn read_from_stdin() -> Result<(), Box> { - let mut file1 = NamedTempFile::new()?; - file1.write_all("foo\n".as_bytes())?; - let mut file2 = NamedTempFile::new()?; - file2.write_all("bar\n".as_bytes())?; - - let mut cmd = cargo_bin_cmd!("diffutils"); - cmd.arg("diff"); - cmd.arg("-u") - .arg(file1.path()) - .arg("-") - .write_stdin("bar\n"); - cmd.assert().code(predicate::eq(1)).failure(); - - let output = cmd.output().unwrap().stdout; - assert_diff_eq!( - output, - format!( - "--- {}\tTIMESTAMP\n+++ 
-\tTIMESTAMP\n@@ -1 +1 @@\n-foo\n+bar\n", - file1.path().to_string_lossy() - ) - ); - - let mut cmd = cargo_bin_cmd!("diffutils"); - cmd.arg("diff"); - cmd.arg("-u") - .arg("-") - .arg(file2.path()) - .write_stdin("foo\n"); - cmd.assert().code(predicate::eq(1)).failure(); - - let output = cmd.output().unwrap().stdout; - assert_diff_eq!( - output, - format!( - "--- -\tTIMESTAMP\n+++ {}\tTIMESTAMP\n@@ -1 +1 @@\n-foo\n+bar\n", - file2.path().to_string_lossy() - ) - ); - - let mut cmd = cargo_bin_cmd!("diffutils"); - cmd.arg("diff"); - cmd.arg("-u").arg("-").arg("-"); - cmd.assert() - .code(predicate::eq(0)) - .success() - .stdout(predicate::str::is_empty()); - - #[cfg(unix)] - { - let mut cmd = cargo_bin_cmd!("diffutils"); - cmd.arg("diff"); - cmd.arg("-u") - .arg(file1.path()) - .arg("/dev/stdin") - .write_stdin("bar\n"); - cmd.assert().code(predicate::eq(1)).failure(); - - let output = cmd.output().unwrap().stdout; - assert_diff_eq!( - output, - format!( - "--- {}\tTIMESTAMP\n+++ /dev/stdin\tTIMESTAMP\n@@ -1 +1 @@\n-foo\n+bar\n", - file1.path().to_string_lossy() - ) - ); - } - - Ok(()) - } - - #[test] - fn compare_file_to_directory() -> Result<(), Box> { - let tmp_dir = tempdir()?; - - let directory = tmp_dir.path().join("d"); - let _ = std::fs::create_dir(&directory); - - let a_path = tmp_dir.path().join("a"); - let mut a = File::create(&a_path).unwrap(); - a.write_all(b"a\n").unwrap(); - - let da_path = directory.join("a"); - let mut da = File::create(&da_path).unwrap(); - da.write_all(b"da\n").unwrap(); - - let mut cmd = cargo_bin_cmd!("diffutils"); - cmd.arg("diff"); - cmd.arg("-u").arg(&directory).arg(&a_path); - cmd.assert().code(predicate::eq(1)).failure(); - - let output = cmd.output().unwrap().stdout; - assert_diff_eq!( - output, - format!( - "--- {}\tTIMESTAMP\n+++ {}\tTIMESTAMP\n@@ -1 +1 @@\n-da\n+a\n", - da_path.display(), - a_path.display() - ) - ); - - let mut cmd = cargo_bin_cmd!("diffutils"); - cmd.arg("diff"); - 
cmd.arg("-u").arg(&a_path).arg(&directory); - cmd.assert().code(predicate::eq(1)).failure(); - - let output = cmd.output().unwrap().stdout; - assert_diff_eq!( - output, - format!( - "--- {}\tTIMESTAMP\n+++ {}\tTIMESTAMP\n@@ -1 +1 @@\n-a\n+da\n", - a_path.display(), - da_path.display() - ) - ); - - Ok(()) - } -} - -mod cmp { - use super::*; - - #[test] - fn cmp_incompatible_params() -> Result<(), Box> { - let mut cmd = cargo_bin_cmd!("diffutils"); - cmd.arg("cmp"); - cmd.arg("-l"); - cmd.arg("-s"); - cmd.arg("/etc/passwd").arg("/etc/group"); - cmd.assert() - .code(predicate::eq(2)) - .failure() - .stderr(predicate::str::ends_with( - ": options -l and -s are incompatible\n", - )); - - Ok(()) - } - - #[test] - fn cmp_stdin() -> Result<(), Box> { - let tmp_dir = tempdir()?; - - let a_path = tmp_dir.path().join("a"); - let mut a = File::create(&a_path).unwrap(); - a.write_all(b"a\n").unwrap(); - - let mut cmd = cargo_bin_cmd!("diffutils"); - cmd.arg("cmp"); - cmd.arg(&a_path); - cmd.write_stdin("a\n"); - cmd.assert() - .code(predicate::eq(0)) - .success() - .stderr(predicate::str::is_empty()) - .stdout(predicate::str::is_empty()); - - let mut cmd = cargo_bin_cmd!("diffutils"); - cmd.env("LC_ALL", "C"); - cmd.arg("cmp"); - cmd.arg(&a_path); - cmd.write_stdin("b\n"); - cmd.assert() - .code(predicate::eq(1)) - .failure() - .stderr(predicate::str::is_empty()) - .stdout(predicate::str::ends_with(" - differ: char 1, line 1\n")); - - Ok(()) - } - - #[test] - fn cmp_equal_files() -> Result<(), Box> { - let tmp_dir = tempdir()?; - - let a_path = tmp_dir.path().join("a"); - let mut a = File::create(&a_path).unwrap(); - a.write_all(b"a\n").unwrap(); - - let b_path = tmp_dir.path().join("b"); - let mut b = File::create(&b_path).unwrap(); - b.write_all(b"a\n").unwrap(); - - let mut cmd = cargo_bin_cmd!("diffutils"); - cmd.arg("cmp"); - cmd.arg(&a_path).arg(&b_path); - cmd.assert() - .code(predicate::eq(0)) - .success() - .stderr(predicate::str::is_empty()) - 
.stdout(predicate::str::is_empty()); - - Ok(()) - } - - #[test] - fn cmp_one_file_empty() -> Result<(), Box> { - let tmp_dir = tempdir()?; - - let a_path = tmp_dir.path().join("a"); - let mut a = File::create(&a_path).unwrap(); - a.write_all(b"a\n").unwrap(); - - let b_path = tmp_dir.path().join("b"); - let _ = File::create(&b_path).unwrap(); - - let mut cmd = cargo_bin_cmd!("diffutils"); - cmd.arg("cmp"); - cmd.arg(&a_path).arg(&b_path); - cmd.assert() - .code(predicate::eq(1)) - .failure() - .stderr(predicate::str::contains(" EOF on ")) - .stderr(predicate::str::ends_with(" which is empty\n")); - - Ok(()) - } - - #[test] - fn cmp_immediate_difference() -> Result<(), Box> { - let tmp_dir = tempdir()?; - - let a_path = tmp_dir.path().join("a"); - let mut a = File::create(&a_path).unwrap(); - a.write_all(b"abc\n").unwrap(); - - let b_path = tmp_dir.path().join("b"); - let mut b = File::create(&b_path).unwrap(); - b.write_all(b"bcd\n").unwrap(); - - let mut cmd = cargo_bin_cmd!("diffutils"); - cmd.env("LC_ALL", "C"); - cmd.arg("cmp"); - cmd.arg(&a_path).arg(&b_path); - cmd.assert() - .code(predicate::eq(1)) - .failure() - .stdout(predicate::str::ends_with(" differ: char 1, line 1\n")); - - let mut cmd = cargo_bin_cmd!("diffutils"); - cmd.env("LC_ALL", "C"); - cmd.arg("cmp"); - cmd.arg("-b"); - cmd.arg(&a_path).arg(&b_path); - cmd.assert() - .code(predicate::eq(1)) - .failure() - .stderr(predicate::str::is_empty()) - .stdout(predicate::str::ends_with( - " differ: byte 1, line 1 is 141 a 142 b\n", - )); - - let mut cmd = cargo_bin_cmd!("diffutils"); - cmd.env("LC_ALL", "C"); - cmd.arg("cmp"); - cmd.arg("-l"); - cmd.arg(&a_path).arg(&b_path); - cmd.assert() - .code(predicate::eq(1)) - .failure() - .stderr(predicate::str::is_empty()) - .stdout(predicate::eq("1 141 142\n2 142 143\n3 143 144\n")); - - let mut cmd = cargo_bin_cmd!("diffutils"); - cmd.env("LC_ALL", "C"); - cmd.arg("cmp"); - cmd.arg("-l"); - cmd.arg("-b"); - cmd.arg(&a_path).arg(&b_path); - cmd.assert() - 
.code(predicate::eq(1)) - .failure() - .stderr(predicate::str::is_empty()) - .stdout(predicate::eq( - "1 141 a 142 b\n2 142 b 143 c\n3 143 c 144 d\n", - )); - - Ok(()) - } - - #[test] - fn cmp_newline_difference() -> Result<(), Box> { - let tmp_dir = tempdir()?; - - let a_path = tmp_dir.path().join("a"); - let mut a = File::create(&a_path).unwrap(); - a.write_all(b"abc\ndefg").unwrap(); - - let b_path = tmp_dir.path().join("b"); - let mut b = File::create(&b_path).unwrap(); - b.write_all(b"abc\ndef\ng").unwrap(); - - let mut cmd = cargo_bin_cmd!("diffutils"); - cmd.env("LC_ALL", "C"); - cmd.arg("cmp"); - cmd.arg(&a_path).arg(&b_path); - cmd.assert() - .code(predicate::eq(1)) - .failure() - .stderr(predicate::str::is_empty()) - .stdout(predicate::str::ends_with(" differ: char 8, line 2\n")); - - let mut cmd = cargo_bin_cmd!("diffutils"); - cmd.env("LC_ALL", "C"); - cmd.arg("cmp"); - cmd.arg("-b"); - cmd.arg(&a_path).arg(&b_path); - cmd.assert() - .code(predicate::eq(1)) - .failure() - .stderr(predicate::str::is_empty()) - .stdout(predicate::str::ends_with( - " differ: byte 8, line 2 is 147 g 12 ^J\n", - )); - - let mut cmd = cargo_bin_cmd!("diffutils"); - cmd.env("LC_ALL", "C"); - cmd.arg("cmp"); - cmd.arg("-l"); - cmd.arg(&a_path).arg(&b_path); - cmd.assert() - .code(predicate::eq(1)) - .failure() - .stdout(predicate::str::starts_with("8 147 12\n")) - .stderr(predicate::str::contains(" EOF on")) - .stderr(predicate::str::ends_with(" after byte 8\n")); - - let mut cmd = cargo_bin_cmd!("diffutils"); - cmd.env("LC_ALL", "C"); - cmd.arg("cmp"); - cmd.arg("-b"); - cmd.arg("-l"); - cmd.arg(&a_path).arg(&b_path); - cmd.assert() - .code(predicate::eq(1)) - .failure() - .stdout(predicate::str::starts_with("8 147 g 12 ^J\n")) - .stderr(predicate::str::contains(" EOF on")) - .stderr(predicate::str::ends_with(" after byte 8\n")); - - Ok(()) - } - - #[test] - fn cmp_max_bytes() -> Result<(), Box> { - let tmp_dir = tempdir()?; - - let a_path = tmp_dir.path().join("a"); - let mut 
a = File::create(&a_path).unwrap(); - a.write_all(b"abc efg ijkl\n").unwrap(); - - let b_path = tmp_dir.path().join("b"); - let mut b = File::create(&b_path).unwrap(); - b.write_all(b"abcdefghijkl\n").unwrap(); - - let mut cmd = cargo_bin_cmd!("diffutils"); - cmd.arg("cmp"); - cmd.arg("-l"); - cmd.arg("-b"); - cmd.arg("-n"); - cmd.arg("3"); - cmd.arg(&a_path).arg(&b_path); - cmd.assert() - .code(predicate::eq(0)) - .success() - .stderr(predicate::str::is_empty()) - .stdout(predicate::str::is_empty()); - - let mut cmd = cargo_bin_cmd!("diffutils"); - cmd.arg("cmp"); - cmd.arg("-l"); - cmd.arg("-b"); - cmd.arg("-n"); - cmd.arg("4"); - cmd.arg(&a_path).arg(&b_path); - cmd.assert() - .code(predicate::eq(1)) - .failure() - .stderr(predicate::str::is_empty()) - .stdout(predicate::eq("4 40 144 d\n")); - - let mut cmd = cargo_bin_cmd!("diffutils"); - cmd.arg("cmp"); - cmd.arg("-l"); - cmd.arg("-b"); - cmd.arg("-n"); - cmd.arg("13"); - cmd.arg(&a_path).arg(&b_path); - cmd.assert() - .code(predicate::eq(1)) - .failure() - .stderr(predicate::str::is_empty()) - .stdout(predicate::eq(" 4 40 144 d\n 8 40 150 h\n")); - Ok(()) - } - - #[test] - fn cmp_skip_args_parsing() -> Result<(), Box> { - let tmp_dir = tempdir()?; - - let a_path = tmp_dir.path().join("a"); - let mut a = File::create(&a_path).unwrap(); - a.write_all(b"---abc\n").unwrap(); - - let b_path = tmp_dir.path().join("b"); - let mut b = File::create(&b_path).unwrap(); - b.write_all(b"###abc\n").unwrap(); - - let mut cmd = cargo_bin_cmd!("diffutils"); - cmd.env("LC_ALL", "C"); - cmd.arg("cmp"); - cmd.arg("-i"); - cmd.arg("3"); - cmd.arg(&a_path).arg(&b_path); - cmd.assert() - .code(predicate::eq(0)) - .success() - .stderr(predicate::str::is_empty()) - .stdout(predicate::str::is_empty()); - - // Positional skips should be ignored - let mut cmd = cargo_bin_cmd!("diffutils"); - cmd.env("LC_ALL", "C"); - cmd.arg("cmp"); - cmd.arg("-i"); - cmd.arg("3"); - cmd.arg(&a_path).arg(&b_path); - cmd.arg("1").arg("1"); - cmd.assert() 
- .code(predicate::eq(0)) - .success() - .stderr(predicate::str::is_empty()) - .stdout(predicate::str::is_empty()); - - // Single positional argument should only affect first file. - let mut cmd = cargo_bin_cmd!("diffutils"); - cmd.env("LC_ALL", "C"); - cmd.arg("cmp"); - cmd.arg(&a_path).arg(&b_path); - cmd.arg("3"); - cmd.assert() - .code(predicate::eq(1)) - .failure() - .stderr(predicate::str::is_empty()) - .stdout(predicate::str::ends_with(" differ: char 1, line 1\n")); - - let mut cmd = cargo_bin_cmd!("diffutils"); - cmd.env("LC_ALL", "C"); - cmd.arg("cmp"); - cmd.arg(&a_path).arg(&b_path); - cmd.arg("3"); - cmd.arg("3"); - cmd.assert() - .code(predicate::eq(0)) - .success() - .stderr(predicate::str::is_empty()) - .stdout(predicate::str::is_empty()); - - Ok(()) - } - - #[test] - fn cmp_skip_suffix_parsing() -> Result<(), Box> { - let tmp_dir = tempdir()?; - - let a_path = tmp_dir.path().join("a"); - let mut a = File::create(&a_path).unwrap(); - writeln!(a, "{}c", "a".repeat(1024)).unwrap(); - a.flush().unwrap(); - - let b_path = tmp_dir.path().join("b"); - let mut b = File::create(&b_path).unwrap(); - writeln!(b, "{}c", "b".repeat(1024)).unwrap(); - b.flush().unwrap(); - - let mut cmd = cargo_bin_cmd!("diffutils"); - cmd.arg("cmp"); - cmd.arg("--ignore-initial=1K"); - cmd.arg(&a_path).arg(&b_path); - cmd.assert() - .code(predicate::eq(0)) - .success() - .stderr(predicate::str::is_empty()) - .stdout(predicate::str::is_empty()); - - Ok(()) - } - - #[test] - fn cmp_skip() -> Result<(), Box> { - let tmp_dir = tempdir()?; - - let a_path = tmp_dir.path().join("a"); - let mut a = File::create(&a_path).unwrap(); - a.write_all(b"abc efg ijkl\n").unwrap(); - - let b_path = tmp_dir.path().join("b"); - let mut b = File::create(&b_path).unwrap(); - b.write_all(b"abcdefghijkl\n").unwrap(); - - let mut cmd = cargo_bin_cmd!("diffutils"); - cmd.arg("cmp"); - cmd.arg("-l"); - cmd.arg("-b"); - cmd.arg("-i"); - cmd.arg("8"); - cmd.arg(&a_path).arg(&b_path); - cmd.assert() - 
.code(predicate::eq(0)) - .success() - .stderr(predicate::str::is_empty()) - .stdout(predicate::str::is_empty()); - - let mut cmd = cargo_bin_cmd!("diffutils"); - cmd.arg("cmp"); - cmd.arg("-b"); - cmd.arg("-i"); - cmd.arg("4"); - cmd.arg(&a_path).arg(&b_path); - cmd.assert() - .code(predicate::eq(1)) - .failure() - .stderr(predicate::str::is_empty()) - .stdout(predicate::str::ends_with( - " differ: byte 4, line 1 is 40 150 h\n", - )); - - Ok(()) - } - - #[test] - fn cmp_binary() -> Result<(), Box> { - let tmp_dir = tempdir()?; - - let mut bytes = vec![0, 15, 31, 32, 33, 40, 64, 126, 127, 128, 129, 200, 254, 255]; - - let a_path = tmp_dir.path().join("a"); - let mut a = File::create(&a_path).unwrap(); - a.write_all(&bytes).unwrap(); - - bytes.reverse(); - - let b_path = tmp_dir.path().join("b"); - let mut b = File::create(&b_path).unwrap(); - b.write_all(&bytes).unwrap(); - - let mut cmd = cargo_bin_cmd!("diffutils"); - cmd.arg("cmp"); - cmd.arg("-l"); - cmd.arg("-b"); - cmd.arg(&a_path).arg(&b_path); - cmd.assert() - .code(predicate::eq(1)) - .failure() - .stdout(predicate::eq(concat!( - " 1 0 ^@ 377 M-^?\n", - " 2 17 ^O 376 M-~\n", - " 3 37 ^_ 310 M-H\n", - " 4 40 201 M-^A\n", - " 5 41 ! 200 M-^@\n", - " 6 50 ( 177 ^?\n", - " 7 100 @ 176 ~\n", - " 8 176 ~ 100 @\n", - " 9 177 ^? 50 (\n", - "10 200 M-^@ 41 !\n", - "11 201 M-^A 40 \n", - "12 310 M-H 37 ^_\n", - "13 376 M-~ 17 ^O\n", - "14 377 M-^? 0 ^@\n" - ))); - - Ok(()) - } - - #[test] - #[cfg(not(windows))] - fn cmp_fast_paths() -> Result<(), Box> { - let tmp_dir = tempdir()?; - - // This test mimics one found in the GNU cmp test suite. It is used for - // validating the /dev/null optimization. 
- let a_path = tmp_dir.path().join("a"); - let a = File::create(&a_path).unwrap(); - a.set_len(14 * 1024 * 1024 * 1024 * 1024).unwrap(); - - let b_path = tmp_dir.path().join("b"); - let b = File::create(&b_path).unwrap(); - b.set_len(15 * 1024 * 1024 * 1024 * 1024).unwrap(); - - let dev_null = OpenOptions::new().write(true).open("/dev/null").unwrap(); - - let mut child = std::process::Command::new(assert_cmd::cargo::cargo_bin!("diffutils")) - .arg("cmp") - .arg(&a_path) - .arg(&b_path) - .stdout(dev_null) - .spawn() - .unwrap(); - - // Bound the runtime to a very short time that still allows for some resource - // constraint to slow it down while also allowing very fast systems to exit as - // early as possible. - const MAX_TRIES: u8 = 50; - for tries in 0..=MAX_TRIES { - if tries == MAX_TRIES { - panic!("cmp took too long to run, /dev/null optimization probably not working") - } - match child.try_wait() { - Ok(Some(status)) => { - assert_eq!(status.code(), Some(1)); - break; - } - Ok(None) => (), - Err(e) => panic!("{e:#?}"), - } - std::thread::sleep(std::time::Duration::from_millis(10)); - } - - // Two stdins should be equal - let mut cmd = cargo_bin_cmd!("diffutils"); - cmd.arg("cmp"); - cmd.arg("-"); - cmd.arg("-"); - cmd.assert() - .code(predicate::eq(0)) - .success() - .stdout(predicate::str::is_empty()) - .stderr(predicate::str::is_empty()); - - // Files with longer than block size equal segments should still report - // the correct line number for the difference. Assumes 8KB block size (see - // https://github.com/rust-lang/rust/blob/master/library/std/src/sys_common/io.rs), - // create a 24KB equality. 
- let mut bytes = " ".repeat(4095); - bytes.push('\n'); - bytes.push_str(&" ".repeat(4096)); - - let bytes = bytes.repeat(3); - let bytes = bytes.as_bytes(); - - let a_path = tmp_dir.path().join("a"); - let mut a = File::create(&a_path).unwrap(); - a.write_all(bytes).unwrap(); - a.write_all(b"A").unwrap(); - - let b_path = tmp_dir.path().join("b"); - let mut b = File::create(&b_path).unwrap(); - b.write_all(bytes).unwrap(); - b.write_all(b"B").unwrap(); - - let mut cmd = cargo_bin_cmd!("diffutils"); - cmd.arg("cmp"); - cmd.arg(&a_path).arg(&b_path); - cmd.env("LC_ALL", "en_US"); - cmd.assert() - .code(predicate::eq(1)) - .failure() - .stdout(predicate::str::ends_with(" differ: byte 24577, line 4\n")); - - Ok(()) - } -} diff --git a/tests/test_common.rs b/tests/test_common.rs new file mode 100644 index 0000000..b32edb5 --- /dev/null +++ b/tests/test_common.rs @@ -0,0 +1,99 @@ +// This file is part of the uutils diffutils package. +// +// For the full copyright and license information, please view the LICENSE-* +// files that was distributed with this source code. 
+ +// spell-checker:ignore ndefg ijkl + +use assert_cmd::cargo::cargo_bin_cmd; +use predicates::prelude::*; +use tempfile::NamedTempFile; +// use uutests::new_ucmd; does not work for diffutils itself + +// Integration tests for the diffutils command +mod common { + + use super::*; + + #[test] + fn test_unknown_param() -> Result<(), Box> { + // no util as argument + let mut cmd = cargo_bin_cmd!("diffutils"); + cmd.assert() + .code(predicate::eq(0)) + .success() + .stdout(predicate::str::contains("Usage: diffutils")); + + // util not recognized + let mut cmd = cargo_bin_cmd!("diffutils"); + cmd.arg("exterminator"); + cmd.assert() + .code(predicate::eq(2)) + .failure() + .stderr(predicate::eq("diffutils: unknown program 'exterminator'\n")); + + for sub_cmd in ["diff", "cmp"] { + let mut cmd = cargo_bin_cmd!("diffutils"); + cmd.arg(sub_cmd); + cmd.arg("--foobar"); + cmd.assert() + .code(predicate::eq(2)) + .failure() + .stderr(predicate::str::contains("unexpected option '--foobar'")); + } + Ok(()) + } + + #[test] + fn cannot_read_files() -> Result<(), Box> { + let file = NamedTempFile::new()?; + + let no_file = NamedTempFile::new()?; + let no_path = no_file.into_temp_path(); + std::fs::remove_file(&no_path)?; + + // #[cfg(not(windows))] + let error_message = "No such file or directory"; + // #[cfg(windows)] + // let error_message = "The system cannot find the file specified."; + + for sub_cmd in ["diff", "cmp"] { + // dbg!(&sub_cmd, &no_path.as_os_str().to_string_lossy()); + let mut cmd = cargo_bin_cmd!("diffutils"); + cmd.arg(sub_cmd); + cmd.arg(&no_path).arg(file.path()); + cmd.assert() + .code(predicate::eq(2)) + .failure() + .stderr(predicate::str::ends_with(format!( + ": {}: {error_message}\n", + &no_path.as_os_str().to_string_lossy() + ))); + + let mut cmd = cargo_bin_cmd!("diffutils"); + cmd.arg(sub_cmd); + cmd.arg(file.path()).arg(&no_path); + cmd.assert() + .code(predicate::eq(2)) + .failure() + .stderr(predicate::str::ends_with(format!( + ": {}: 
{error_message}\n", + &no_path.as_os_str().to_string_lossy() + ))); + } + + // This requires two error messages. This is difficult to replicate + let mut cmd = cargo_bin_cmd!("diffutils"); + cmd.arg("diff"); + cmd.arg(&no_path).arg(&no_path); + cmd.assert().code(predicate::eq(2)).failure().stderr( + predicate::str::contains(format!( + ": {}: {error_message}\n", + &no_path.as_os_str().to_string_lossy() + )) + .count(2), + ); + + Ok(()) + } +} diff --git a/tests/tests.rs b/tests/tests.rs new file mode 100644 index 0000000..702b39f --- /dev/null +++ b/tests/tests.rs @@ -0,0 +1,25 @@ +// This file is part of the uutils diffutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +use std::env; + +pub const TESTS_BINARY: &str = env!("CARGO_BIN_EXE_diffutils"); + +// Use the ctor attribute to run this function before any tests +#[ctor::ctor] +fn init() { + unsafe { + // Necessary for uutests to be able to find the binary + env::set_var("UUTESTS_BINARY_PATH", TESTS_BINARY); + } +} + +#[cfg(feature = "cmp")] +#[path = "by-util/test_cmp.rs"] +mod test_cmp; + +#[cfg(feature = "diff")] +#[path = "by-util/test_diff.rs"] +mod test_diff; diff --git a/tests/uutests/Cargo.toml b/tests/uutests/Cargo.toml new file mode 100644 index 0000000..d96183f --- /dev/null +++ b/tests/uutests/Cargo.toml @@ -0,0 +1,47 @@ +# spell-checker:ignore (features) zerocopy serde + +[package] +name = "uutests" +description = "uutils ~ 'core' uutils test library (cross-platform)" +repository = "https://github.com/uutils/coreutils/tree/main/tests/uutests" +authors.workspace = true +categories.workspace = true +edition.workspace = true +rust-version.workspace = true +homepage.workspace = true +keywords.workspace = true +license.workspace = true +version.workspace = true + +[package.metadata.docs.rs] +all-features = true + +[lints] +workspace = true + +[lib] +path = "src/lib/lib.rs" + +[dependencies] +ctor = { 
workspace = true } +libc = { workspace = true } +pretty_assertions = { workspace = true } +rand = { workspace = true } +regex = { workspace = true } +tempfile = { workspace = true } +uucore = { workspace = true, features = [ + "mode", + "entries", + "process", + "signals", + "utmpx", +] } + +[target.'cfg(any(target_os = "linux", target_os = "android"))'.dependencies] + +[target.'cfg(unix)'.dependencies] +nix = { workspace = true, features = ["process", "signal", "term", "user"] } +rlimit = { workspace = true } + +[target.'cfg(all(unix, not(any(target_os = "macos", target_os = "openbsd"))))'.dependencies] +xattr = { workspace = true } diff --git a/tests/uutests/src/lib/lib.rs b/tests/uutests/src/lib/lib.rs new file mode 100644 index 0000000..05e2b13 --- /dev/null +++ b/tests/uutests/src/lib/lib.rs @@ -0,0 +1,8 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. +#[macro_use] +pub mod macros; +pub mod random; +pub mod util; diff --git a/tests/uutests/src/lib/macros.rs b/tests/uutests/src/lib/macros.rs new file mode 100644 index 0000000..f94f17a --- /dev/null +++ b/tests/uutests/src/lib/macros.rs @@ -0,0 +1,109 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +/// Platform-independent helper for constructing a `PathBuf` from individual elements +#[macro_export] +macro_rules! path_concat { + ($e:expr, ..$n:expr) => {{ + use std::path::PathBuf; + let n = $n; + let mut pb = PathBuf::new(); + for _ in 0..n { + pb.push($e); + } + pb.to_str().unwrap().to_owned() + }}; + ($($e:expr),*) => {{ + use std::path::PathBuf; + let mut pb = PathBuf::new(); + $( + pb.push($e); + )* + pb.to_str().unwrap().to_owned() + }}; +} + +/// Deduce the name of the test binary from the test filename. 
+/// +/// e.g.: `tests/by-util/test_cat.rs` -> `cat` +#[macro_export] +macro_rules! util_name { + () => { + module_path!() + .split("_") + .nth(1) + .and_then(|s| s.split("::").next()) + .expect("no test name") + }; +} + +/// Convenience macro for acquiring a [`UCommand`] builder. +/// +/// Returns the following: +/// - a [`UCommand`] builder for invoking the binary to be tested +/// +/// This macro is intended for quick, single-call tests. For more complex tests +/// that require multiple invocations of the tested binary, see [`TestScenario`] +/// +/// [`UCommand`]: crate::util::UCommand +/// [`TestScenario`]: crate::util::TestScenario +#[macro_export] +macro_rules! new_ucmd { + () => { + ::uutests::util::TestScenario::new(::uutests::util_name!()).ucmd() + }; +} + +/// Convenience macro for acquiring a [`UCommand`] builder and a test path. +/// +/// Returns a tuple containing the following: +/// - an [`AtPath`] that points to a unique temporary test directory +/// - a [`UCommand`] builder for invoking the binary to be tested +/// +/// This macro is intended for quick, single-call tests. For more complex tests +/// that require multiple invocations of the tested binary, see [`TestScenario`] +/// +/// [`UCommand`]: crate::util::UCommand +/// [`AtPath`]: crate::util::AtPath +/// [`TestScenario`]: crate::util::TestScenario +#[macro_export] +macro_rules! at_and_ucmd { + () => {{ + let ts = ::uutests::util::TestScenario::new(::uutests::util_name!()); + (ts.fixtures.clone(), ts.ucmd()) + }}; +} + +/// Convenience macro for acquiring a [`TestScenario`] with its test path. +/// +/// Returns a tuple containing the following: +/// - a [`TestScenario`] for invoking commands +/// - an [`AtPath`] that points to a unique temporary test directory +/// +/// [`AtPath`]: crate::util::AtPath +/// [`TestScenario`]: crate::util::TestScenario +#[macro_export] +macro_rules! 
at_and_ts { + () => {{ + let ts = ::uutests::util::TestScenario::new(::uutests::util_name!()); + (ts.fixtures.clone(), ts) + }}; +} + +/// If `common::util::expected_result` returns an error, i.e. the `util` in `$PATH` doesn't +/// include a coreutils version string or the version is too low, +/// this macro can be used to automatically skip the test and print the reason. +#[macro_export] +macro_rules! unwrap_or_return { + ( $e:expr ) => { + match $e { + Ok(x) => x, + Err(e) => { + println!("test skipped: {e}"); + return; + } + } + }; +} diff --git a/tests/uutests/src/lib/mod.rs b/tests/uutests/src/lib/mod.rs new file mode 100644 index 0000000..05e2b13 --- /dev/null +++ b/tests/uutests/src/lib/mod.rs @@ -0,0 +1,8 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. +#[macro_use] +pub mod macros; +pub mod random; +pub mod util; diff --git a/tests/uutests/src/lib/random.rs b/tests/uutests/src/lib/random.rs new file mode 100644 index 0000000..cdb64a1 --- /dev/null +++ b/tests/uutests/src/lib/random.rs @@ -0,0 +1,347 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. 
+#![allow(clippy::naive_bytecount)] + +use rand::{ + Rng, RngExt as _, + distr::{Distribution, Uniform}, + rng, +}; + +/// Samples alphanumeric characters `[A-Za-z0-9]` including newline `\n` +/// +/// # Examples +/// +/// ```rust,ignore +/// use rand::{Rng, rng}; +/// +/// let vec = rng() +/// .sample_iter(AlphanumericNewline) +/// .take(10) +/// .collect::>(); +/// println!("Random chars: {}", String::from_utf8(vec).unwrap()); +/// ``` +#[derive(Clone, Copy, Debug)] +pub struct AlphanumericNewline; + +impl AlphanumericNewline { + /// The charset to act upon + const CHARSET: &'static [u8] = + b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789\n"; + + /// Generate a random byte from [`Self::CHARSET`] and return it as `u8`. + /// + /// # Arguments + /// + /// * `rng`: A [`rand::Rng`] + /// + /// returns: u8 + fn random(rng: &mut R) -> u8 + where + R: Rng + ?Sized, + { + let idx = rng.random_range(0..Self::CHARSET.len()); + Self::CHARSET[idx] + } +} + +impl Distribution for AlphanumericNewline { + fn sample(&self, rng: &mut R) -> u8 { + Self::random(rng) + } +} + +/// Generate a random string from a [`Distribution`] +/// +/// # Examples +/// +/// ```rust,ignore +/// use crate::common::random::{AlphanumericNewline, RandomizedString}; +/// use rand::distributions::Alphanumeric; +/// +/// // generates a 100 byte string with characters from AlphanumericNewline +/// let random_string = RandomizedString::generate(AlphanumericNewline, 100); +/// assert_eq!(100, random_string.len()); +/// +/// // generates a 100 byte string with 10 newline characters not ending with a newline +/// let string = RandomizedString::generate_with_delimiter(Alphanumeric, b'\n', 10, false, 100); +/// assert_eq!(100, random_string.len()); +/// ``` +pub struct RandomizedString; + +impl RandomizedString { + /// Generate a random string from the given [`Distribution`] with the given `length` in bytes. 
+ /// + /// # Arguments + /// + /// * `dist`: A u8 [`Distribution`] + /// * `length`: the length of the resulting string in bytes + /// + /// returns: String + pub fn generate(dist: D, length: usize) -> String + where + D: Distribution, + { + rng() + .sample_iter(dist) + .take(length) + .map(|b| b as char) + .collect() + } + + /// Generate a random string from the [`Distribution`] with the given `length` in bytes. The + /// function takes a `delimiter`, which is randomly distributed in the string, such that exactly + /// `num_delimiter` amount of `delimiter`s occur. If `end_with_delimiter` is set, then the + /// string ends with the delimiter, else the string does not end with the delimiter. + /// + /// # Arguments + /// + /// * `dist`: A `u8` [`Distribution`] + /// * `delimiter`: A `u8` delimiter, which does not need to be included in the `Distribution` + /// * `num_delimiter`: The number of `delimiter`s contained in the resulting string + /// * `end_with_delimiter`: If the string shall end with the given delimiter + /// * `length`: the length of the resulting string in bytes + /// + /// returns: String + /// + /// # Examples + /// + /// ```rust,ignore + /// use crate::common::random::{AlphanumericNewline, RandomizedString}; + /// + /// // generates a 100 byte string with 10 '\0' byte characters not ending with a '\0' byte + /// let string = RandomizedString::generate_with_delimiter(AlphanumericNewline, 0, 10, false, 100); + /// assert_eq!(100, random_string.len()); + /// assert_eq!( + /// 10, + /// random_string.as_bytes().iter().filter(|p| **p == 0).count() + /// ); + /// assert!(!random_string.as_bytes().ends_with(&[0])); + /// ``` + pub fn generate_with_delimiter( + dist: D, + delimiter: u8, + num_delimiter: usize, + end_with_delimiter: bool, + length: usize, + ) -> String + where + D: Distribution, + { + if length == 0 { + return String::new(); + } else if length == 1 { + return if num_delimiter > 0 { + String::from(delimiter as char) + } else { + 
String::from(rng().sample(&dist) as char) + }; + } + + let samples = length - 1; + let mut result: Vec = rng().sample_iter(&dist).take(samples).collect(); + + if num_delimiter == 0 { + result.push(rng().sample(&dist)); + return String::from_utf8(result).unwrap(); + } + + let num_delimiter = if end_with_delimiter { + num_delimiter - 1 + } else { + num_delimiter + }; + + // it's safe to unwrap because samples is always > 0, thus low < high + let between = Uniform::new(0, samples).unwrap(); + for _ in 0..num_delimiter { + let mut pos = between.sample(&mut rng()); + let turn = pos; + while result[pos] == delimiter { + pos += 1; + if pos >= samples { + pos = 0; + } + if pos == turn { + break; + } + } + result[pos] = delimiter; + } + + if end_with_delimiter { + result.push(delimiter); + } else { + result.push(rng().sample(&dist)); + } + + String::from_utf8(result).unwrap() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use rand::distr::Alphanumeric; + + #[test] + fn test_random_string_generate() { + let random_string = RandomizedString::generate(AlphanumericNewline, 0); + assert_eq!(0, random_string.len()); + + let random_string = RandomizedString::generate(AlphanumericNewline, 1); + assert_eq!(1, random_string.len()); + + let random_string = RandomizedString::generate(AlphanumericNewline, 100); + assert_eq!(100, random_string.len()); + } + + #[test] + fn test_random_string_generate_with_delimiter_when_length_is_zero() { + let random_string = RandomizedString::generate_with_delimiter(Alphanumeric, 0, 0, false, 0); + assert_eq!(0, random_string.len()); + } + + #[test] + fn test_random_string_generate_with_delimiter_when_num_delimiter_is_greater_than_length() { + let random_string = RandomizedString::generate_with_delimiter(Alphanumeric, 0, 2, false, 1); + assert_eq!(1, random_string.len()); + assert!(random_string.as_bytes().contains(&0)); + assert!(random_string.as_bytes().ends_with(&[0])); + } + + #[test] + #[allow(clippy::cognitive_complexity)] // Ignore clippy 
lint of too long function sign + fn test_random_string_generate_with_delimiter_should_end_with_delimiter() { + let random_string = RandomizedString::generate_with_delimiter(Alphanumeric, 0, 1, true, 1); + assert_eq!(1, random_string.len()); + assert_eq!( + 1, + random_string.as_bytes().iter().filter(|p| **p == 0).count() + ); + assert!(random_string.as_bytes().ends_with(&[0])); + + let random_string = RandomizedString::generate_with_delimiter(Alphanumeric, 0, 1, false, 1); + assert_eq!(1, random_string.len()); + assert_eq!( + 1, + random_string.as_bytes().iter().filter(|p| **p == 0).count() + ); + assert!(random_string.as_bytes().ends_with(&[0])); + + let random_string = RandomizedString::generate_with_delimiter(Alphanumeric, 0, 1, true, 2); + assert_eq!(2, random_string.len()); + assert_eq!( + 1, + random_string.as_bytes().iter().filter(|p| **p == 0).count() + ); + assert!(random_string.as_bytes().ends_with(&[0])); + + let random_string = RandomizedString::generate_with_delimiter(Alphanumeric, 0, 2, true, 2); + assert_eq!(2, random_string.len()); + assert_eq!( + 2, + random_string.as_bytes().iter().filter(|p| **p == 0).count() + ); + assert!(random_string.as_bytes().ends_with(&[0])); + + let random_string = RandomizedString::generate_with_delimiter(Alphanumeric, 0, 1, true, 3); + assert_eq!(3, random_string.len()); + assert_eq!( + 1, + random_string.as_bytes().iter().filter(|p| **p == 0).count() + ); + assert!(random_string.as_bytes().ends_with(&[0])); + } + + #[test] + #[allow(clippy::cognitive_complexity)] // Ignore clippy lint of too long function sign + fn test_random_string_generate_with_delimiter_should_not_end_with_delimiter() { + let random_string = RandomizedString::generate_with_delimiter(Alphanumeric, 0, 0, false, 1); + assert_eq!(1, random_string.len()); + assert_eq!( + 0, + random_string.as_bytes().iter().filter(|p| **p == 0).count() + ); + + let random_string = RandomizedString::generate_with_delimiter(Alphanumeric, 0, 0, true, 1); + assert_eq!(1, 
random_string.len()); + assert_eq!( + 0, + random_string.as_bytes().iter().filter(|p| **p == 0).count() + ); + + let random_string = RandomizedString::generate_with_delimiter(Alphanumeric, 0, 1, false, 2); + assert_eq!(2, random_string.len()); + assert_eq!( + 1, + random_string.as_bytes().iter().filter(|p| **p == 0).count() + ); + assert!(!random_string.as_bytes().ends_with(&[0])); + + let random_string = RandomizedString::generate_with_delimiter(Alphanumeric, 0, 1, false, 3); + assert_eq!(3, random_string.len()); + assert_eq!( + 1, + random_string.as_bytes().iter().filter(|p| **p == 0).count() + ); + assert!(!random_string.as_bytes().ends_with(&[0])); + + let random_string = RandomizedString::generate_with_delimiter(Alphanumeric, 0, 2, false, 3); + assert_eq!(3, random_string.len()); + assert_eq!( + 2, + random_string.as_bytes().iter().filter(|p| **p == 0).count() + ); + assert!(!random_string.as_bytes().ends_with(&[0])); + } + + #[test] + fn test_generate_with_delimiter_with_greater_length() { + let random_string = + RandomizedString::generate_with_delimiter(Alphanumeric, 0, 100, false, 1000); + assert_eq!(1000, random_string.len()); + assert_eq!( + 100, + random_string.as_bytes().iter().filter(|p| **p == 0).count() + ); + assert!(!random_string.as_bytes().ends_with(&[0])); + + let random_string = + RandomizedString::generate_with_delimiter(Alphanumeric, 0, 100, true, 1000); + assert_eq!(1000, random_string.len()); + assert_eq!( + 100, + random_string.as_bytes().iter().filter(|p| **p == 0).count() + ); + assert!(random_string.as_bytes().ends_with(&[0])); + } + + /// Originally used to exclude an error within the `random` module. The two + /// affected tests timed out on windows, but only in the ci. These tests are + /// also the source for the concrete numbers. 
The timed out tests are + /// `test_tail.rs::test_pipe_when_lines_option_given_input_size_has_multiple_size_of_buffer_size` + /// `test_tail.rs::test_pipe_when_bytes_option_given_input_size_has_multiple_size_of_buffer_size`. + #[test] + fn test_generate_random_strings_when_length_is_around_critical_buffer_sizes() { + let length = 8192 * 3; + let random_string = RandomizedString::generate(AlphanumericNewline, length); + assert_eq!(length, random_string.len()); + + let length = 8192 * 3 + 1; + let random_string = + RandomizedString::generate_with_delimiter(Alphanumeric, b'\n', 100, true, length); + assert_eq!(length, random_string.len()); + assert_eq!( + 100, + random_string + .as_bytes() + .iter() + .filter(|p| **p == b'\n') + .count() + ); + assert!(!random_string.as_bytes().ends_with(&[0])); + } +} diff --git a/tests/uutests/src/lib/util.rs b/tests/uutests/src/lib/util.rs new file mode 100644 index 0000000..1625840 --- /dev/null +++ b/tests/uutests/src/lib/util.rs @@ -0,0 +1,3614 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. 
+//spell-checker: ignore (linux) rlimit prlimit coreutil ggroups uchild uncaptured scmd SHLVL canonicalized openpty +//spell-checker: ignore (linux) winsize xpixel ypixel setrlimit FSIZE SIGBUS SIGSEGV sigbus tmpfs mksocket +//spell-checker: ignore (ToDO) ttyname + +#![allow(dead_code)] +#![allow( + clippy::too_many_lines, + clippy::should_panic_without_expect, + clippy::missing_errors_doc +)] + +use core::str; +#[cfg(unix)] +use libc::mode_t; +#[cfg(unix)] +use nix::pty::OpenptyResult; +#[cfg(unix)] +use nix::sys; +#[cfg(not(windows))] +use nix::sys::stat::{self, SFlag}; +use pretty_assertions::assert_eq; +#[cfg(unix)] +use rlimit::setrlimit; +use std::borrow::Cow; +use std::collections::VecDeque; +#[cfg(not(windows))] +use std::ffi::CString; +use std::ffi::{OsStr, OsString}; +use std::fs::{self, File, OpenOptions, hard_link, remove_file}; +use std::io::{self, BufWriter, Read, Result, Write}; +#[cfg(unix)] +use std::os::fd::OwnedFd; +#[cfg(unix)] +use std::os::unix::fs::{PermissionsExt, symlink as symlink_dir, symlink as symlink_file}; +#[cfg(unix)] +use std::os::unix::net::UnixListener; +#[cfg(unix)] +use std::os::unix::process::CommandExt; +#[cfg(unix)] +use std::os::unix::process::ExitStatusExt; +#[cfg(windows)] +use std::os::windows::fs::{symlink_dir, symlink_file}; +#[cfg(windows)] +use std::path::MAIN_SEPARATOR_STR; +use std::path::{Path, PathBuf}; +use std::process::{Child, Command, ExitStatus, Output, Stdio}; +use std::rc::Rc; +use std::sync::mpsc::{self, RecvTimeoutError}; +use std::thread::{JoinHandle, sleep}; +use std::time::{Duration, Instant}; +use std::{env, hint, mem, thread}; +use tempfile::{Builder, TempDir}; + +use std::sync::OnceLock; + +static TESTS_DIR: &str = "tests"; +static FIXTURES_DIR: &str = "fixtures"; + +static ALREADY_RUN: &str = " you have already run this UCommand, if you want to run \ + another command in the same test, use TestScenario::new instead of \ + testing();"; +static MULTIPLE_STDIN_MEANINGLESS: &str = "Ucommand is 
designed around a typical use case of: provide args and input stream -> spawn process -> block until completion -> return output streams. For verifying that a particular section of the input stream is what causes a particular behavior, use the Command type directly."; + +static NO_STDIN_MEANINGLESS: &str = "Setting this flag has no effect if there is no stdin"; +static END_OF_TRANSMISSION_SEQUENCE: &[u8] = b"\n\x04"; + +static TESTS_BINARY_PATH: OnceLock = OnceLock::new(); +/// This function needs the env variable UUTESTS_BINARY_PATH +/// which will very probably be env!("`CARGO_BIN_EXE_`") +/// because here, we are in a crate but we need the name of the final binary +pub fn get_tests_binary() -> &'static str { + TESTS_BINARY_PATH.get_or_init(|| { + if let Ok(path) = env::var("UUTESTS_BINARY_PATH") { + return PathBuf::from(path); + } + panic!("Could not determine coreutils binary path. Please set UUTESTS_BINARY_PATH environment variable"); + }) + .to_str() + .unwrap() +} + +#[macro_export] +macro_rules! 
get_tests_binary { + () => { + $crate::util::get_tests_binary() + }; +} + +pub const PATH: &str = env!("PATH"); + +/// Default environment variables to run the commands with +const DEFAULT_ENV: [(&str, &str); 2] = [("LC_ALL", "C"), ("TZ", "UTC")]; + +/// Test if the program is running under CI +pub fn is_ci() -> bool { + env::var("CI").is_ok_and(|s| s.eq_ignore_ascii_case("true")) +} + +/// Read a test scenario fixture, returning its bytes +fn read_scenario_fixture>(tmpd: Option<&Rc>, file_rel_path: S) -> Vec { + let tmpdir_path = tmpd.as_ref().unwrap().as_ref().path(); + AtPath::new(tmpdir_path).read_bytes(file_rel_path.as_ref().to_str().unwrap()) +} + +/// A command result is the outputs of a command (streams and status code) +/// within a struct which has convenience assertion functions about those outputs +#[derive(Debug, Clone)] +pub struct CmdResult { + /// `bin_path` provided by `TestScenario` or `UCommand` + bin_path: PathBuf, + /// `util_name` provided by `TestScenario` or `UCommand` + util_name: Option, + //tmpd is used for convenience functions for asserts against fixtures + tmpd: Option>, + /// exit status for command (if there is one) + exit_status: Option, + /// captured standard output after running the Command + stdout: Vec, + /// captured standard error after running the Command + stderr: Vec, +} + +impl CmdResult { + pub fn new( + bin_path: S, + util_name: Option, + tmpd: Option>, + exit_status: Option, + stdout: U, + stderr: V, + ) -> Self + where + S: Into, + T: AsRef, + U: Into>, + V: Into>, + { + Self { + bin_path: bin_path.into(), + util_name: util_name.map(|s| s.as_ref().into()), + tmpd, + exit_status, + stdout: stdout.into(), + stderr: stderr.into(), + } + } + + /// Apply a function to `stdout` as bytes and return a new [`CmdResult`] + pub fn stdout_apply<'a, F, R>(&'a self, function: F) -> Self + where + F: Fn(&'a [u8]) -> R, + R: Into>, + { + Self::new( + self.bin_path.clone(), + self.util_name.clone(), + self.tmpd.clone(), + 
self.exit_status, + function(&self.stdout), + self.stderr.as_slice(), + ) + } + + /// Apply a function to `stdout` as `&str` and return a new [`CmdResult`] + pub fn stdout_str_apply<'a, F, R>(&'a self, function: F) -> Self + where + F: Fn(&'a str) -> R, + R: Into>, + { + Self::new( + self.bin_path.clone(), + self.util_name.clone(), + self.tmpd.clone(), + self.exit_status, + function(self.stdout_str()), + self.stderr.as_slice(), + ) + } + + /// Apply a function to `stderr` as bytes and return a new [`CmdResult`] + pub fn stderr_apply<'a, F, R>(&'a self, function: F) -> Self + where + F: Fn(&'a [u8]) -> R, + R: Into>, + { + Self::new( + self.bin_path.clone(), + self.util_name.clone(), + self.tmpd.clone(), + self.exit_status, + self.stdout.as_slice(), + function(&self.stderr), + ) + } + + /// Apply a function to `stderr` as `&str` and return a new [`CmdResult`] + pub fn stderr_str_apply<'a, F, R>(&'a self, function: F) -> Self + where + F: Fn(&'a str) -> R, + R: Into>, + { + Self::new( + self.bin_path.clone(), + self.util_name.clone(), + self.tmpd.clone(), + self.exit_status, + self.stdout.as_slice(), + function(self.stderr_str()), + ) + } + + /// Assert `stdout` as bytes with a predicate function returning a `bool`. + #[track_caller] + pub fn stdout_check<'a, F>(&'a self, predicate: F) -> &'a Self + where + F: Fn(&'a [u8]) -> bool, + { + assert!( + predicate(&self.stdout), + "Predicate for stdout as `bytes` evaluated to false.\nstdout='{:?}'\nstderr='{:?}'\n", + self.stdout, + self.stderr + ); + self + } + + /// Assert `stdout` as `&str` with a predicate function returning a `bool`. + #[track_caller] + pub fn stdout_str_check<'a, F>(&'a self, predicate: F) -> &'a Self + where + F: Fn(&'a str) -> bool, + { + assert!( + predicate(self.stdout_str()), + "Predicate for stdout as `str` evaluated to false.\nstdout='{}'\nstderr='{}'\n", + self.stdout_str(), + self.stderr_str() + ); + self + } + + /// Assert `stderr` as bytes with a predicate function returning a `bool`. 
+ #[track_caller] + pub fn stderr_check<'a, F>(&'a self, predicate: F) -> &'a Self + where + F: Fn(&'a [u8]) -> bool, + { + assert!( + predicate(&self.stderr), + "Predicate for stderr as `bytes` evaluated to false.\nstdout='{:?}'\nstderr='{:?}'\n", + self.stdout, + self.stderr + ); + self + } + + /// Assert `stderr` as `&str` with a predicate function returning a `bool`. + #[track_caller] + pub fn stderr_str_check<'a, F>(&'a self, predicate: F) -> &'a Self + where + F: Fn(&'a str) -> bool, + { + assert!( + predicate(self.stderr_str()), + "Predicate for stderr as `str` evaluated to false.\nstdout='{}'\nstderr='{}'\n", + self.stdout_str(), + self.stderr_str() + ); + self + } + + /// Return the exit status of the child process, if any. + /// + /// Returns None if the child process is still running or hasn't been started. + pub fn try_exit_status(&self) -> Option { + self.exit_status + } + + /// Return the exit status of the child process. + /// + /// # Panics + /// + /// If the child process is still running or hasn't been started. + pub fn exit_status(&self) -> ExitStatus { + self.try_exit_status() + .expect("Program must be run first or has not finished, yet") + } + + /// Return the signal the child process received if any. + /// + /// # Platform specific behavior + /// + /// This method is only available on unix systems. + #[cfg(unix)] + pub fn signal(&self) -> Option { + self.exit_status().signal() + } + + /// Assert that the given signal `value` equals the signal the child process received. + /// + /// See also [`std::os::unix::process::ExitStatusExt::signal`]. + /// + /// # Platform specific behavior + /// + /// This assertion method is only available on unix systems. 
+ #[cfg(unix)] + #[track_caller] + pub fn signal_is(&self, value: i32) -> &Self { + let actual = self.signal().unwrap_or_else(|| { + panic!( + "Expected process to be terminated by the '{value}' signal, but exit status is: '{}'", + self.try_exit_status() + .map_or("Not available".to_string(), |e| e.to_string()) + ) + }); + + assert_eq!(actual, value); + self + } + + /// Assert that the given signal `name` equals the signal the child process received. + /// + /// Strings like `SIGINT`, `INT` or a number like `15` are all valid names. See also + /// [`std::os::unix::process::ExitStatusExt::signal`] and + /// [`uucore::signals::signal_by_name_or_value`] + /// + /// # Platform specific behavior + /// + /// This assertion method is only available on unix systems. + #[cfg(unix)] + #[track_caller] + pub fn signal_name_is(&self, name: &str) -> &Self { + use uucore::signals::signal_by_name_or_value; + let expected: i32 = signal_by_name_or_value(name) + .unwrap_or_else(|| panic!("Invalid signal name or value: '{name}'")) + .try_into() + .unwrap(); + + let actual = self.signal().unwrap_or_else(|| { + panic!( + "Expected process to be terminated by the '{name}' signal, but exit status is: '{}'", + self.try_exit_status() + .map_or("Not available".to_string(), |e| e.to_string()) + ) + }); + + assert_eq!(actual, expected); + self + } + + /// Returns a reference to the program's standard output as a slice of bytes + pub fn stdout(&self) -> &[u8] { + &self.stdout + } + + /// Returns the program's standard output as a string slice + pub fn stdout_str(&self) -> &str { + std::str::from_utf8(&self.stdout).unwrap() + } + + /// Returns the program's standard output as a string, automatically handling invalid utf8 + pub fn stdout_str_lossy(self) -> String { + String::from_utf8_lossy(&self.stdout).to_string() + } + + /// Returns the program's standard output as a string + /// consumes self + pub fn stdout_move_str(self) -> String { + String::from_utf8(self.stdout).unwrap() + } + + /// 
Returns the program's standard output as a vec of bytes + /// consumes self + pub fn stdout_move_bytes(self) -> Vec { + self.stdout + } + + /// Returns a reference to the program's standard error as a slice of bytes + pub fn stderr(&self) -> &[u8] { + &self.stderr + } + + /// Returns the program's standard error as a string slice + pub fn stderr_str(&self) -> &str { + std::str::from_utf8(&self.stderr).unwrap() + } + + /// Returns the program's standard error as a string slice, automatically handling invalid utf8 + pub fn stderr_str_lossy(&self) -> Cow<'_, str> { + String::from_utf8_lossy(&self.stderr) + } + + /// Returns the program's standard error as a string + /// consumes self + pub fn stderr_move_str(self) -> String { + String::from_utf8(self.stderr).unwrap() + } + + /// Returns the program's standard error as a vec of bytes + /// consumes self + pub fn stderr_move_bytes(self) -> Vec { + self.stderr + } + + /// Returns the program's exit code + /// Panics if not run or has not finished yet for example when run with `run_no_wait()` + pub fn code(&self) -> i32 { + self.exit_status().code().unwrap() + } + + /// Verify the exit code of the program + /// + /// # Examples + /// + /// ```rust,ignore + /// new_ucmd!().arg("--definitely-invalid").fails().code_is(1); + /// ``` + #[track_caller] + pub fn code_is(&self, expected_code: i32) -> &Self { + let fails = self.code() != expected_code; + if fails { + eprintln!( + "stdout:\n{}\nstderr:\n{}", + self.stdout_str(), + self.stderr_str() + ); + } + assert_eq!(self.code(), expected_code); + self + } + + /// Returns the program's `TempDir` + /// Panics if not present + pub fn tmpd(&self) -> Rc { + match &self.tmpd { + Some(ptr) => ptr.clone(), + None => panic!("Command not associated with a TempDir"), + } + } + + /// Returns whether the program succeeded + pub fn succeeded(&self) -> bool { + self.exit_status.is_none_or(|e| e.success()) + } + + /// asserts that the command resulted in a success (zero) status code + 
#[track_caller] + pub fn success(&self) -> &Self { + assert!( + self.succeeded(), + "Command was expected to succeed. code: {}\nstdout = {}\n stderr = {}", + self.code(), + self.stdout_str(), + self.stderr_str() + ); + self + } + + /// asserts that the command resulted in a failure (non-zero) status code + #[track_caller] + pub fn failure(&self) -> &Self { + assert!( + !self.succeeded(), + "Command was expected to fail.\nstdout = {}\n stderr = {}", + self.stdout_str(), + self.stderr_str() + ); + self + } + + /// asserts that the command resulted in empty (zero-length) stderr stream output + /// generally, it's better to use `stdout_only()` instead, + /// but you might find yourself using this function if + /// 1. you can not know exactly what stdout will be or + /// 2. you know that stdout will also be empty + /// + /// # Examples + /// + /// ```rust,ignore + /// scene.ucmd().fails().no_stderr(); + /// ``` + #[track_caller] + pub fn no_stderr(&self) -> &Self { + assert!( + self.stderr.is_empty(), + "Expected stderr to be empty, but it's:\n{}", + self.stderr_str() + ); + self + } + + /// asserts that the command resulted in empty (zero-length) stderr stream output + /// unless asserting there was neither stdout or stderr, `stderr_only` is usually a better choice + /// generally, it's better to use `stderr_only()` instead, + /// but you might find yourself using this function if + /// 1. you can not know exactly what stderr will be or + /// 2. you know that stderr will also be empty + /// new_ucmd!() + /// + /// # Examples + /// + /// ```rust,ignore + /// scene.ucmd().fails().no_stdout(); + /// ``` + #[track_caller] + pub fn no_stdout(&self) -> &Self { + assert!( + self.stdout.is_empty(), + "Expected stdout to be empty, but it's:\n{}", + self.stdout_str() + ); + self + } + + /// Assert that there is output to neither stderr nor stdout. 
+ #[track_caller] + pub fn no_output(&self) -> &Self { + self.no_stdout().no_stderr() + } + + /// asserts that the command resulted in stdout stream output that equals the + /// passed in value, trailing whitespace are kept to force strict comparison (#1235) + /// `stdout_only()` is a better choice unless stderr may or will be non-empty + #[track_caller] + pub fn stdout_is>(&self, msg: T) -> &Self { + assert_eq!(self.stdout_str(), String::from(msg.as_ref())); + self + } + + /// like `stdout_is`, but succeeds if any elements of `expected` matches stdout. + #[track_caller] + pub fn stdout_is_any + std::fmt::Debug>(&self, expected: &[T]) -> &Self { + assert!( + expected.iter().any(|msg| self.stdout_str() == msg.as_ref()), + "stdout was {}\nExpected any of {expected:#?}", + self.stdout_str(), + ); + self + } + + /// Like `stdout_is` but newlines are normalized to `\n`. + #[track_caller] + pub fn normalized_newlines_stdout_is>(&self, msg: T) -> &Self { + let msg = msg.as_ref().replace("\r\n", "\n"); + assert_eq!(self.stdout_str().replace("\r\n", "\n"), msg); + self + } + + /// asserts that the command resulted in stdout stream output, + /// whose bytes equal those of the passed in slice + #[track_caller] + pub fn stdout_is_bytes>(&self, msg: T) -> &Self { + assert_eq!( + self.stdout, + msg.as_ref(), + "stdout as bytes wasn't equal to expected bytes. Result as strings:\nstdout ='{:?}'\nexpected='{:?}'", + std::str::from_utf8(&self.stdout), + std::str::from_utf8(msg.as_ref()), + ); + self + } + + /// like `stdout_is()`, but expects the contents of the file at the provided relative path + #[track_caller] + pub fn stdout_is_fixture>(&self, file_rel_path: T) -> &Self { + let contents = read_scenario_fixture(self.tmpd.as_ref(), file_rel_path); + self.stdout_is(String::from_utf8(contents).unwrap()) + } + + /// Assert that the bytes of stdout exactly match those of the given file. 
+ /// + /// Contrast this with [`CmdResult::stdout_is_fixture`], which + /// decodes the contents of the file as a UTF-8 [`String`] before + /// comparison with stdout. + /// + /// # Examples + /// + /// Use this method in a unit test like this: + /// + /// ```rust,ignore + /// #[test] + /// fn test_something() { + /// new_ucmd!().succeeds().stdout_is_fixture_bytes("expected.bin"); + /// } + /// ``` + #[track_caller] + pub fn stdout_is_fixture_bytes>(&self, file_rel_path: T) -> &Self { + let contents = read_scenario_fixture(self.tmpd.as_ref(), file_rel_path); + self.stdout_is_bytes(contents) + } + + /// like `stdout_is_fixture()`, but replaces the data in fixture file based on values provided in `template_vars` + /// command output + #[track_caller] + pub fn stdout_is_templated_fixture>( + &self, + file_rel_path: T, + template_vars: &[(&str, &str)], + ) -> &Self { + let mut contents = + String::from_utf8(read_scenario_fixture(self.tmpd.as_ref(), file_rel_path)).unwrap(); + for kv in template_vars { + contents = contents.replace(kv.0, kv.1); + } + self.stdout_is(contents) + } + + /// like `stdout_is_templated_fixture`, but succeeds if any replacement by `template_vars` results in the actual stdout. + #[track_caller] + pub fn stdout_is_templated_fixture_any>( + &self, + file_rel_path: T, + template_vars: &[Vec<(String, String)>], + ) { + let contents = + String::from_utf8(read_scenario_fixture(self.tmpd.as_ref(), file_rel_path)).unwrap(); + let possible_values = template_vars.iter().map(|vars| { + let mut contents = contents.clone(); + for kv in vars { + contents = contents.replace(&kv.0, &kv.1); + } + contents + }); + self.stdout_is_any(&possible_values.collect::>()); + } + + /// assert that the command resulted in stderr stream output that equals the + /// passed in value. 
+ /// + /// `stderr_only` is a better choice unless stdout may or will be non-empty + #[track_caller] + pub fn stderr_is>(&self, msg: T) -> &Self { + assert_eq!(self.stderr_str(), msg.as_ref()); + self + } + + /// asserts that the command resulted in stderr stream output, + /// whose bytes equal those of the passed in slice + #[track_caller] + pub fn stderr_is_bytes>(&self, msg: T) -> &Self { + assert_eq!( + &self.stderr, + msg.as_ref(), + "stderr as bytes wasn't equal to expected bytes. Result as strings:\nstderr ='{:?}'\nexpected='{:?}'", + std::str::from_utf8(&self.stderr), + std::str::from_utf8(msg.as_ref()) + ); + self + } + + /// Like `stdout_is_fixture`, but for stderr + #[track_caller] + pub fn stderr_is_fixture>(&self, file_rel_path: T) -> &Self { + let contents = read_scenario_fixture(self.tmpd.as_ref(), file_rel_path); + self.stderr_is(String::from_utf8(contents).unwrap()) + } + + /// asserts that + /// 1. the command resulted in stdout stream output that equals the + /// passed in value + /// 2. the command resulted in empty (zero-length) stderr stream output + #[track_caller] + pub fn stdout_only>(&self, msg: T) -> &Self { + self.no_stderr().stdout_is(msg) + } + + /// asserts that + /// 1. the command resulted in a stdout stream whose bytes + /// equal those of the passed in value + /// 2. the command resulted in an empty stderr stream + #[track_caller] + pub fn stdout_only_bytes>(&self, msg: T) -> &Self { + self.no_stderr().stdout_is_bytes(msg) + } + + /// like `stdout_only()`, but expects the contents of the file at the provided relative path + #[track_caller] + pub fn stdout_only_fixture>(&self, file_rel_path: T) -> &Self { + let contents = read_scenario_fixture(self.tmpd.as_ref(), file_rel_path); + self.stdout_only_bytes(contents) + } + + /// asserts that + /// 1. the command resulted in stderr stream output that equals the + /// passed in value + /// 2. 
the command resulted in empty (zero-length) stdout stream output + #[track_caller] + pub fn stderr_only>(&self, msg: T) -> &Self { + self.no_stdout().stderr_is(msg) + } + + /// asserts that + /// 1. the command resulted in a stderr stream whose bytes equal the ones + /// of the passed value + /// 2. the command resulted in an empty stdout stream + #[track_caller] + pub fn stderr_only_bytes>(&self, msg: T) -> &Self { + self.no_stdout().stderr_is_bytes(msg) + } + + #[track_caller] + pub fn fails_silently(&self) -> &Self { + assert!(!self.succeeded()); + assert!( + self.stderr.is_empty(), + "Expected stderr to be empty, but it's:\n{}", + self.stderr_str() + ); + self + } + + /// asserts that + /// 1. the command resulted in stderr stream output that equals the + /// the following format + /// `"{util_name}: {msg}\nTry '{bin_path} {util_name} --help' for more information."` + /// This the expected format when a `UUsageError` is returned or when `show_error!` is called + /// `msg` should be the same as the one provided to `UUsageError::new` or `show_error!` + /// + /// 2. 
the command resulted in empty (zero-length) stdout stream output + #[track_caller] + pub fn usage_error>(&self, msg: T) -> &Self { + self.stderr_only(format!( + "{0}: {2}\nTry '{1} {0} --help' for more information.\n", + self.util_name.as_ref().unwrap(), // This shouldn't be called using a normal command + self.bin_path.display(), + msg.as_ref() + )) + } + + /// Verify if stdout contains a specific string + /// + /// # Examples + /// + /// ```rust,ignore + /// new_ucmd!() + /// .arg("--help") + /// .succeeds() + /// .stdout_contains("Options:"); + /// ``` + #[track_caller] + pub fn stdout_contains>(&self, cmp: T) -> &Self { + assert!( + self.stdout_str().contains(cmp.as_ref()), + "'{}' does not contain '{}'", + self.stdout_str(), + cmp.as_ref() + ); + self + } + + /// Verify if stdout contains a specific line + /// + /// # Examples + /// + /// ```rust,ignore + /// new_ucmd!() + /// .arg("--help") + /// .succeeds() + /// .stdout_contains_line("Options:"); + /// ``` + #[track_caller] + pub fn stdout_contains_line>(&self, cmp: T) -> &Self { + assert!( + self.stdout_str().lines().any(|line| line == cmp.as_ref()), + "'{}' does not contain line '{}'", + self.stdout_str(), + cmp.as_ref() + ); + self + } + + /// Verify if stdout contains a byte sequence + /// + /// # Examples + /// + /// ```rust,ignore + /// new_ucmd!() + /// .arg("--help") + /// .succeeds() + /// .stdout_contains_bytes(b"hello \xff"); + /// ``` + #[track_caller] + pub fn stdout_contains_bytes>(&self, cmp: T) -> &Self { + assert!( + self.stdout() + .windows(cmp.as_ref().len()) + .any(|sub| sub == cmp.as_ref()), + "'{:?}'\ndoes not contain\n'{:?}'", + self.stdout(), + cmp.as_ref() + ); + self + } + + /// Verify if stderr contains a specific string + /// + /// # Examples + /// + /// ```rust,ignore + /// new_ucmd!() + /// .arg("-l") + /// .arg("IaMnOtAsIgNaL") + /// .fails() + /// .stderr_contains("IaMnOtAsIgNaL"); + /// ``` + #[track_caller] + pub fn stderr_contains>(&self, cmp: T) -> &Self { + assert!( + 
self.stderr_str().contains(cmp.as_ref()), + "'{}' does not contain '{}'", + self.stderr_str(), + cmp.as_ref() + ); + self + } + + /// Verify if stderr contains a byte sequence + /// + /// # Examples + /// + /// ```rust,ignore + /// new_ucmd!() + /// .arg("--help") + /// .succeeds() + /// .stdout_contains_bytes(b"hello \xff"); + /// ``` + #[track_caller] + pub fn stderr_contains_bytes>(&self, cmp: T) -> &Self { + assert!( + self.stderr() + .windows(cmp.as_ref().len()) + .any(|sub| sub == cmp.as_ref()), + "'{:?}'\ndoes not contain\n'{:?}'", + self.stderr(), + cmp.as_ref() + ); + self + } + + /// Verify if stdout does not contain a specific string + /// + /// # Examples + /// + /// ```rust,ignore + /// new_ucmd!() + /// .arg("-l") + /// .arg("IaMnOtAsIgNaL") + /// .fails() + /// .stdout_does_not_contain("Valid-signal"); + /// ``` + #[track_caller] + pub fn stdout_does_not_contain>(&self, cmp: T) -> &Self { + assert!( + !self.stdout_str().contains(cmp.as_ref()), + "'{}' contains '{}' but should not", + self.stdout_str(), + cmp.as_ref(), + ); + self + } + + /// Verify if st stderr does not contain a specific string + /// + /// # Examples + /// + /// ```rust,ignore + /// new_ucmd!() + /// .arg("-l") + /// .arg("IaMnOtAsIgNaL") + /// .fails() + /// .stderr_does_not_contain("Valid-signal"); + /// ``` + #[track_caller] + pub fn stderr_does_not_contain>(&self, cmp: T) -> &Self { + assert!(!self.stderr_str().contains(cmp.as_ref())); + self + } + + #[track_caller] + pub fn stdout_matches(&self, regex: ®ex::Regex) -> &Self { + assert!( + regex.is_match(self.stdout_str()), + "Stdout does not match regex:\n{}", + self.stdout_str() + ); + self + } + + #[track_caller] + pub fn stderr_matches(&self, regex: ®ex::Regex) -> &Self { + assert!( + regex.is_match(self.stderr_str()), + "Stderr does not match regex:\n{}", + self.stderr_str() + ); + self + } + + #[track_caller] + pub fn stdout_does_not_match(&self, regex: ®ex::Regex) -> &Self { + assert!( + !regex.is_match(self.stdout_str()), 
+ "Stdout matches regex:\n{}", + self.stdout_str() + ); + self + } +} + +pub fn log_info, U: AsRef>(msg: T, par: U) { + println!("{}: {}", msg.as_ref(), par.as_ref()); +} + +pub fn recursive_copy(src: &Path, dest: &Path) -> Result<()> { + if fs::metadata(src)?.is_dir() { + for entry in fs::read_dir(src)? { + let entry = entry?; + let mut new_dest = PathBuf::from(dest); + new_dest.push(entry.file_name()); + if fs::metadata(entry.path())?.is_dir() { + fs::create_dir(&new_dest)?; + recursive_copy(&entry.path(), &new_dest)?; + } else { + fs::copy(entry.path(), new_dest)?; + } + } + } + Ok(()) +} + +pub fn get_root_path() -> &'static str { + if cfg!(windows) { "C:\\" } else { "/" } +} + +/// Compares the extended attributes (xattrs) of two files or directories. +/// +/// # Returns +/// +/// `true` if both paths have the same set of extended attributes, `false` otherwise. +#[cfg(all(unix, not(any(target_os = "macos", target_os = "openbsd"))))] +pub fn compare_xattrs>(path1: P, path2: P) -> bool { + let get_sorted_xattrs = |path: P| { + xattr::list(path) + .map(|attrs| { + let mut attrs = attrs.collect::>(); + attrs.sort(); + attrs + }) + .unwrap_or_default() + }; + + get_sorted_xattrs(path1) == get_sorted_xattrs(path2) +} + +/// Object-oriented path struct that represents and operates on +/// paths relative to the directory it was constructed for. 
+#[derive(Clone)] +pub struct AtPath { + pub subdir: PathBuf, +} + +impl AtPath { + pub fn new(subdir: &Path) -> Self { + Self { + subdir: PathBuf::from(subdir), + } + } + + pub fn as_string(&self) -> String { + self.subdir.to_str().unwrap().to_owned() + } + + pub fn plus>(&self, name: P) -> PathBuf { + let mut pathbuf = self.subdir.clone(); + pathbuf.push(name); + pathbuf + } + + pub fn plus_as_string>(&self, name: P) -> String { + self.plus(name).display().to_string() + } + + fn minus(&self, name: &str) -> PathBuf { + let prefixed = PathBuf::from(name); + if prefixed.starts_with(&self.subdir) { + let mut unprefixed = PathBuf::new(); + for component in prefixed.components().skip(self.subdir.components().count()) { + unprefixed.push(component.as_os_str().to_str().unwrap()); + } + unprefixed + } else { + prefixed + } + } + + pub fn minus_as_string(&self, name: &str) -> String { + String::from(self.minus(name).to_str().unwrap()) + } + + pub fn set_readonly(&self, name: &str) { + let metadata = fs::metadata(self.plus(name)).unwrap(); + let mut permissions = metadata.permissions(); + permissions.set_readonly(true); + fs::set_permissions(self.plus(name), permissions).unwrap(); + } + + pub fn open(&self, name: &str) -> File { + log_info("open", self.plus_as_string(name)); + File::open(self.plus(name)).unwrap() + } + + pub fn read(&self, name: &str) -> String { + let mut f = self.open(name); + let mut contents = String::new(); + f.read_to_string(&mut contents) + .unwrap_or_else(|e| panic!("Couldn't read {name}: {e}")); + contents + } + + pub fn read_bytes(&self, name: &str) -> Vec { + let mut f = self.open(name); + let mut contents = Vec::new(); + f.read_to_end(&mut contents) + .unwrap_or_else(|e| panic!("Couldn't read {name}: {e}")); + contents + } + + pub fn write(&self, name: &str, contents: &str) { + log_info("write(default)", self.plus_as_string(name)); + fs::write(self.plus(name), contents) + .unwrap_or_else(|e| panic!("Couldn't write {name}: {e}")); + } + + pub fn 
write_bytes(&self, name: &str, contents: &[u8]) { + log_info("write(default)", self.plus_as_string(name)); + fs::write(self.plus(name), contents) + .unwrap_or_else(|e| panic!("Couldn't write {name}: {e}")); + } + + pub fn append(&self, name: impl AsRef, contents: &str) { + let name = name.as_ref(); + log_info("write(append)", self.plus_as_string(name)); + let mut f = OpenOptions::new() + .append(true) + .create(true) + .open(self.plus(name)) + .unwrap(); + f.write_all(contents.as_bytes()) + .unwrap_or_else(|e| panic!("Couldn't write(append) {}: {e}", name.display())); + } + + pub fn append_bytes(&self, name: &str, contents: &[u8]) { + log_info("write(append)", self.plus_as_string(name)); + let mut f = OpenOptions::new() + .append(true) + .create(true) + .open(self.plus(name)) + .unwrap(); + f.write_all(contents) + .unwrap_or_else(|e| panic!("Couldn't write(append) to {name}: {e}")); + } + + pub fn truncate(&self, name: &str, contents: &str) { + log_info("write(truncate)", self.plus_as_string(name)); + let mut f = OpenOptions::new() + .write(true) + .truncate(true) + .create(true) + .open(self.plus(name)) + .unwrap(); + f.write_all(contents.as_bytes()) + .unwrap_or_else(|e| panic!("Couldn't write(truncate) {name}: {e}")); + } + + pub fn rename(&self, source: &str, target: &str) { + let source = self.plus(source); + let target = self.plus(target); + log_info( + "rename", + format!("{} {}", source.display(), target.display()), + ); + fs::rename(&source, &target).unwrap_or_else(|e| { + panic!( + "Couldn't rename {} -> {}: {e}", + source.display(), + target.display() + ) + }); + } + + pub fn remove(&self, source: &str) { + let source = self.plus(source); + log_info("remove", format!("{}", source.display())); + remove_file(&source) + .unwrap_or_else(|e| panic!("Couldn't remove {}: {e}", source.display())); + } + + pub fn copy(&self, source: &str, target: &str) { + let source = self.plus(source); + let target = self.plus(target); + log_info("copy", format!("{} {}", 
source.display(), target.display())); + fs::copy(&source, &target).unwrap_or_else(|e| { + panic!( + "Couldn't copy {} -> {}: {e}", + source.display(), + target.display() + ) + }); + } + + pub fn rmdir(&self, dir: &str) { + log_info("rmdir", self.plus_as_string(dir)); + fs::remove_dir(self.plus(dir)).unwrap(); + } + + pub fn mkdir>(&self, dir: P) { + let dir = dir.as_ref(); + log_info("mkdir", self.plus_as_string(dir)); + fs::create_dir(self.plus(dir)).unwrap(); + } + + pub fn mkdir_all(&self, dir: &str) { + log_info("mkdir_all", self.plus_as_string(dir)); + fs::create_dir_all(self.plus(dir)).unwrap(); + } + + pub fn make_file(&self, name: &str) -> File { + match File::create(self.plus(name)) { + Ok(f) => f, + Err(e) => panic!("{e}"), + } + } + + pub fn touch>(&self, file: P) { + let file = file.as_ref(); + log_info("touch", self.plus_as_string(file)); + File::create(self.plus(file)).unwrap(); + } + + #[cfg(not(windows))] + pub fn mkfifo(&self, fifo: &str) { + let full_path = self.plus_as_string(fifo); + log_info("mkfifo", &full_path); + unsafe { + let fifo_name: CString = CString::new(full_path).expect("CString creation failed."); + libc::mkfifo(fifo_name.as_ptr(), libc::S_IWUSR | libc::S_IRUSR); + } + } + + #[cfg(unix)] + pub fn mksocket(&self, socket: &str) { + let full_path = self.plus_as_string(socket); + log_info("mksocket", &full_path); + UnixListener::bind(full_path).expect("Socket file creation failed."); + } + + #[cfg(not(windows))] + pub fn is_fifo(&self, fifo: &str) -> bool { + stat::stat(&self.plus(fifo)) + .is_ok_and(|s| SFlag::from_bits_truncate(s.st_mode).contains(SFlag::S_IFIFO)) + } + + #[cfg(not(windows))] + pub fn is_char_device(&self, char_dev: &str) -> bool { + stat::stat(&self.plus(char_dev)) + .is_ok_and(|s| SFlag::from_bits_truncate(s.st_mode).contains(SFlag::S_IFCHR)) + } + + pub fn hard_link(&self, original: &str, link: &str) { + log_info( + "hard_link", + format!( + "{},{}", + self.plus_as_string(original), + self.plus_as_string(link) + 
), + ); + hard_link(self.plus(original), self.plus(link)).unwrap(); + } + + pub fn symlink_file(&self, original: &str, link: &str) { + log_info( + "symlink", + format!( + "{},{}", + self.plus_as_string(original), + self.plus_as_string(link) + ), + ); + symlink_file(self.plus(original), self.plus(link)).unwrap(); + } + + pub fn relative_symlink_file(&self, original: &str, link: &str) { + #[cfg(windows)] + let original = original.replace('/', MAIN_SEPARATOR_STR); + log_info( + "symlink", + format!("{original},{}", self.plus_as_string(link)), + ); + symlink_file(original, self.plus(link)).unwrap(); + } + + pub fn symlink_dir(&self, original: &str, link: &str) { + log_info( + "symlink", + format!( + "{},{}", + self.plus_as_string(original), + self.plus_as_string(link) + ), + ); + symlink_dir(self.plus(original), self.plus(link)).unwrap(); + } + + pub fn relative_symlink_dir(&self, original: &str, link: &str) { + #[cfg(windows)] + let original = original.replace('/', MAIN_SEPARATOR_STR); + log_info( + "symlink", + format!("{original},{}", self.plus_as_string(link)), + ); + symlink_dir(original, self.plus(link)).unwrap(); + } + + pub fn is_symlink(&self, path: &str) -> bool { + log_info("is_symlink", self.plus_as_string(path)); + match fs::symlink_metadata(self.plus(path)) { + Ok(m) => m.file_type().is_symlink(), + Err(_) => false, + } + } + + pub fn resolve_link(&self, path: &str) -> String { + log_info("resolve_link", self.plus_as_string(path)); + match fs::read_link(self.plus(path)) { + Ok(p) => self.minus_as_string(p.to_str().unwrap()), + Err(_) => String::new(), + } + } + + pub fn read_symlink(&self, path: &str) -> String { + log_info("read_symlink", self.plus_as_string(path)); + fs::read_link(self.plus(path)) + .unwrap() + .to_str() + .unwrap() + .to_owned() + } + + pub fn symlink_metadata(&self, path: &str) -> fs::Metadata { + match fs::symlink_metadata(self.plus(path)) { + Ok(m) => m, + Err(e) => panic!("{e}"), + } + } + + pub fn metadata(&self, path: &str) -> 
fs::Metadata { + match fs::metadata(self.plus(path)) { + Ok(m) => m, + Err(e) => panic!("{e}"), + } + } + + pub fn file_exists>(&self, path: P) -> bool { + match fs::metadata(self.plus(path)) { + Ok(m) => m.is_file(), + Err(_) => false, + } + } + + /// Decide whether the named symbolic link exists in the test directory. + pub fn symlink_exists>(&self, path: P) -> bool { + match fs::symlink_metadata(self.plus(path)) { + Ok(m) => m.file_type().is_symlink(), + Err(_) => false, + } + } + + pub fn dir_exists>(&self, path: P) -> bool { + match fs::metadata(self.plus(path)) { + Ok(m) => m.is_dir(), + Err(_) => false, + } + } + + pub fn root_dir_resolved(&self) -> String { + log_info("current_directory_resolved", ""); + let s = self + .subdir + .canonicalize() + .unwrap() + .to_str() + .unwrap() + .to_owned(); + + // Due to canonicalize()'s use of GetFinalPathNameByHandleW() on Windows, the resolved path + // starts with '\\?\' to extend the limit of a given path to 32,767 wide characters. + // + // To address this issue, we remove this prepended string if available. + // + // Source: + // http://stackoverflow.com/questions/31439011/getfinalpathnamebyhandle-without-prepended + let prefix = "\\\\?\\"; + + if let Some(stripped) = s.strip_prefix(prefix) { + String::from(stripped) + } else { + s + } + } + + /// Set the permissions of the specified file. + /// + /// # Panics + /// + /// This function panics if there is an error loading the metadata + /// or setting the permissions of the file. + #[cfg(not(windows))] + pub fn set_mode(&self, filename: &str, mode: u32) { + let path = self.plus(filename); + let mut perms = fs::metadata(&path).unwrap().permissions(); + perms.set_mode(mode); + fs::set_permissions(&path, perms).unwrap(); + } +} + +/// An environment for running a single uutils test case, serves three functions: +/// 1. centralizes logic for locating the uutils binary and calling the utility +/// 2. provides a unique temporary directory for the test case +/// 3. 
copies over fixtures for the utility to the temporary directory +/// +/// Fixtures can be found under `tests/fixtures/$util_name/` +pub struct TestScenario { + pub bin_path: PathBuf, + pub util_name: String, + pub fixtures: AtPath, + tmpd: Rc, + #[cfg(any(target_os = "linux", target_os = "android", target_os = "freebsd"))] + tmp_fs_mountpoint: Option, +} + +impl TestScenario { + pub fn new(util_name: T) -> Self + where + T: AsRef, + { + let tmpd = Rc::new(TempDir::new().unwrap()); + println!("bin: {:?}", get_tests_binary!()); + let ts = Self { + bin_path: PathBuf::from(get_tests_binary!()), + util_name: util_name.as_ref().into(), + fixtures: AtPath::new(tmpd.as_ref().path()), + tmpd, + #[cfg(any(target_os = "linux", target_os = "android", target_os = "freebsd"))] + tmp_fs_mountpoint: None, + }; + let mut fixture_path_builder = env::current_dir().unwrap(); + fixture_path_builder.push(TESTS_DIR); + fixture_path_builder.push(FIXTURES_DIR); + fixture_path_builder.push(util_name.as_ref()); + if let Ok(m) = fs::metadata(&fixture_path_builder) { + if m.is_dir() { + recursive_copy(&fixture_path_builder, &ts.fixtures.subdir).unwrap(); + } + } + ts + } + + /// Returns builder for invoking the target uutils binary. Paths given are + /// treated relative to the environment's unique temporary test directory. + pub fn ucmd(&self) -> UCommand { + UCommand::from_test_scenario(self) + } + + /// Returns builder for invoking any system command. Paths given are treated + /// relative to the environment's unique temporary test directory. + pub fn cmd>(&self, bin_path: S) -> UCommand { + let mut command = UCommand::new(); + command.bin_path(bin_path); + command.temp_dir(self.tmpd.clone()); + command + } + + /// Returns builder for invoking a command in shell (e.g. sh -c 'cmd'). + /// Paths given are treated relative to the environment's unique temporary + /// test directory. 
+ pub fn cmd_shell>(&self, cmd: S) -> UCommand { + let mut command = UCommand::new(); + // Intentionally leave bin_path unset. + command.arg(cmd); + command.temp_dir(self.tmpd.clone()); + command + } + + /// Returns builder for invoking any uutils command. Paths given are treated + /// relative to the environment's unique temporary test directory. + pub fn ccmd>(&self, util_name: S) -> UCommand { + UCommand::with_util(util_name, self.tmpd.clone()) + } + + /// Mounts a temporary filesystem at the specified mount point. + #[cfg(any(target_os = "linux", target_os = "android", target_os = "freebsd"))] + pub fn mount_temp_fs(&mut self, mount_point: &str) -> core::result::Result<(), String> { + if self.tmp_fs_mountpoint.is_some() { + return Err("already mounted".to_string()); + } + let cmd_result = self + .cmd("mount") + .arg("-t") + .arg("tmpfs") + .arg("-o") + .arg("size=640k") // ought to be enough + .arg("tmpfs") + .arg(mount_point) + .run(); + if !cmd_result.succeeded() { + return Err(format!("mount failed: {}", cmd_result.stderr_str())); + } + self.tmp_fs_mountpoint = Some(mount_point.to_string()); + Ok(()) + } + + #[cfg(any(target_os = "linux", target_os = "android", target_os = "freebsd"))] + /// Unmounts the temporary filesystem if it is currently mounted. + pub fn umount_temp_fs(&mut self) { + if let Some(mount_point) = self.tmp_fs_mountpoint.as_ref() { + self.cmd("umount").arg(mount_point).succeeds(); + self.tmp_fs_mountpoint = None; + } + } +} + +impl Drop for TestScenario { + fn drop(&mut self) { + #[cfg(any(target_os = "linux", target_os = "android", target_os = "freebsd"))] + self.umount_temp_fs(); + } +} + +#[cfg(unix)] +#[derive(Debug, Default)] +pub struct TerminalSimulation { + pub size: Option, + pub stdin: bool, + pub stdout: bool, + pub stderr: bool, +} + +/// A `UCommand` is a builder wrapping an individual Command that provides several additional features: +/// 1. 
it has convenience functions that are more ergonomic to use for piping in stdin, spawning the command +/// and asserting on the results. +/// 2. it tracks arguments provided so that in test cases which may provide variations of an arg in loops +/// the test failure can display the exact call which preceded an assertion failure. +/// 3. it provides convenience construction methods to set the Command uutils utility and temporary directory. +/// +/// Per default `UCommand` runs a command given as an argument in a shell, platform independently. +/// It does so with safety in mind, so the working directory is set to an individual temporary +/// directory and the environment variables are cleared per default. +/// +/// The default behavior can be changed with builder methods: +/// * [`UCommand::with_util`]: Run `coreutils UTIL_NAME` instead of the shell +/// * [`UCommand::from_test_scenario`]: Run `coreutils UTIL_NAME` instead of the shell in the +/// temporary directory of the [`TestScenario`] +/// * [`UCommand::current_dir`]: Sets the working directory +/// * ... +#[derive(Debug, Default)] +pub struct UCommand { + args: VecDeque, + env_vars: Vec<(OsString, OsString)>, + current_dir: Option, + bin_path: Option, + util_name: Option, + has_run: bool, + ignore_stdin_write_error: bool, + stdin: Option, + stdout: Option, + stderr: Option, + bytes_into_stdin: Option>, + #[cfg(unix)] + limits: Vec<(rlimit::Resource, u64, u64)>, + stderr_to_stdout: bool, + timeout: Option, + #[cfg(unix)] + terminal_simulation: Option, + tmpd: Option>, // drop last + #[cfg(unix)] + umask: Option, +} + +impl UCommand { + /// Create a new plain [`UCommand`]. + /// + /// Executes a command that must be given as argument (for example with [`UCommand::arg`] in a + /// shell (`sh -c` on unix platforms or `cmd /C` on windows). + /// + /// Per default the environment is cleared and the working directory is set to an individual + /// temporary directory for safety purposes. 
+ pub fn new() -> Self { + Self { + ..Default::default() + } + } + + /// Create a [`UCommand`] for a specific uutils utility. + /// + /// Sets the temporary directory to `tmpd` and the execution binary to the path where + /// `coreutils` is found. + pub fn with_util(util_name: T, tmpd: Rc) -> Self + where + T: AsRef, + { + let mut ucmd = Self::new(); + ucmd.util_name = Some(util_name.as_ref().into()); + ucmd.bin_path(&*get_tests_binary!()).temp_dir(tmpd); + ucmd + } + + /// Create a [`UCommand`] from a [`TestScenario`]. + /// + /// The temporary directory and uutils utility are inherited from the [`TestScenario`] and the + /// execution binary is set to `coreutils`. + pub fn from_test_scenario(scene: &TestScenario) -> Self { + Self::with_util(&scene.util_name, scene.tmpd.clone()) + } + + /// Set the execution binary. + /// + /// Make sure the binary found at this path is executable. It's safest to provide the + /// canonicalized path instead of just the name of the executable, since path resolution is not + /// guaranteed to work on all platforms. + fn bin_path(&mut self, bin_path: T) -> &mut Self + where + T: Into, + { + self.bin_path = Some(bin_path.into()); + self + } + + /// Set the temporary directory. + /// + /// Per default an individual temporary directory is created for every [`UCommand`]. If not + /// specified otherwise with [`UCommand::current_dir`] the working directory is set to this + /// temporary directory. + fn temp_dir(&mut self, temp_dir: Rc) -> &mut Self { + self.tmpd = Some(temp_dir); + self + } + + /// Set the working directory for this [`UCommand`] + /// + /// Per default the working directory is set to the [`UCommand`] temporary directory. 
+ /// + pub fn current_dir(&mut self, current_dir: T) -> &mut Self + where + T: Into, + { + self.current_dir = Some(current_dir.into()); + self + } + + pub fn set_stdin>(&mut self, stdin: T) -> &mut Self { + self.stdin = Some(stdin.into()); + self + } + + pub fn set_stdout>(&mut self, stdout: T) -> &mut Self { + self.stdout = Some(stdout.into()); + self + } + + pub fn set_stderr>(&mut self, stderr: T) -> &mut Self { + self.stderr = Some(stderr.into()); + self + } + + pub fn stderr_to_stdout(&mut self) -> &mut Self { + self.stderr_to_stdout = true; + self + } + + /// Add a parameter to the invocation. Path arguments are treated relative + /// to the test environment directory. + pub fn arg>(&mut self, arg: S) -> &mut Self { + self.args.push_back(arg.as_ref().into()); + self + } + + /// Add multiple parameters to the invocation. Path arguments are treated relative + /// to the test environment directory. + pub fn args>(&mut self, args: &[S]) -> &mut Self { + self.args.extend(args.iter().map(|s| s.as_ref().into())); + self + } + + /// provides standard input to feed in to the command when spawned + pub fn pipe_in>>(&mut self, input: T) -> &mut Self { + assert!( + self.bytes_into_stdin.is_none(), + "{MULTIPLE_STDIN_MEANINGLESS}", + ); + self.set_stdin(Stdio::piped()); + self.bytes_into_stdin = Some(input.into()); + self + } + + /// like `pipe_in()`, but uses the contents of the file at the provided relative path as the piped in data + pub fn pipe_in_fixture>(&mut self, file_rel_path: S) -> &mut Self { + let contents = read_scenario_fixture(self.tmpd.as_ref(), file_rel_path); + self.pipe_in(contents) + } + + /// Ignores error caused by feeding stdin to the command. 
+ /// This is typically useful to test non-standard workflows + /// like feeding something to a command that does not read it + pub fn ignore_stdin_write_error(&mut self) -> &mut Self { + self.ignore_stdin_write_error = true; + self + } + + pub fn env(&mut self, key: K, val: V) -> &mut Self + where + K: AsRef, + V: AsRef, + { + self.env_vars + .push((key.as_ref().into(), val.as_ref().into())); + self + } + + pub fn envs(&mut self, iter: I) -> &mut Self + where + I: IntoIterator, + K: AsRef, + V: AsRef, + { + for (k, v) in iter { + self.env(k, v); + } + self + } + + #[cfg(unix)] + pub fn limit( + &mut self, + resource: rlimit::Resource, + soft_limit: u64, + hard_limit: u64, + ) -> &mut Self { + self.limits.push((resource, soft_limit, hard_limit)); + self + } + + #[cfg(unix)] + /// The umask is a value that restricts the permissions of newly created files and directories. + pub fn umask(&mut self, umask: mode_t) -> &mut Self { + self.umask = Some(umask); + self + } + + /// Set the timeout for [`UCommand::run`] and similar methods in [`UCommand`]. + /// + /// After the timeout elapsed these `run` methods (besides [`UCommand::run_no_wait`]) will + /// panic. When [`UCommand::run_no_wait`] is used, this timeout is applied to + /// `wait_with_output` including all other waiting methods in [`UChild`] implicitly + /// using `wait_with_output()` and additionally [`UChild::kill`]. The default timeout of `kill` + /// will be overwritten by this `timeout`. + pub fn timeout(&mut self, timeout: Duration) -> &mut Self { + self.timeout = Some(timeout); + self + } + + /// Set if process should be run in a simulated terminal + /// + /// This is useful to test behavior that is only active if e.g. `stdout.is_terminal()` is `true`. + /// This function uses default terminal size and attaches stdin, stdout and stderr to that terminal. 
+ /// For more control over the terminal simulation, use `terminal_sim_stdio` + /// (unix: pty, windows: `ConPTY`[not yet supported]) + #[cfg(unix)] + pub fn terminal_simulation(&mut self, enable: bool) -> &mut Self { + if enable { + self.terminal_simulation = Some(TerminalSimulation { + stdin: true, + stdout: true, + stderr: true, + ..Default::default() + }); + } else { + self.terminal_simulation = None; + } + self + } + + /// Allows to simulate a terminal use-case with specific properties. + /// + /// This is useful to test behavior that is only active if e.g. `stdout.is_terminal()` is `true`. + /// This function allows to set a specific size and to attach the terminal to only parts of the in/out. + #[cfg(unix)] + pub fn terminal_sim_stdio(&mut self, config: TerminalSimulation) -> &mut Self { + self.terminal_simulation = Some(config); + self + } + + #[cfg(unix)] + fn read_from_pty(pty_fd: OwnedFd, out: File) { + let read_file = File::from(pty_fd); + let mut reader = io::BufReader::new(read_file); + let mut writer = BufWriter::new(out); + let result = io::copy(&mut reader, &mut writer); + match result { + Ok(_) => {} + // Input/output error (os error 5) is returned due to pipe closes. Buffer gets content anyway. 
+ Err(e) if e.raw_os_error().unwrap_or_default() == 5 => {} + Err(e) => { + eprintln!("Unexpected error: {e:?}"); + panic!("error forwarding output of pty"); + } + } + } + + #[cfg(unix)] + fn spawn_reader_thread( + captured_output: Option, + pty_fd_master: OwnedFd, + name: String, + ) -> Option { + if let Some(mut captured_output_i) = captured_output { + let fd = captured_output_i.try_clone().unwrap(); + + let handle = thread::Builder::new() + .name(name) + .spawn(move || { + Self::read_from_pty(pty_fd_master, fd); + }) + .unwrap(); + + captured_output_i.reader_thread_handle = Some(handle); + Some(captured_output_i) + } else { + None + } + } + + /// Build the `std::process::Command` and apply the defaults on fields which were not specified + /// by the user. + /// + /// These __defaults__ are: + /// * `bin_path`: Depending on the platform and os, the native shell (unix -> `/bin/sh` etc.). + /// This default also requires to set the first argument to `-c` on unix (`/C` on windows) if + /// this argument wasn't specified explicitly by the user. + /// * `util_name`: `None`. If neither `bin_path` nor `util_name` were given the arguments are + /// run in a shell (See `bin_path` above). + /// * `temp_dir`: If `current_dir` was not set, a new temporary directory will be created in + /// which this command will be run and `current_dir` will be set to this `temp_dir`. + /// * `current_dir`: The temporary directory given by `temp_dir`. + /// * `timeout`: `30 seconds` + /// * `stdin`: `Stdio::null()` + /// * `ignore_stdin_write_error`: `false` + /// * `stdout`, `stderr`: If not specified the output will be captured with [`CapturedOutput`] + /// * `stderr_to_stdout`: `false` + /// * `bytes_into_stdin`: `None` + /// * `limits`: `None`. 
+ fn build( + &mut self, + ) -> ( + Command, + Option, + Option, + Option, + ) { + if self.bin_path.is_some() { + if let Some(util_name) = &self.util_name { + self.args.push_front(util_name.into()); + } + } else if let Some(util_name) = &self.util_name { + self.bin_path = Some(PathBuf::from(&*get_tests_binary!())); + self.args.push_front(util_name.into()); + // neither `bin_path` nor `util_name` was set so we apply the default to run the arguments + // in a platform specific shell + } else if cfg!(unix) { + #[cfg(target_os = "android")] + let bin_path = PathBuf::from("/system/bin/sh"); + #[cfg(not(target_os = "android"))] + let bin_path = PathBuf::from("/bin/sh"); + + self.bin_path = Some(bin_path); + let c_arg = OsString::from("-c"); + if !self.args.contains(&c_arg) { + self.args.push_front(c_arg); + } + } else { + self.bin_path = Some(PathBuf::from("cmd")); + let c_arg = OsString::from("/C"); + let k_arg = OsString::from("/K"); + if !self + .args + .iter() + .any(|s| s.eq_ignore_ascii_case(&c_arg) || s.eq_ignore_ascii_case(&k_arg)) + { + self.args.push_front(c_arg); + } + } + + // unwrap is safe here because we have set `self.bin_path` before + let mut command = Command::new(self.bin_path.as_ref().unwrap()); + command.args(&self.args); + + // We use a temporary directory as working directory if not specified otherwise with + // `current_dir()`. If neither `current_dir` nor a temporary directory is available, then we + // create our own. 
+ if let Some(current_dir) = &self.current_dir { + command.current_dir(current_dir); + } else if let Some(temp_dir) = &self.tmpd { + command.current_dir(temp_dir.path()); + } else { + let temp_dir = tempfile::tempdir().unwrap(); + self.current_dir = Some(temp_dir.path().into()); + command.current_dir(temp_dir.path()); + self.tmpd = Some(Rc::new(temp_dir)); + } + + command.env_clear(); + + // Preserve PATH + if let Some(path) = env::var_os("PATH") { + command.env("PATH", path); + } + + if cfg!(windows) { + // spell-checker:ignore (dll) rsaenh + // %SYSTEMROOT% is required on Windows to initialize crypto provider + // ... and crypto provider is required for std::rand + // From `procmon`: RegQueryValue HKLM\SOFTWARE\Microsoft\Cryptography\Defaults\Provider\Microsoft Strong Cryptographic Provider\Image Path + // SUCCESS Type: REG_SZ, Length: 66, Data: %SystemRoot%\system32\rsaenh.dll" + if let Some(systemroot) = env::var_os("SYSTEMROOT") { + command.env("SYSTEMROOT", systemroot); + } + } else { + // if someone is setting LD_PRELOAD, there's probably a good reason for it + if let Some(ld_preload) = env::var_os("LD_PRELOAD") { + command.env("LD_PRELOAD", ld_preload); + } + } + + // Forward the LLVM_PROFILE_FILE variable to the call, for coverage purposes. 
+ if let Some(ld_preload) = env::var_os("LLVM_PROFILE_FILE") { + command.env("LLVM_PROFILE_FILE", ld_preload); + } + + command + .envs(DEFAULT_ENV) + .envs(self.env_vars.iter().cloned()); + + if self.timeout.is_none() { + self.timeout = Some(Duration::from_secs(30)); + } + + let mut captured_stdout = None; + let mut captured_stderr = None; + #[cfg(unix)] + let mut stdin_pty: Option = None; + #[cfg(not(unix))] + let stdin_pty: Option = None; + if self.stderr_to_stdout { + let mut output = CapturedOutput::default(); + + command + .stdin(self.stdin.take().unwrap_or_else(Stdio::null)) + .stdout(Stdio::from(output.try_clone().unwrap())) + .stderr(Stdio::from(output.try_clone().unwrap())); + captured_stdout = Some(output); + } else { + let stdout = if self.stdout.is_some() { + self.stdout.take().unwrap() + } else { + let mut stdout = CapturedOutput::default(); + let stdio = Stdio::from(stdout.try_clone().unwrap()); + captured_stdout = Some(stdout); + stdio + }; + + let stderr = if self.stderr.is_some() { + self.stderr.take().unwrap() + } else { + let mut stderr = CapturedOutput::default(); + let stdio = Stdio::from(stderr.try_clone().unwrap()); + captured_stderr = Some(stderr); + stdio + }; + + command + .stdin(self.stdin.take().unwrap_or_else(Stdio::null)) + .stdout(stdout) + .stderr(stderr); + } + + #[cfg(unix)] + if let Some(simulated_terminal) = &self.terminal_simulation { + let terminal_size = simulated_terminal.size.unwrap_or(libc::winsize { + ws_col: 80, + ws_row: 30, + ws_xpixel: 80 * 8, + ws_ypixel: 30 * 10, + }); + + if simulated_terminal.stdin { + let OpenptyResult { + slave: pi_slave, + master: pi_master, + } = nix::pty::openpty(&terminal_size, None).unwrap(); + stdin_pty = Some(File::from(pi_master)); + command.stdin(pi_slave); + } + + if simulated_terminal.stdout { + let OpenptyResult { + slave: po_slave, + master: po_master, + } = nix::pty::openpty(&terminal_size, None).unwrap(); + captured_stdout = Self::spawn_reader_thread( + captured_stdout, + 
po_master, + "stdout_reader".to_string(), + ); + command.stdout(po_slave); + } + + if simulated_terminal.stderr { + let OpenptyResult { + slave: pe_slave, + master: pe_master, + } = nix::pty::openpty(&terminal_size, None).unwrap(); + captured_stderr = Self::spawn_reader_thread( + captured_stderr, + pe_master, + "stderr_reader".to_string(), + ); + command.stderr(pe_slave); + } + } + + #[cfg(unix)] + if !self.limits.is_empty() { + // just to be safe: move a copy of the limits list into the closure. + // this way the closure is fully self-contained. + let limits_copy = self.limits.clone(); + let closure = move || -> Result<()> { + for &(resource, soft_limit, hard_limit) in &limits_copy { + setrlimit(resource, soft_limit, hard_limit)?; + } + Ok(()) + }; + // SAFETY: the closure is self-contained and doesn't do any memory + // writes that would need to be propagated back to the parent process. + // also, the closure doesn't access stdin, stdout and stderr. + unsafe { + command.pre_exec(closure); + } + } + + #[cfg(unix)] + if let Some(umask) = self.umask { + unsafe { + command.pre_exec(move || { + libc::umask(umask); + Ok(()) + }); + } + } + + (command, captured_stdout, captured_stderr, stdin_pty) + } + + /// Spawns the command, feeds the stdin if any, and returns the + /// child process immediately. + pub fn run_no_wait(&mut self) -> UChild { + assert!(!self.has_run, "{ALREADY_RUN}"); + self.has_run = true; + + let (mut command, captured_stdout, captured_stderr, stdin_pty) = self.build(); + log_info("run", self.to_string()); + let child = command.spawn().unwrap(); + + let mut child = UChild::from(self, child, captured_stdout, captured_stderr, stdin_pty); + + if let Some(input) = self.bytes_into_stdin.take() { + child.pipe_in(input); + } + + child + } + + /// Spawns the command, feeds the stdin if any, waits for the result + /// and returns a command result. 
+ /// It is recommended that you instead use `succeeds()` or `fails()` + pub fn run(&mut self) -> CmdResult { + self.run_no_wait().wait().unwrap() + } + + /// Spawns the command, feeding the passed in stdin, waits for the result + /// and returns a command result. + /// It is recommended that, instead of this, you use a combination of `pipe_in()` + /// with `succeeds()` or `fails()` + pub fn run_piped_stdin>>(&mut self, input: T) -> CmdResult { + self.pipe_in(input).run() + } + + /// Spawns the command, feeds the stdin if any, waits for the result, + /// asserts success, and returns a command result. + #[track_caller] + pub fn succeeds(&mut self) -> CmdResult { + let cmd_result = self.run(); + cmd_result.success(); + cmd_result + } + + /// Spawns the command, feeds the stdin if any, waits for the result, + /// asserts failure, and returns a command result. + #[track_caller] + pub fn fails(&mut self) -> CmdResult { + let cmd_result = self.run(); + cmd_result.failure(); + cmd_result + } + + #[track_caller] + pub fn fails_with_code(&mut self, expected_code: i32) -> CmdResult { + let cmd_result = self.run(); + cmd_result.failure(); + cmd_result.code_is(expected_code); + cmd_result + } + + pub fn get_full_fixture_path(&self, file_rel_path: &str) -> String { + let tmpdir_path = self.tmpd.as_ref().unwrap().path(); + format!("{}/{file_rel_path}", tmpdir_path.to_str().unwrap()) + } + + /// Runs the command, checks that the stdout starts with "expected", + /// then terminates the command. 
+ #[track_caller] + pub fn run_stdout_starts_with(&mut self, expected: &[u8]) -> CmdResult { + let mut child = self.set_stdout(Stdio::piped()).run_no_wait(); + let buf = child.stdout_exact_bytes(expected.len()); + child.close_stdout(); + + assert_eq!(buf.as_slice(), expected); + child.wait().unwrap() + } +} + +impl std::fmt::Display for UCommand { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let mut comm_string: Vec = vec![ + self.bin_path + .as_ref() + .map_or(String::new(), |p| p.display().to_string()), + ]; + comm_string.extend(self.args.iter().map(|s| s.to_string_lossy().to_string())); + f.write_str(&comm_string.join(" ")) + } +} + +/// Stored the captured output in a temporary file. The file is deleted as soon as +/// [`CapturedOutput`] is dropped. +#[derive(Debug)] +struct CapturedOutput { + current_file: File, + output: tempfile::NamedTempFile, // drop last + reader_thread_handle: Option>, +} + +impl CapturedOutput { + /// Creates a new instance of `CapturedOutput` + fn new(output: tempfile::NamedTempFile) -> Self { + Self { + current_file: output.reopen().unwrap(), + output, + reader_thread_handle: None, + } + } + + /// Try to clone the file pointer. + fn try_clone(&mut self) -> Result { + self.output.as_file().try_clone() + } + + /// Return the captured output as [`String`]. + /// + /// Subsequent calls to any of the other output methods will operate on the subsequent output. + fn output(&mut self) -> String { + String::from_utf8(self.output_bytes()).unwrap() + } + + /// Return the exact amount of bytes as `String`. + /// + /// Subsequent calls to any of the other output methods will operate on the subsequent output. + /// + /// # Important + /// + /// This method blocks indefinitely if the amount of bytes given by `size` cannot be read + fn output_exact(&mut self, size: usize) -> String { + String::from_utf8(self.output_exact_bytes(size)).unwrap() + } + + /// Return the captured output as bytes. 
+ /// + /// Subsequent calls to any of the other output methods will operate on the subsequent output. + fn output_bytes(&mut self) -> Vec { + let mut buffer = Vec::::new(); + self.current_file.read_to_end(&mut buffer).unwrap(); + buffer + } + + /// Return all captured output, so far. + /// + /// Subsequent calls to any of the other output methods will operate on the subsequent output. + fn output_all_bytes(&mut self) -> Vec { + let mut buffer = Vec::::new(); + let mut file = self.output.reopen().unwrap(); + + file.read_to_end(&mut buffer).unwrap(); + self.current_file = file; + + buffer + } + + /// Return the exact amount of bytes. + /// + /// Subsequent calls to any of the other output methods will operate on the subsequent output. + /// + /// # Important + /// + /// This method blocks indefinitely if the amount of bytes given by `size` cannot be read + fn output_exact_bytes(&mut self, size: usize) -> Vec { + let mut buffer = vec![0; size]; + self.current_file.read_exact(&mut buffer).unwrap(); + buffer + } +} + +impl Default for CapturedOutput { + fn default() -> Self { + let mut retries = 10; + let file = loop { + let file = Builder::new().rand_bytes(10).suffix(".out").tempfile(); + if file.is_ok() || retries <= 0 { + break file.unwrap(); + } + sleep(Duration::from_millis(100)); + retries -= 1; + }; + Self { + current_file: file.reopen().unwrap(), + output: file, + reader_thread_handle: None, + } + } +} + +impl Drop for CapturedOutput { + fn drop(&mut self) { + let _ = remove_file(self.output.path()); + } +} + +#[derive(Debug, Copy, Clone)] +pub enum AssertionMode { + All, + Current, + Exact(usize, usize), +} +pub struct UChildAssertion<'a> { + uchild: &'a mut UChild, +} + +impl<'a> UChildAssertion<'a> { + pub fn new(uchild: &'a mut UChild) -> Self { + Self { uchild } + } + + fn with_output(&mut self, mode: AssertionMode) -> CmdResult { + let exit_status = if self.uchild.is_alive() { + None + } else { + Some(self.uchild.raw.wait().unwrap()) + }; + let (stdout, 
stderr) = match mode { + AssertionMode::All => ( + self.uchild.stdout_all_bytes(), + self.uchild.stderr_all_bytes(), + ), + AssertionMode::Current => (self.uchild.stdout_bytes(), self.uchild.stderr_bytes()), + AssertionMode::Exact(expected_stdout_size, expected_stderr_size) => ( + self.uchild.stdout_exact_bytes(expected_stdout_size), + self.uchild.stderr_exact_bytes(expected_stderr_size), + ), + }; + CmdResult::new( + self.uchild.bin_path.clone(), + self.uchild.util_name.clone(), + self.uchild.tmpd.clone(), + exit_status, + stdout, + stderr, + ) + } + + // Make assertions of [`CmdResult`] with all output from start of the process until now. + // + // This method runs [`UChild::stdout_all_bytes`] and [`UChild::stderr_all_bytes`] under the + // hood. See there for side effects + pub fn with_all_output(&mut self) -> CmdResult { + self.with_output(AssertionMode::All) + } + + // Make assertions of [`CmdResult`] with the current output. + // + // This method runs [`UChild::stdout_bytes`] and [`UChild::stderr_bytes`] under the hood. See + // there for side effects + pub fn with_current_output(&mut self) -> CmdResult { + self.with_output(AssertionMode::Current) + } + + // Make assertions of [`CmdResult`] with the exact output. + // + // This method runs [`UChild::stdout_exact_bytes`] and [`UChild::stderr_exact_bytes`] under the + // hood. See there for side effects + pub fn with_exact_output( + &mut self, + expected_stdout_size: usize, + expected_stderr_size: usize, + ) -> CmdResult { + self.with_output(AssertionMode::Exact( + expected_stdout_size, + expected_stderr_size, + )) + } + + // Assert that the child process is alive + #[track_caller] + pub fn is_alive(&mut self) -> &mut Self { + match self.uchild.raw.try_wait() { + Ok(Some(status)) => panic!( + "Assertion failed. 
Expected '{}' to be running but exited with status={status}.\nstdout: {}\nstderr: {}", + uucore::util_name(), + self.uchild.stdout_all(), + self.uchild.stderr_all() + ), + Ok(None) => {} + Err(error) => panic!("Assertion failed with error '{error:?}'"), + } + + self + } + + // Assert that the child process has exited + #[track_caller] + pub fn is_not_alive(&mut self) -> &mut Self { + match self.uchild.raw.try_wait() { + Ok(None) => panic!( + "Assertion failed. Expected '{}' to be not running but was alive.\nstdout: {}\nstderr: {}", + uucore::util_name(), + self.uchild.stdout_all(), + self.uchild.stderr_all() + ), + Ok(_) => {} + Err(error) => panic!("Assertion failed with error '{error:?}'"), + } + + self + } +} + +/// Abstraction for a [`std::process::Child`] to handle the child process. +pub struct UChild { + raw: Child, + bin_path: PathBuf, + util_name: Option, + captured_stdout: Option, + captured_stderr: Option, + stdin_pty: Option, + ignore_stdin_write_error: bool, + stderr_to_stdout: bool, + join_handle: Option>>, + timeout: Option, + tmpd: Option>, // drop last +} + +impl UChild { + fn from( + ucommand: &UCommand, + child: Child, + captured_stdout: Option, + captured_stderr: Option, + stdin_pty: Option, + ) -> Self { + Self { + raw: child, + bin_path: ucommand.bin_path.clone().unwrap(), + util_name: ucommand.util_name.clone(), + captured_stdout, + captured_stderr, + stdin_pty, + ignore_stdin_write_error: ucommand.ignore_stdin_write_error, + stderr_to_stdout: ucommand.stderr_to_stdout, + join_handle: None, + timeout: ucommand.timeout, + tmpd: ucommand.tmpd.clone(), + } + } + + /// Convenience method for `sleep(Duration::from_millis(millis))` + pub fn delay(&mut self, millis: u64) -> &mut Self { + sleep(Duration::from_millis(millis)); + self + } + + /// Return the pid of the child process, similar to [`Child::id`]. + pub fn id(&self) -> u32 { + self.raw.id() + } + + /// Return true if the child process is still alive and false otherwise. 
+ pub fn is_alive(&mut self) -> bool { + self.raw.try_wait().unwrap().is_none() + } + + /// Return true if the child process is exited and false otherwise. + #[allow(clippy::wrong_self_convention)] + pub fn is_not_alive(&mut self) -> bool { + !self.is_alive() + } + + /// Return a [`UChildAssertion`] + pub fn make_assertion(&mut self) -> UChildAssertion<'_> { + UChildAssertion::new(self) + } + + /// Convenience function for calling [`UChild::delay`] and then [`UChild::make_assertion`] + pub fn make_assertion_with_delay(&mut self, millis: u64) -> UChildAssertion<'_> { + self.delay(millis).make_assertion() + } + + /// Try to kill the child process and wait for its termination. + /// + /// This method blocks until the child process is killed, but returns an error if `self.timeout` + /// or the default of 60s was reached. If no such error happened, the process resources are + /// released, so there is usually no need to call `wait` or alike on unix systems although it's + /// still possible to do so. + /// + /// # Platform specific behavior + /// + /// On unix systems the child process resources will be released like a call to [`Child::wait`] + /// or alike would do. + /// + /// # Error + /// + /// If [`Child::kill`] returned an error or if the child process could not be terminated within + /// `self.timeout` or the default of 60s. + pub fn try_kill(&mut self) -> Result<()> { + let start = Instant::now(); + self.raw.kill()?; + + let timeout = self.timeout.unwrap_or(Duration::from_secs(60)); + // As a side effect, we're cleaning up the killed child process with the implicit call to + // `Child::try_wait` in `self.is_alive`, which reaps the process id on unix systems. We + // always fail with error on timeout if `self.timeout` is set to zero. 
+ while self.is_alive() || timeout == Duration::ZERO { + if start.elapsed() < timeout { + self.delay(10); + } else { + return Err(io::Error::other(format!( + "kill: Timeout of '{}s' reached", + timeout.as_secs_f64() + ))); + } + hint::spin_loop(); + } + + Ok(()) + } + + /// Terminate the child process unconditionally and wait for the termination. + /// + /// Ignores any errors happening during [`Child::kill`] (i.e. child process already exited) but + /// still panics on timeout. + /// + /// # Panics + /// If the child process could not be terminated within `self.timeout` or the default of 60s. + pub fn kill(&mut self) -> &mut Self { + self.try_kill() + .or_else(|error| { + // We still throw the error on timeout in the `try_kill` function + if error.kind() == io::ErrorKind::Other { + Err(error) + } else { + Ok(()) + } + }) + .unwrap(); + self + } + + /// Try to kill the child process and wait for its termination. + /// + /// This method blocks until the child process is killed, but returns an error if `self.timeout` + /// or the default of 60s was reached. If no such error happened, the process resources are + /// released, so there is usually no need to call `wait` or alike on unix systems although it's + /// still possible to do so. + /// + /// # Platform specific behavior + /// + /// On unix systems the child process resources will be released like a call to [`Child::wait`] + /// or alike would do. + /// + /// # Error + /// + /// If [`Child::kill`] returned an error or if the child process could not be terminated within + /// `self.timeout` or the default of 60s. 
+ #[cfg(unix)] + pub fn try_kill_with_custom_signal(&mut self, signal_name: sys::signal::Signal) -> Result<()> { + let start = Instant::now(); + sys::signal::kill( + nix::unistd::Pid::from_raw(self.raw.id().try_into().unwrap()), + signal_name, + ) + .unwrap(); + + let timeout = self.timeout.unwrap_or(Duration::from_secs(60)); + // As a side effect, we're cleaning up the killed child process with the implicit call to + // `Child::try_wait` in `self.is_alive`, which reaps the process id on unix systems. We + // always fail with error on timeout if `self.timeout` is set to zero. + while self.is_alive() || timeout == Duration::ZERO { + if start.elapsed() < timeout { + self.delay(10); + } else { + return Err(io::Error::other(format!( + "kill: Timeout of '{}s' reached", + timeout.as_secs_f64() + ))); + } + hint::spin_loop(); + } + + Ok(()) + } + + /// Terminate the child process using custom signal parameter and wait for the termination. + /// + /// Ignores any errors happening during [`Child::kill`] (i.e. child process already exited) but + /// still panics on timeout. + /// + /// # Panics + /// If the child process could not be terminated within `self.timeout` or the default of 60s. + #[cfg(unix)] + pub fn kill_with_custom_signal(&mut self, signal_name: sys::signal::Signal) -> &mut Self { + self.try_kill_with_custom_signal(signal_name) + .or_else(|error| { + // We still throw the error on timeout in the `try_kill` function + if error.kind() == io::ErrorKind::Other { + Err(error) + } else { + Ok(()) + } + }) + .unwrap(); + self + } + + /// Wait for the child process to terminate and return a [`CmdResult`]. + /// + /// See `wait_with_output` for details on timeouts etc. This method can also be run if + /// the child process was killed with [`UChild::kill`]. 
+ /// + /// # Errors + /// + /// Returns the error from the call to `wait_with_output` if any + pub fn wait(self) -> Result { + let (bin_path, util_name, tmpd) = ( + self.bin_path.clone(), + self.util_name.clone(), + self.tmpd.clone(), + ); + + let output = self.wait_with_output()?; + + Ok(CmdResult { + bin_path, + util_name, + tmpd, + exit_status: Some(output.status), + stdout: output.stdout, + stderr: output.stderr, + }) + } + + /// Wait for the child process to terminate and return an instance of [`Output`]. + /// + /// If `self.timeout` is reached while waiting, a [`io::ErrorKind::Other`] representing a + /// timeout error is returned. If no errors happened, we join with the thread created by + /// [`UChild::pipe_in`] if any. + /// + /// # Error + /// + /// If `self.timeout` is reached while waiting or [`Child::wait_with_output`] returned an + /// error. + fn wait_with_output(mut self) -> Result { + // some apps do not stop execution until their stdin gets closed. + // to prevent a endless waiting here, we close the stdin. + self.join(); // ensure that all pending async input is piped in + self.close_stdin(); + + let output = if let Some(timeout) = self.timeout { + let child = self.raw; + + let (sender, receiver) = mpsc::channel(); + let handle = thread::Builder::new() + .name("wait_with_output".to_string()) + .spawn(move || sender.send(child.wait_with_output())) + .unwrap(); + + match receiver.recv_timeout(timeout) { + Ok(result) => { + // unwraps are safe here because we got a result from the sender and there was no panic + // causing a disconnect. 
+ handle.join().unwrap().unwrap(); + result + } + Err(RecvTimeoutError::Timeout) => Err(io::Error::other(format!( + "wait: Timeout of '{}s' reached", + timeout.as_secs_f64() + ))), + Err(RecvTimeoutError::Disconnected) => { + handle.join().expect("Panic caused disconnect").unwrap(); + panic!("Error receiving from waiting thread because of unexpected disconnect"); + } + } + } else { + self.raw.wait_with_output() + }; + + let mut output = output?; + + if let Some(join_handle) = self.join_handle.take() { + join_handle + .join() + .expect("Error joining with the piping stdin thread") + .unwrap(); + } + + if let Some(stdout) = self.captured_stdout.as_mut() { + if let Some(handle) = stdout.reader_thread_handle.take() { + handle.join().unwrap(); + } + output.stdout = stdout.output_bytes(); + } + if let Some(stderr) = self.captured_stderr.as_mut() { + if let Some(handle) = stderr.reader_thread_handle.take() { + handle.join().unwrap(); + } + output.stderr = stderr.output_bytes(); + } + + Ok(output) + } + + /// Read, consume and return the output as [`String`] from [`Child`]'s stdout. + /// + /// See also [`UChild::stdout_bytes`] for side effects. + pub fn stdout(&mut self) -> String { + String::from_utf8(self.stdout_bytes()).unwrap() + } + + /// Read and return all child's output in stdout as String. + /// + /// Note, that a subsequent call of any of these functions + /// + /// * [`UChild::stdout`] + /// * [`UChild::stdout_bytes`] + /// * [`UChild::stdout_exact_bytes`] + /// + /// will operate on the subsequent output of the child process. + pub fn stdout_all(&mut self) -> String { + String::from_utf8(self.stdout_all_bytes()).unwrap() + } + + /// Read, consume and return the output as bytes from [`Child`]'s stdout. 
+ /// + /// Each subsequent call to any of the functions below will operate on the subsequent output of + /// the child process: + /// + /// * [`UChild::stdout`] + /// * [`UChild::stdout_exact_bytes`] + /// * and the call to itself [`UChild::stdout_bytes`] + pub fn stdout_bytes(&mut self) -> Vec { + match self.captured_stdout.as_mut() { + Some(output) => output.output_bytes(), + None if self.raw.stdout.is_some() => { + let mut buffer: Vec = vec![]; + let stdout = self.raw.stdout.as_mut().unwrap(); + stdout.read_to_end(&mut buffer).unwrap(); + buffer + } + None => vec![], + } + } + + /// Read and return all output from start of the child process until now. + /// + /// Each subsequent call of any of the methods below will operate on the subsequent output of + /// the child process. This method will panic if the output wasn't captured (for example if + /// [`UCommand::set_stdout`] was used). + /// + /// * [`UChild::stdout`] + /// * [`UChild::stdout_bytes`] + /// * [`UChild::stdout_exact_bytes`] + pub fn stdout_all_bytes(&mut self) -> Vec { + match self.captured_stdout.as_mut() { + Some(output) => output.output_all_bytes(), + None => { + panic!("Usage error: This method cannot be used if the output wasn't captured.") + } + } + } + + /// Read, consume and return the exact amount of bytes from `stdout`. + /// + /// This method may block indefinitely if the `size` amount of bytes exceeds the amount of bytes + /// that can be read. See also [`UChild::stdout_bytes`] for side effects. + pub fn stdout_exact_bytes(&mut self, size: usize) -> Vec { + match self.captured_stdout.as_mut() { + Some(output) => output.output_exact_bytes(size), + None if self.raw.stdout.is_some() => { + let mut buffer = vec![0; size]; + let stdout = self.raw.stdout.as_mut().unwrap(); + stdout.read_exact(&mut buffer).unwrap(); + buffer + } + None => vec![], + } + } + + /// Read, consume and return the child's stderr as String. + /// + /// See also [`UChild::stdout_bytes`] for side effects. 
If stderr is redirected to stdout with + /// [`UCommand::stderr_to_stdout`] then always an empty string will be returned. + pub fn stderr(&mut self) -> String { + String::from_utf8(self.stderr_bytes()).unwrap() + } + + /// Read and return all child's output in stderr as String. + /// + /// Note, that a subsequent call of any of these functions + /// + /// * [`UChild::stderr`] + /// * [`UChild::stderr_bytes`] + /// * [`UChild::stderr_exact_bytes`] + /// + /// will operate on the subsequent output of the child process. If stderr is redirected to + /// stdout with [`UCommand::stderr_to_stdout`] then always an empty string will be returned. + pub fn stderr_all(&mut self) -> String { + String::from_utf8(self.stderr_all_bytes()).unwrap() + } + + /// Read, consume and return the currently available bytes from child's stderr. + /// + /// If stderr is redirected to stdout with [`UCommand::stderr_to_stdout`] then always zero bytes + /// are returned. See also [`UChild::stdout_bytes`] for side effects. + pub fn stderr_bytes(&mut self) -> Vec { + match self.captured_stderr.as_mut() { + Some(output) => output.output_bytes(), + None if self.raw.stderr.is_some() => { + let mut buffer: Vec = vec![]; + let stderr = self.raw.stderr.as_mut().unwrap(); + stderr.read_to_end(&mut buffer).unwrap(); + buffer + } + None => vec![], + } + } + + /// Read and return all output from start of the child process until now. + /// + /// Each subsequent call of any of the methods below will operate on the subsequent output of + /// the child process. This method will panic if the output wasn't captured (for example if + /// [`UCommand::set_stderr`] was used). If [`UCommand::stderr_to_stdout`] was used always zero + /// bytes are returned. 
+ /// + /// * [`UChild::stderr`] + /// * [`UChild::stderr_bytes`] + /// * [`UChild::stderr_exact_bytes`] + pub fn stderr_all_bytes(&mut self) -> Vec { + match self.captured_stderr.as_mut() { + Some(output) => output.output_all_bytes(), + None if self.stderr_to_stdout => vec![], + None => { + panic!("Usage error: This method cannot be used if the output wasn't captured.") + } + } + } + + /// Read, consume and return the exact amount of bytes from stderr. + /// + /// If stderr is redirect to stdout with [`UCommand::stderr_to_stdout`] then always zero bytes + /// are returned. + /// + /// # Important + /// This method blocks indefinitely if the `size` amount of bytes cannot be read. + pub fn stderr_exact_bytes(&mut self, size: usize) -> Vec { + match self.captured_stderr.as_mut() { + Some(output) => output.output_exact_bytes(size), + None if self.raw.stderr.is_some() => { + let stderr = self.raw.stderr.as_mut().unwrap(); + let mut buffer = vec![0; size]; + stderr.read_exact(&mut buffer).unwrap(); + buffer + } + None => vec![], + } + } + + fn access_stdin_as_writer<'a>(&'a mut self) -> Box { + if let Some(stdin_fd) = &self.stdin_pty { + Box::new(BufWriter::new(stdin_fd.try_clone().unwrap())) + } else { + let stdin: &mut std::process::ChildStdin = self.raw.stdin.as_mut().unwrap(); + Box::new(BufWriter::new(stdin)) + } + } + + fn take_stdin_as_writer(&mut self) -> Box { + if let Some(stdin_fd) = mem::take(&mut self.stdin_pty) { + Box::new(BufWriter::new(stdin_fd)) + } else { + let stdin = self + .raw + .stdin + .take() + .expect("Could not pipe into child process. Was it set to Stdio::null()?"); + + Box::new(BufWriter::new(stdin)) + } + } + + /// Pipe data into [`Child`] stdin in a separate thread to avoid deadlocks. + /// + /// In contrast to [`UChild::write_in`], this method is designed to simulate a pipe on the + /// command line and can be used only once or else panics. 
Note, that [`UCommand::set_stdin`] + /// must be used together with [`Stdio::piped`] or else this method doesn't work as expected. + /// `Stdio::piped` is the current default when using [`UCommand::run_no_wait`]) without calling + /// `set_stdin`. This method stores a [`JoinHandle`] of the thread in which the writing to the + /// child processes' stdin is running. The associated thread is joined with the main process in + /// the methods below when exiting the child process. + /// + /// * [`UChild::wait`] + /// * [`UChild::pipe_in_and_wait`] + /// + /// Usually, there's no need to join manually but if needed, the [`UChild::join`] method can be + /// used . + /// + /// [`JoinHandle`]: std::thread::JoinHandle + pub fn pipe_in>>(&mut self, content: T) -> &mut Self { + let ignore_stdin_write_error = self.ignore_stdin_write_error; + let mut content: Vec = content.into(); + if self.stdin_pty.is_some() { + content.append(&mut END_OF_TRANSMISSION_SEQUENCE.to_vec()); + } + let mut writer = self.take_stdin_as_writer(); + + let join_handle = thread::Builder::new() + .name("pipe_in".to_string()) + .spawn( + move || match writer.write_all(&content).and_then(|()| writer.flush()) { + Err(error) if !ignore_stdin_write_error => Err(io::Error::other(format!( + "failed to write to stdin of child: {error}" + ))), + Ok(()) | Err(_) => Ok(()), + }, + ) + .unwrap(); + + self.join_handle = Some(join_handle); + self + } + + /// Call join on the thread created by [`UChild::pipe_in`] and if the thread is still running. + /// + /// This method can be called multiple times but is a noop if already joined. 
+ pub fn join(&mut self) -> &mut Self { + if let Some(join_handle) = self.join_handle.take() { + join_handle + .join() + .expect("Error joining with the piping stdin thread") + .unwrap(); + } + self + } + + /// Convenience method for [`UChild::pipe_in`] and then [`UChild::wait`] + pub fn pipe_in_and_wait>>(mut self, content: T) -> CmdResult { + self.pipe_in(content); + self.wait().unwrap() + } + + /// Write some bytes to the child process stdin. + /// + /// This function is meant for small data and faking user input like typing a `yes` or `no`. + /// This function blocks until all data is written but can be used multiple times in contrast to + /// [`UChild::pipe_in`]. + /// + /// # Errors + /// If [`std::process::ChildStdin::write_all`] or [`std::process::ChildStdin::flush`] returned an error + pub fn try_write_in>>(&mut self, data: T) -> Result<()> { + let ignore_stdin_write_error = self.ignore_stdin_write_error; + let mut writer = self.access_stdin_as_writer(); + + match writer.write_all(&data.into()).and_then(|()| writer.flush()) { + Err(error) if !ignore_stdin_write_error => Err(io::Error::other(format!( + "failed to write to stdin of child: {error}" + ))), + Ok(()) | Err(_) => Ok(()), + } + } + + /// Convenience function for [`UChild::try_write_in`] and a following `unwrap`. + pub fn write_in>>(&mut self, data: T) -> &mut Self { + self.try_write_in(data).unwrap(); + self + } + + /// Close the child process stdout. + /// + /// Note this will have no effect if the output was captured with CapturedOutput which is the + /// default if [`UCommand::set_stdout`] wasn't called. + pub fn close_stdout(&mut self) -> &mut Self { + self.raw.stdout.take(); + self + } + + /// Close the child process stderr. + /// + /// Note this will have no effect if the output was captured with CapturedOutput which is the + /// default if [`UCommand::set_stderr`] wasn't called. 
+ pub fn close_stderr(&mut self) -> &mut Self { + self.raw.stderr.take(); + self + } + + /// Close the child process stdin. + /// + /// Note, this does not have any effect if using the [`UChild::pipe_in`] method. + pub fn close_stdin(&mut self) -> &mut Self { + self.raw.stdin.take(); + if self.stdin_pty.is_some() { + // a pty can not be closed. We need to send a EOT: + let _ = self.try_write_in(END_OF_TRANSMISSION_SEQUENCE); + self.stdin_pty.take(); + } + self + } +} + +pub fn vec_of_size(n: usize) -> Vec { + let result = vec![b'a'; n]; + assert_eq!(result.len(), n); + result +} + +pub fn whoami() -> String { + // Apparently some CI environments have configuration issues, e.g. with 'whoami' and 'id'. + // + // From the Logs: "Build (ubuntu-18.04, x86_64-unknown-linux-gnu, feat_os_unix, use-cross)" + // whoami: cannot find name for user ID 1001 + // id --name: cannot find name for user ID 1001 + // id --name: cannot find name for group ID 116 + // + // However, when running "id" from within "/bin/bash" it looks fine: + // id: "uid=1001(runner) gid=118(docker) groups=118(docker),4(adm),101(systemd-journal)" + // whoami: "runner" + + // Use environment variable to get current user instead of + // invoking `whoami` and fall back to user "nobody" on error. + env::var("USER") + .or_else(|_| env::var("USERNAME")) + .unwrap_or_else(|e| { + println!("{UUTILS_WARNING}: {e}, using \"nobody\" instead"); + "nobody".to_string() + }) +} + +/// Create a PTY (pseudo-terminal) for testing utilities that require a TTY. 
+/// +/// Returns a tuple of (path, controller_fd, replica_fd) where: +/// - path: The filesystem path to the PTY replica device +/// - controller_fd: The controller file descriptor +/// - replica_fd: The replica file descriptor +#[cfg(unix)] +pub fn pty_path() -> (String, OwnedFd, OwnedFd) { + use nix::pty::openpty; + use nix::unistd::ttyname; + let pty = openpty(None, None).expect("Failed to create PTY"); + let path = ttyname(&pty.slave) + .expect("Failed to get PTY path") + .to_string_lossy() + .to_string(); + (path, pty.master, pty.slave) +} + +/// Add prefix 'g' for `util_name` if not on linux +#[cfg(unix)] +pub fn host_name_for(util_name: &str) -> Cow<'_, str> { + // In some environments, e.g. macOS/freebsd, the GNU coreutils are prefixed with "g" + // to not interfere with the BSD counterparts already in `$PATH`. + #[cfg(not(target_os = "linux"))] + { + // make call to `host_name_for` idempotent + if util_name.starts_with('g') && util_name != "groups" { + util_name.into() + } else { + format!("g{util_name}").into() + } + } + #[cfg(target_os = "linux")] + util_name.into() +} + +// Choose same coreutils version with ubuntu-latest runner: https://github.com/actions/runner-images/tree/main/images/ubuntu +const VERSION_MIN: &str = "9.4"; // minimum Version for the reference `coreutil` in `$PATH` + +const UUTILS_WARNING: &str = "uutils-tests-warning"; +const UUTILS_INFO: &str = "uutils-tests-info"; + +/// Run `util_name --version` and return Ok if the version is >= `version_expected`. +/// Returns an error if +/// * `util_name` cannot run +/// * the version cannot be parsed +/// * the version is too low +/// +/// This is used by `expected_result` to check if the coreutils version is >= `VERSION_MIN`. 
+/// It makes sense to use this manually in a test if a feature +/// is tested that was introduced after `VERSION_MIN` +/// +/// Example: +/// +/// ```no_run +/// use uutests::util::*; +/// const VERSION_MIN_MULTIPLE_USERS: &str = "8.31"; +/// +/// #[test] +/// fn test_xyz() { +/// unwrap_or_return!(check_coreutil_version( +/// util_name!(), +/// VERSION_MIN_MULTIPLE_USERS +/// )); +/// // proceed with the test... +/// } +/// ``` +#[cfg(unix)] +pub fn check_coreutil_version( + util_name: &str, + version_expected: &str, +) -> std::result::Result { + // example: + // $ id --version | head -n 1 + // id (GNU coreutils) 8.32.162-4eda + + let util_name = &host_name_for(util_name); + log_info("run", format!("{util_name} --version")); + let version_check = match Command::new(util_name.as_ref()) + .env("LC_ALL", "C") + .arg("--version") + .output() + { + Ok(s) => s, + Err(e) => return Err(format!("{UUTILS_WARNING}: '{util_name}' {e}")), + }; + std::str::from_utf8(&version_check.stdout).unwrap() + .split('\n') + .collect::>() + .first() + .map_or_else( + || Err(format!("{UUTILS_WARNING}: unexpected output format for reference coreutil: '{util_name} --version'")), + |s| { + if s.contains(&format!("(GNU coreutils) {version_expected}")) { + Ok(format!("{UUTILS_INFO}: {s}")) + } else if s.contains("(GNU coreutils)") { + let version_found = parse_coreutil_version(s); + let version_expected = version_expected.parse::().unwrap_or_default(); + if version_found > version_expected { + Ok(format!("{UUTILS_INFO}: version for the reference coreutil '{util_name}' is higher than expected; expected: {version_expected}, found: {version_found}")) + } else { + Err(format!("{UUTILS_WARNING}: version for the reference coreutil '{util_name}' does not match; expected: {version_expected}, found: {version_found}")) } + } else { + Err(format!("{UUTILS_WARNING}: no coreutils version string found for reference coreutils '{util_name} --version'")) + } + }, + ) +} + +// simple heuristic to parse the 
coreutils SemVer string, e.g. "id (GNU coreutils) 8.32.263-0475" +fn parse_coreutil_version(version_string: &str) -> f32 { + version_string + .split_whitespace() + .last() + .unwrap() + .split('.') + .take(2) + .collect::>() + .join(".") + .parse::() + .unwrap_or_default() +} + +/// This runs the GNU coreutils `util_name` binary in `$PATH` in order to +/// dynamically gather reference values on the system. +/// If the `util_name` in `$PATH` doesn't include a coreutils version string, +/// or the version is too low, this returns an error and the test should be skipped. +/// +/// Arguments: +/// - `ts`: The test context. +/// - `args`: Command-line variables applied to the command. +/// - `envs`: Environment variables applied to the command invocation. +/// +/// Example: +/// +/// ```no_run +/// use uutests::util::*; +/// #[test] +/// fn test_xyz() { +/// let ts = TestScenario::new(util_name!()); +/// let result = ts.ucmd().run(); +/// let exp_result = unwrap_or_return!(expected_result(&ts, &[])); +/// result +/// .stdout_is(exp_result.stdout_str()) +/// .stderr_is(exp_result.stderr_str()) +/// .code_is(exp_result.code()); +/// } +///``` +#[cfg(unix)] +pub fn gnu_cmd_result( + ts: &TestScenario, + args: &[&str], + envs: &[(&str, &str)], +) -> std::result::Result { + let util_name = ts.util_name.as_str(); + println!("{}", check_coreutil_version(util_name, VERSION_MIN)?); + let util_name = host_name_for(util_name); + + let result = ts + .cmd(util_name.as_ref()) + .env("PATH", PATH) + .envs(DEFAULT_ENV) + .envs(envs.iter().copied()) + .args(args) + .run(); + + let (stdout, stderr): (String, String) = if cfg!(target_os = "linux") { + ( + result.stdout_str().to_string(), + result.stderr_str_lossy().to_string(), + ) + } else { + // `host_name_for` added prefix, strip 'g' prefix from results: + let from = util_name.to_string() + ":"; + let to = &from[1..]; + ( + result.stdout_str().replace(&from, to), + result.stderr_str_lossy().replace(&from, to), + ) + }; + + 
Ok(CmdResult::new( + ts.bin_path.as_os_str().to_str().unwrap().to_string(), + Some(ts.util_name.clone()), + Some(result.tmpd()), + result.exit_status, + stdout.as_bytes(), + stderr.as_bytes(), + )) +} + +/// This runs the GNU coreutils `util_name` binary in `$PATH` in order to +/// dynamically gather reference values on the system. +/// If the `util_name` in `$PATH` doesn't include a coreutils version string, +/// or the version is too low, this returns an error and the test should be skipped. +/// +/// Example: +/// +/// ```no_run +/// use uutests::util::*; +/// #[test] +/// fn test_xyz() { +/// let ts = TestScenario::new(util_name!()); +/// let result = ts.ucmd().run(); +/// let exp_result = unwrap_or_return!(expected_result(&ts, &[])); +/// result +/// .stdout_is(exp_result.stdout_str()) +/// .stderr_is(exp_result.stderr_str()) +/// .code_is(exp_result.code()); +/// } +///``` +#[cfg(unix)] +pub fn expected_result(ts: &TestScenario, args: &[&str]) -> std::result::Result { + gnu_cmd_result(ts, args, &[]) +} + +/// This is a convenience wrapper to run a ucmd with root permissions. +/// It can be used to test programs when being root is needed +/// This runs `sudo -E --non-interactive target/debug/coreutils util_name args` +/// This is primarily designed to run in an environment where whoami is in $path +/// and where non-interactive sudo is possible. +/// To check if i) non-interactive sudo is possible and ii) if sudo works, this runs: +/// `sudo -E --non-interactive whoami` first. +/// +/// This return an `Err()` if run inside CICD because there's no 'sudo'. 
+/// +/// Example: +/// +/// ```no_run +/// use uutests::util::*; +/// #[test] +/// fn test_xyz() { +/// let ts = TestScenario::new("whoami"); +/// let expected = "root\n".to_string(); +/// if let Ok(result) = run_ucmd_as_root(&ts, &[]) { +/// result.stdout_is(expected); +/// } else { +/// println!("TEST SKIPPED"); +/// } +/// } +///``` +#[cfg(unix)] +pub fn run_ucmd_as_root( + ts: &TestScenario, + args: &[&str], +) -> std::result::Result { + run_ucmd_as_root_with_stdin_stdout(ts, args, None, None) +} + +#[cfg(unix)] +pub fn run_ucmd_as_root_with_stdin_stdout( + ts: &TestScenario, + args: &[&str], + stdin: Option<&str>, + stdout: Option<&str>, +) -> std::result::Result { + if is_ci() { + Err(format!("{UUTILS_INFO}: {}", "cannot run inside CI")) + } else { + // check if we can run 'sudo' + log_info("run", "sudo -E --non-interactive whoami"); + match Command::new("sudo") + .envs(DEFAULT_ENV) + .args(["-E", "--non-interactive", "whoami"]) + .output() + { + Ok(output) if String::from_utf8_lossy(&output.stdout).eq("root\n") => { + // we can run sudo and we're root + // run ucmd as root: + let mut cmd = ts.cmd("sudo"); + cmd.env("PATH", PATH) + .envs(DEFAULT_ENV) + .arg("-E") + .arg("--non-interactive") + .arg(&ts.bin_path) + .arg(&ts.util_name) + .args(args); + if let Some(stdin) = stdin { + cmd.set_stdin(File::open(stdin).unwrap()); + } + if let Some(stdout) = stdout { + cmd.set_stdout(File::open(stdout).unwrap()); + } + Ok(cmd.run()) + } + Ok(output) + if String::from_utf8_lossy(&output.stderr).eq("sudo: a password is required\n") => + { + Err("Cannot run non-interactive sudo".to_string()) + } + Ok(_output) => Err("\"sudo whoami\" didn't return \"root\"".to_string()), + Err(e) => Err(format!("{UUTILS_WARNING}: {e}")), + } + } +} + +/// Sanity checks for test utils +#[cfg(test)] +mod tests { + // spell-checker:ignore (tests) asdfsadfa + use super::*; + + // Create a init for the test with a fake value (not needed) + #[cfg(test)] + #[ctor::ctor] + fn init() { + unsafe { 
+ env::set_var("UUTESTS_BINARY_PATH", ""); + } + } + + pub fn run_cmd>(cmd: T) -> CmdResult { + UCommand::new().arg(cmd).run() + } + + #[test] + fn test_command_result_when_no_output_with_exit_32() { + let result = run_cmd("exit 32"); + + if cfg!(windows) { + std::assert!(result.bin_path.ends_with("cmd")); + } else { + std::assert!(result.bin_path.ends_with("sh")); + } + + std::assert!(result.util_name.is_none()); + std::assert!(result.tmpd.is_some()); + + assert!(result.exit_status.is_some()); + std::assert_eq!(result.code(), 32); + result.code_is(32); + assert!(!result.succeeded()); + result.failure(); + result.fails_silently(); + assert!(result.stderr.is_empty()); + assert!(result.stdout.is_empty()); + result.no_output(); + result.no_stderr(); + result.no_stdout(); + } + + #[test] + #[should_panic] + fn test_command_result_when_exit_32_then_success_panic() { + run_cmd("exit 32").success(); + } + + #[test] + fn test_command_result_when_no_output_with_exit_0() { + let result = run_cmd("exit 0"); + + assert!(result.exit_status.is_some()); + std::assert_eq!(result.code(), 0); + result.code_is(0); + assert!(result.succeeded()); + result.success(); + assert!(result.stderr.is_empty()); + assert!(result.stdout.is_empty()); + result.no_output(); + result.no_stderr(); + result.no_stdout(); + } + + #[test] + #[should_panic] + fn test_command_result_when_exit_0_then_failure_panics() { + run_cmd("exit 0").failure(); + } + + #[test] + #[should_panic] + fn test_command_result_when_exit_0_then_silent_failure_panics() { + run_cmd("exit 0").fails_silently(); + } + + #[test] + fn test_command_result_when_stdout_with_exit_0() { + #[cfg(windows)] + let (result, vector, string) = ( + run_cmd("echo hello& exit 0"), + vec![b'h', b'e', b'l', b'l', b'o', b'\r', b'\n'], + "hello\r\n", + ); + #[cfg(not(windows))] + let (result, vector, string) = ( + run_cmd("echo hello; exit 0"), + vec![b'h', b'e', b'l', b'l', b'o', b'\n'], + "hello\n", + ); + + assert!(result.exit_status.is_some()); + 
std::assert_eq!(result.code(), 0); + result.code_is(0); + assert!(result.succeeded()); + result.success(); + assert!(result.stderr.is_empty()); + std::assert_eq!(result.stdout, vector); + result.no_stderr(); + result.stdout_is(string); + result.stdout_is_bytes(&vector); + result.stdout_only(string); + result.stdout_only_bytes(&vector); + } + + #[test] + fn test_command_result_when_stderr_with_exit_0() { + #[cfg(windows)] + let (result, vector, string) = ( + run_cmd("echo hello>&2& exit 0"), + vec![b'h', b'e', b'l', b'l', b'o', b'\r', b'\n'], + "hello\r\n", + ); + #[cfg(not(windows))] + let (result, vector, string) = ( + run_cmd("echo hello >&2; exit 0"), + vec![b'h', b'e', b'l', b'l', b'o', b'\n'], + "hello\n", + ); + + assert!(result.exit_status.is_some()); + std::assert_eq!(result.code(), 0); + result.code_is(0); + assert!(result.succeeded()); + result.success(); + assert!(result.stdout.is_empty()); + result.no_stdout(); + std::assert_eq!(result.stderr, vector); + result.stderr_is(string); + result.stderr_is_bytes(&vector); + result.stderr_only(string); + result.stderr_only_bytes(&vector); + } + + #[test] + fn test_std_does_not_contain() { + #[cfg(windows)] + let res = run_cmd( + "(echo This is a likely error message& echo This is a likely error message>&2) & exit 0", + ); + #[cfg(not(windows))] + let res = run_cmd( + "echo This is a likely error message; echo This is a likely error message >&2; exit 0", + ); + res.stdout_does_not_contain("unlikely"); + res.stderr_does_not_contain("unlikely"); + } + + #[test] + #[should_panic] + fn test_stdout_does_not_contain_fail() { + #[cfg(windows)] + let res = run_cmd("echo This is a likely error message& exit 0"); + #[cfg(not(windows))] + let res = run_cmd("echo This is a likely error message; exit 0"); + + res.stdout_does_not_contain("likely"); + } + + #[test] + #[should_panic] + fn test_stderr_does_not_contain_fail() { + #[cfg(windows)] + let res = run_cmd("echo This is a likely error message>&2 & exit 0"); + 
#[cfg(not(windows))] + let res = run_cmd("echo This is a likely error message >&2; exit 0"); + + res.stderr_does_not_contain("likely"); + } + + #[test] + fn test_stdout_matches() { + #[cfg(windows)] + let res = run_cmd( + "(echo This is a likely error message& echo This is a likely error message>&2 ) & exit 0", + ); + #[cfg(not(windows))] + let res = run_cmd( + "echo This is a likely error message; echo This is a likely error message >&2; exit 0", + ); + + let positive = regex::Regex::new(".*likely.*").unwrap(); + let negative = regex::Regex::new(".*unlikely.*").unwrap(); + res.stdout_matches(&positive); + res.stdout_does_not_match(&negative); + } + + #[test] + #[should_panic] + fn test_stdout_matches_fail() { + #[cfg(windows)] + let res = run_cmd( + "(echo This is a likely error message& echo This is a likely error message>&2) & exit 0", + ); + #[cfg(not(windows))] + let res = run_cmd( + "echo This is a likely error message; echo This is a likely error message >&2; exit 0", + ); + + let negative = regex::Regex::new(".*unlikely.*").unwrap(); + res.stdout_matches(&negative); + } + + #[test] + #[should_panic] + fn test_stdout_not_matches_fail() { + #[cfg(windows)] + let res = run_cmd( + "(echo This is a likely error message& echo This is a likely error message>&2) & exit 0", + ); + #[cfg(not(windows))] + let res = run_cmd( + "echo This is a likely error message; echo This is a likely error message >&2; exit 0", + ); + + let positive = regex::Regex::new(".*likely.*").unwrap(); + res.stdout_does_not_match(&positive); + } + + #[test] + #[cfg(unix)] + fn test_parse_coreutil_version() { + use std::assert_eq; + assert_eq!( + parse_coreutil_version("id (GNU coreutils) 9.0.123-0123").to_string(), + "9" + ); + assert_eq!( + parse_coreutil_version("id (GNU coreutils) 8.32.263-0475").to_string(), + "8.32" + ); + assert_eq!( + parse_coreutil_version("id (GNU coreutils) 8.25.123-0123").to_string(), + "8.25" + ); + assert_eq!( + parse_coreutil_version("id (GNU coreutils) 
9.0").to_string(), + "9" + ); + assert_eq!( + parse_coreutil_version("id (GNU coreutils) 8.32").to_string(), + "8.32" + ); + assert_eq!( + parse_coreutil_version("id (GNU coreutils) 8.25").to_string(), + "8.25" + ); + } + + #[test] + #[cfg(unix)] + fn test_check_coreutil_version() { + match check_coreutil_version("id", VERSION_MIN) { + Ok(s) => assert!(s.starts_with("uutils-tests-")), + Err(s) => assert!(s.starts_with("uutils-tests-warning")), + } + #[cfg(target_os = "linux")] + std::assert_eq!( + check_coreutil_version("no test name", VERSION_MIN), + Err("uutils-tests-warning: 'no test name' \ + No such file or directory (os error 2)" + .to_string()) + ); + } + + #[test] + #[cfg(unix)] + fn test_expected_result() { + let ts = TestScenario::new("id"); + // assert!(expected_result(&ts, &[]).is_ok()); + match expected_result(&ts, &[]) { + Ok(r) => assert!(r.succeeded()), + Err(s) => assert!(s.starts_with("uutils-tests-warning")), + } + let ts = TestScenario::new("no test name"); + assert!(expected_result(&ts, &[]).is_err()); + } + + #[test] + #[cfg(unix)] + fn test_host_name_for() { + #[cfg(target_os = "linux")] + { + std::assert_eq!(host_name_for("id"), "id"); + std::assert_eq!(host_name_for("groups"), "groups"); + std::assert_eq!(host_name_for("who"), "who"); + } + #[cfg(not(target_os = "linux"))] + { + // spell-checker:ignore (strings) ggroups gwho + std::assert_eq!(host_name_for("id"), "gid"); + std::assert_eq!(host_name_for("groups"), "ggroups"); + std::assert_eq!(host_name_for("who"), "gwho"); + std::assert_eq!(host_name_for("gid"), "gid"); + std::assert_eq!(host_name_for("ggroups"), "ggroups"); + std::assert_eq!(host_name_for("gwho"), "gwho"); + } + } + + #[test] + #[cfg(unix)] + fn test_run_ucmd_as_root() { + if is_ci() { + println!("TEST SKIPPED (cannot run inside CI)"); + } else { + // Skip test if we can't guarantee non-interactive `sudo`, or if we're not "root" + if let Ok(output) = Command::new("sudo") + .env("LC_ALL", "C") + .args(["-E", 
"--non-interactive", "whoami"]) + .output() + { + if output.status.success() && String::from_utf8_lossy(&output.stdout).eq("root\n") { + let ts = TestScenario::new("whoami"); + std::assert_eq!( + run_ucmd_as_root(&ts, &[]).unwrap().stdout_str().trim(), + "root" + ); + } else { + println!("TEST SKIPPED (we're not root)"); + } + } else { + println!("TEST SKIPPED (cannot run sudo)"); + } + } + } + + #[cfg(all(unix, not(any(target_os = "macos", target_os = "openbsd"))))] + #[test] + fn test_compare_xattrs() { + use tempfile::tempdir; + + let temp_dir = tempdir().unwrap(); + let file_path1 = temp_dir.path().join("test_file1.txt"); + let file_path2 = temp_dir.path().join("test_file2.txt"); + + File::create(&file_path1).unwrap(); + File::create(&file_path2).unwrap(); + + let test_attr = "user.test_attr"; + let test_value = b"test value"; + xattr::set(&file_path1, test_attr, test_value).unwrap(); + + assert!(!compare_xattrs(&file_path1, &file_path2)); + + xattr::set(&file_path2, test_attr, test_value).unwrap(); + assert!(compare_xattrs(&file_path1, &file_path2)); + } + + #[cfg(unix)] + #[test] + fn test_application_of_process_resource_limits_unlimited_file_size() { + let ts = TestScenario::new("util"); + ts.cmd("sh") + .args(&["-c", "ulimit -Sf; ulimit -Hf"]) + .succeeds() + .no_stderr() + .stdout_is("unlimited\nunlimited\n"); + } + + #[cfg(unix)] + #[test] + fn test_application_of_process_resource_limits_limited_file_size() { + let unit_size_bytes = if cfg!(target_os = "macos") { 1024 } else { 512 }; + + let ts = TestScenario::new("util"); + ts.cmd("sh") + .args(&["-c", "ulimit -Sf; ulimit -Hf"]) + .limit( + rlimit::Resource::FSIZE, + 8 * unit_size_bytes, + 16 * unit_size_bytes, + ) + .succeeds() + .no_stderr() + .stdout_is("8\n16\n"); + } + + #[cfg(unix)] + #[cfg(not(target_os = "openbsd"))] + #[test] + fn test_altering_umask() { + use uucore::mode::get_umask; + let p_umask = get_umask(); + // make sure we are not testing against the same umask + let c_umask = if p_umask 
== 0o002 { 0o007 } else { 0o002 }; + let expected = if cfg!(target_os = "android") { + if p_umask == 0o002 { "007\n" } else { "002\n" } + } else if p_umask == 0o002 { + "0007\n" + } else { + "0002\n" + }; + + let ts = TestScenario::new("util"); + ts.cmd_shell("umask") + .umask(c_umask) + .succeeds() + .stdout_is(expected); + std::assert_eq!(p_umask, get_umask()); // make sure parent umask didn't change + } + + #[cfg(any(target_os = "linux", target_os = "android", target_os = "freebsd"))] + #[test] + fn test_mount_temp_fs() { + let mut scene = TestScenario::new("util"); + let at = &scene.fixtures; + // Test must be run as root (or with `sudo -E`) + if scene.cmd("whoami").run().stdout_str() != "root\n" { + return; + } + at.mkdir("mountpoint"); + let mountpoint = at.plus("mountpoint"); + scene.mount_temp_fs(mountpoint.to_str().unwrap()).unwrap(); + scene + .cmd("df") + .arg("-h") + .arg(mountpoint) + .succeeds() + .stdout_contains("tmpfs"); + } +}