commit 844733d5d577799fc6c6d377f192c4874a642af2 Author: Caileb Date: Wed Jul 9 18:55:26 2025 -0500 Initial Commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..dfd150b --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,554 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "anstyle" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "862ed96ca487e809f1c8e5a8447f6ee2cf102f846893800b20cebdf541fc6bbd" + +[[package]] +name = "anyhow" +version = "1.0.98" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487" + +[[package]] +name = "arrayref" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb" + +[[package]] +name = "arrayvec" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" + +[[package]] +name = "bitflags" +version = "2.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" + +[[package]] +name = "blake3" +version = "1.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3888aaa89e4b2a40fca9848e400f6a658a5a3978de7be858e209cafa8be9a4a0" +dependencies = [ + "arrayref", + "arrayvec", + "cc", + "cfg-if", + "constant_time_eq", + "rayon-core", +] + +[[package]] +name = "bumpalo" +version = "3.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" + +[[package]] +name = 
"cc" +version = "1.2.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c1599538de2394445747c8cf7935946e3cc27e9625f889d979bfb2aaf569362" +dependencies = [ + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" + +[[package]] +name = "clap" +version = "4.5.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40b6887a1d8685cebccf115538db5c0efe625ccac9696ad45c409d96566e910f" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0c66c08ce9f0c698cbce5c0279d0bb6ac936d8674174fe48f736533b964f59e" +dependencies = [ + "anstyle", + "clap_lex", +] + +[[package]] +name = "clap_derive" +version = "4.5.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2c7947ae4cc3d851207c1adb5b5e260ff0cca11446b1d6d1423788e442257ce" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675" + +[[package]] +name = "console" +version = "0.15.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "054ccb5b10f9f2cbf51eb355ca1d05c2d279ce1804688d0db74b4733a5aeafd8" +dependencies = [ + "encode_unicode", + "libc", + "once_cell", + "unicode-width", + "windows-sys", +] + +[[package]] +name = "constant_time_eq" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" + +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + +[[package]] +name = "encode_unicode" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" + +[[package]] +name = "errno" +version = "0.3.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "778e2ac28f6c47af28e4907f13ffd1e1ddbd400980a9abd7c8df189bf578a5ad" +dependencies = [ + "libc", + "windows-sys", +] + +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + +[[package]] +name = "getrandom" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasi", +] + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "indicatif" +version = 
"0.17.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "183b3088984b400f4cfac3620d5e076c84da5364016b4f49473de574b2586235" +dependencies = [ + "console", + "number_prefix", + "portable-atomic", + "rayon", + "unicode-width", + "web-time", +] + +[[package]] +name = "js-sys" +version = "0.3.77" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "libc" +version = "0.2.174" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776" + +[[package]] +name = "linux-raw-sys" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12" + +[[package]] +name = "log" +version = "0.4.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" + +[[package]] +name = "number_prefix" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "portable-atomic" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483" + +[[package]] +name = "proc-macro2" +version = "1.0.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" 
+version = "1.0.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + +[[package]] +name = "rayon" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + +[[package]] +name = "rustix" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c71e83d6afe7ff64890ec6b71d6a69bb8a610ab78ce364b3352876bb4c801266" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys", +] + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "song-hash" +version = "0.1.0" +dependencies = [ + "anyhow", + "blake3", + "clap", + "indicatif", + "rayon", + "tempfile", + "walkdir", +] + +[[package]] +name = "syn" +version = "2.0.104" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17b6f705963418cdb9927482fa304bc562ece2fdd4f616084c50b7023b435a40" +dependencies = 
[ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tempfile" +version = "3.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8a64e3985349f2441a1a9ef0b853f869006c3855f2cda6862a94d26ebb9d6a1" +dependencies = [ + "fastrand", + "getrandom", + "once_cell", + "rustix", + "windows-sys", +] + +[[package]] +name = "unicode-ident" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" + +[[package]] +name = "unicode-width" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a1a07cc7db3810833284e8d372ccdc6da29741639ecc70c9ec107df0fa6154c" + +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + +[[package]] +name = "wasi" +version = "0.14.2+wasi-0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" +dependencies = [ + "wit-bindgen-rt", +] + +[[package]] +name = "wasm-bindgen" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" +dependencies = [ + "cfg-if", + "once_cell", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" +dependencies = [ + "bumpalo", + "log", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "web-time" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "winapi-util" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "wit-bindgen-rt" +version = "0.39.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" +dependencies = [ + "bitflags", +] diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..72f580c --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,30 @@ +[package] +name = 
"song-hash" +version = "0.1.0" +edition = "2024" + +[dependencies] +clap = { version = "4.5", default-features = false, features = ["derive", "std"] } +walkdir = "2.3" +blake3 = { version = "1.5", features = ["rayon"] } +anyhow = "1.0" +rayon = "1.10" +indicatif = { version = "0.17", features = ["rayon"] } + +[dev-dependencies] +tempfile = "3.8" + +[profile.release] +opt-level = "z" # Optimize aggressively for size +lto = true # Link-time optimization +codegen-units = 1 # Maximum optimization opportunities +panic = "abort" # Smaller panic handling +strip = true # Strip symbols (Rust 1.59+) +overflow-checks = false # Disable overflow checks for size +debug = false # No debug info +debug-assertions = false # No debug assertions +incremental = false # Disable incremental compilation +rpath = false # Don't use rpath + +[profile.release.package."*"] +opt-level = "z" # Apply size optimization to all dependencies \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..fd0bf50 --- /dev/null +++ b/README.md @@ -0,0 +1,53 @@ +# song-hash + +A blazing-fast Rust CLI for generating and verifying BLAKE3 checksums of large music libraries with a tiny binary footprint. + +## Features +- Generates hashes using the modern and extremely fast BLAKE3 algorithm +- Hashes and verifies many folders at once using all available CPU cores +- Creates a single `checksums.txt` file per album, leaving artist folders clean +- Never overwrites existing checksums unless you explicitly use `--force` +- Tiny release binary (~550KB) + +## Installation +```bash +cargo build --release +``` + +## Usage +```bash +# Generate checksums for a music library +song-hash hash [OPTIONS] + +# Verify checksums for a music library +song-hash verify [OPTIONS] +``` + +**Hashing Options:** +- `-o, --output `: Sets the name of the checksum file (default: `checksums.txt`). +- `-f, --force`: Overwrites existing checksum files if they exist. 
+ +**Verification Options:** +- `-c, --checksum `: Specifies the name of the checksum file to find and verify (default: `checksums.txt`). + +## Examples +Hash your entire music collection (non-destructive): +```bash +song-hash hash "D:\Music" +``` +Overwrite existing checksum files: +```bash +song-hash hash -f "D:\Music" +``` +Verify every album under `Downloads`: +```bash +song-hash verify "C:\Users\You\Downloads" +``` + +## What gets hashed? + +Only audio files (`flac`, `mp3`, `wav`, `ogg`, `opus`, `m4a`, `aac`, `alac`, `ape`, `wma`, `aif`, `aiff`, `pcm`, `mka`) that live **directly inside** each album directory. Artist folders that contain only sub-folders are skipped. The checksum file format is just two columns: **hash** & **filename**. +``` +b818e0e88f3b… 01 - Intro.flac +7c6a21d82f4c… 02 - Track.flac +``` \ No newline at end of file diff --git a/src/hash.rs b/src/hash.rs new file mode 100644 index 0000000..f6f5ef9 --- /dev/null +++ b/src/hash.rs @@ -0,0 +1,489 @@ +use std::path::{Path, PathBuf}; +use std::fs::{self, File}; +use std::io::Write; +use anyhow::{anyhow, Result, Context}; +use rayon::prelude::*; +use indicatif::{ProgressBar, ProgressStyle}; +use walkdir::WalkDir; +use crate::utils::{is_audio_file, compute_hash, human_duration}; + +pub fn generate_checksums(root: &Path, output_name: &str, force: bool) -> Result<()> { + let start = std::time::Instant::now(); + if !root.is_dir() { + return Err(anyhow!("Provided path is not a directory: {}", root.display())); + } + + // Gather candidate directories + let dirs: Vec = WalkDir::new(root) + .min_depth(1) + .into_iter() + .filter_map(|e| e.ok()) + .filter(|e| e.file_type().is_dir()) + .map(|e| e.into_path()) + .collect(); + + // Filter those with audio files + let audio_dirs: Vec = dirs + .into_iter() + .filter(|d| { + fs::read_dir(d) + .map(|rd| rd.flatten().any(|e| { + let p = e.path(); + p.is_file() && is_audio_file(&p) + })) + .unwrap_or(false) + }) + .collect(); + + let pb = 
ProgressBar::new(audio_dirs.len() as u64); + pb.set_style(ProgressStyle::with_template("[{elapsed_precise}] [{bar:40.cyan/blue}] {pos}/{len} albums")?.progress_chars("=>-")); + + audio_dirs.par_iter().for_each(|dir| { + if let Err(e) = process_album(dir, output_name, force) { + eprintln!("{}", e); + } + pb.inc(1); + }); + + pb.finish_and_clear(); + let elapsed = start.elapsed(); + + if audio_dirs.is_empty() { + println!( + "➡️ No audio folders found under {} (completed in {})", + root.display(), + human_duration(elapsed) + ); + } else { + println!( + "✅ Created checksum files for {dirs} directories in {time} using {thr} threads (avg {avg:.2} ms/folder)", + dirs = audio_dirs.len(), + time = human_duration(elapsed), + thr = rayon::current_num_threads(), + avg = elapsed.as_secs_f64() * 1000.0 / audio_dirs.len() as f64 + ); + } + + Ok(()) +} + +fn process_album(dir: &Path, output_name: &str, force: bool) -> Result<()> { + let mut audio_files = Vec::new(); + if let Ok(rd) = fs::read_dir(dir) { + for dir_entry in rd.flatten() { + let p = dir_entry.path(); + if p.is_file() && is_audio_file(&p) { + if p.file_name() + .and_then(|s| s.to_str()) + .map(|s| s.eq_ignore_ascii_case(output_name)) + .unwrap_or(false) + { + continue; + } + audio_files.push(p); + } + } + } + + if audio_files.is_empty() { + return Ok(()); + } + + let output_path = dir.join(output_name); + if output_path.exists() && !force { + // Skip existing checksum + return Ok(()); + } + + let mut writer = File::create(&output_path) + .with_context(|| format!("Failed to create checksum file at {}", output_path.display()))?; + + let mut results: Vec<(String, String)> = audio_files + .par_iter() + .map(|p| { + let hash = compute_hash(p)?; + let rel = p.file_name().unwrap().to_string_lossy().to_string(); + Ok((hash, rel)) + }) + .collect::>>()?; + + results.sort_by(|a, b| a.1.cmp(&b.1)); + + for (hash, rel) in results { + writeln!(writer, "{} {}", hash, rel)?; + } + + Ok(()) +} + +#[cfg(test)] +mod tests { + use 
#[cfg(test)]
mod tests {
    //! Integration-style tests for `generate_checksums`, run against
    //! throw-away directory trees created with `tempfile`.

    use super::*;
    use std::fs;
    use tempfile::tempdir;

    /// Hashing must create the checksum file and leave every audio file
    /// untouched (size, content and modification time).
    #[test]
    fn only_checksum_files_created_during_hashing() -> Result<()> {
        let dir = tempdir()?;
        let root = dir.path();

        // Create album directory with audio files
        let album_dir = root.join("Artist").join("Album");
        fs::create_dir_all(&album_dir)?;

        let audio_files = [
            ("01 - Track One.flac", "fake flac content 1".as_bytes()),
            ("02 - Track Two.mp3", "fake mp3 content 22".as_bytes()),
            ("03 - Track Three.wav", "fake wav content 33".as_bytes()),
        ];

        // Create audio files and remember (path, size, mtime, content hash).
        let mut original_audio_metadata = Vec::new();
        for (name, content) in &audio_files {
            let file_path = album_dir.join(name);
            fs::write(&file_path, *content)?;

            let metadata = fs::metadata(&file_path)?;
            let original_content = fs::read(&file_path)?;
            let original_hash = blake3::hash(&original_content);

            original_audio_metadata.push((
                file_path,
                metadata.len(),
                metadata.modified()?,
                original_hash,
            ));
        }

        // Run hash generation
        generate_checksums(root, "checksums.txt", false)?;

        // Verify checksum file was created
        let checksum_path = album_dir.join("checksums.txt");
        assert!(checksum_path.exists(), "Checksum file should be created");

        // Verify checksum file contains expected entries
        let checksum_content = fs::read_to_string(&checksum_path)?;
        for (name, _) in &audio_files {
            assert!(
                checksum_content.contains(*name),
                "Checksum file should contain {}",
                name
            );
        }

        // CRITICAL: Verify NO audio files were modified in any way
        for ((name, _), (file_path, original_size, original_time, original_content_hash)) in
            audio_files.iter().zip(&original_audio_metadata)
        {
            let current_metadata = fs::metadata(file_path)?;
            let current_content = fs::read(file_path)?;
            let current_content_hash = blake3::hash(&current_content);

            assert_eq!(
                current_metadata.len(),
                *original_size,
                "Audio file {} size changed during hashing!",
                name
            );
            assert_eq!(
                current_content_hash, *original_content_hash,
                "Audio file {} content changed during hashing!",
                name
            );
            assert_eq!(
                current_metadata.modified()?,
                *original_time,
                "Audio file {} was modified during hashing!",
                name
            );
        }

        println!("✅ PROOF: Hash generation only created checksum file, never touched audio files");
        Ok(())
    }

    /// Without `force`, an existing checksum file keeps its content.
    #[test]
    fn existing_checksums_preserved_without_force() -> Result<()> {
        let dir = tempdir()?;
        let root = dir.path();

        let album_dir = root.join("Album");
        fs::create_dir_all(&album_dir)?;

        // Create audio file and existing checksum file
        fs::write(album_dir.join("song.flac"), b"audio content")?;
        fs::write(album_dir.join("checksums.txt"), b"existing checksum")?;

        // Generate checksums with force=false (should skip)
        generate_checksums(root, "checksums.txt", false)?;

        // Verify original checksum file wasn't overwritten
        let content = fs::read_to_string(album_dir.join("checksums.txt"))?;
        assert_eq!(content, "existing checksum");

        Ok(())
    }

    /// Read-only audio files can be hashed and stay read-only and unchanged.
    #[test]
    fn readonly_audio_files_processed_successfully() -> Result<()> {
        let dir = tempdir()?;
        let root = dir.path();

        let album_dir = root.join("Album");
        fs::create_dir_all(&album_dir)?;

        // Create audio files
        let audio_file = album_dir.join("readonly.flac");
        fs::write(&audio_file, b"protected audio content")?;

        // Make audio file read-only
        #[cfg(unix)]
        {
            use std::os::unix::fs::PermissionsExt;
            let mut perms = fs::metadata(&audio_file)?.permissions();
            perms.set_mode(0o444); // read-only
            fs::set_permissions(&audio_file, perms)?;
        }
        #[cfg(windows)]
        {
            let mut perms = fs::metadata(&audio_file)?.permissions();
            perms.set_readonly(true);
            fs::set_permissions(&audio_file, perms)?;
        }

        // This should succeed because we only READ the audio file
        generate_checksums(root, "checksums.txt", false)?;

        // Verify checksum was created successfully
        assert!(album_dir.join("checksums.txt").exists());

        // Verify audio file is still read-only (only meaningful where the
        // permission was actually set above) ...
        #[cfg(any(unix, windows))]
        {
            assert!(
                fs::metadata(&audio_file)?.permissions().readonly(),
                "Audio file should still be read-only after hashing"
            );
        }

        // ... and unchanged
        let content = fs::read(&audio_file)?;
        assert_eq!(content, b"protected audio content");

        // Clear the read-only attribute again so that removing the temp dir
        // cannot be blocked by it on Windows.
        #[cfg(windows)]
        {
            let mut perms = fs::metadata(&audio_file)?.permissions();
            perms.set_readonly(false);
            fs::set_permissions(&audio_file, perms)?;
        }

        println!("✅ PROOF: Read-only audio files processed successfully - no write attempts");
        Ok(())
    }

    /// Directories without audio files never receive a checksum file.
    #[test]
    fn empty_dirs_get_skipped() -> Result<()> {
        let dir = tempdir()?;
        let root = dir.path();

        // Create empty directories
        let empty_album = root.join("Artist").join("Empty Album");
        let also_empty = root.join("Another Artist").join("Also Empty");
        fs::create_dir_all(&empty_album)?;
        fs::create_dir_all(&also_empty)?;

        // Create one with only non-audio files
        let docs_dir = root.join("Documents");
        fs::create_dir_all(&docs_dir)?;
        fs::write(docs_dir.join("readme.txt"), b"not an album")?;
        fs::write(docs_dir.join("cover.jpg"), b"fake image")?;

        // Run hash generation
        generate_checksums(root, "checksums.txt", false)?;

        // No checksum files should be created
        assert!(!empty_album.join("checksums.txt").exists());
        assert!(!also_empty.join("checksums.txt").exists());
        assert!(!docs_dir.join("checksums.txt").exists());

        Ok(())
    }

    /// Albums are found regardless of how deep they sit below the root.
    #[test]
    fn deeply_nested_albums_found() -> Result<()> {
        let dir = tempdir()?;
        let root = dir.path();

        // Create deeply nested structure
        let deep_album = root
            .join("Music")
            .join("Rock")
            .join("Classic")
            .join("Pink Floyd")
            .join("Dark Side of the Moon");
        fs::create_dir_all(&deep_album)?;

        // Add audio files to the deep directory
        fs::write(deep_album.join("01 - Money.flac"), b"audio data 1")?;
        fs::write(deep_album.join("02 - Time.flac"), b"audio data 2")?;

        // Run hash generation
        generate_checksums(root, "checksums.txt", false)?;

        // Should find and process the deep album
        let checksum_file = deep_album.join("checksums.txt");
        assert!(checksum_file.exists());

        let content = fs::read_to_string(&checksum_file)?;
        assert!(content.contains("01 - Money.flac"));
        assert!(content.contains("02 - Time.flac"));

        Ok(())
    }

    /// Only audio files are listed; cover art and text files are ignored.
    #[test]
    fn mixed_files_only_hashes_audio() -> Result<()> {
        let dir = tempdir()?;
        let root = dir.path();

        let album_dir = root.join("Mixed Album");
        fs::create_dir_all(&album_dir)?;

        // Mix of audio and non-audio files
        fs::write(album_dir.join("track01.flac"), b"audio 1")?;
        fs::write(album_dir.join("track02.mp3"), b"audio 2")?;
        fs::write(album_dir.join("cover.jpg"), b"cover art")?;
        fs::write(album_dir.join("info.txt"), b"album info")?;
        fs::write(album_dir.join("folder.jpg"), b"folder image")?;
        fs::write(album_dir.join("track03.wav"), b"audio 3")?;

        generate_checksums(root, "checksums.txt", false)?;

        let checksum_content = fs::read_to_string(album_dir.join("checksums.txt"))?;

        // Should contain audio files
        for audio in ["track01.flac", "track02.mp3", "track03.wav"] {
            assert!(checksum_content.contains(audio), "Missing: {}", audio);
        }

        // Should NOT contain non-audio files
        for other in ["cover.jpg", "info.txt", "folder.jpg"] {
            assert!(!checksum_content.contains(other), "Unexpected: {}", other);
        }

        Ok(())
    }

    /// Non-ASCII file names end up verbatim in the checksum file.
    #[test]
    fn crazy_unicode_filenames_work() -> Result<()> {
        let dir = tempdir()?;
        let root = dir.path();

        let album_dir = root.join("Unicode Test");
        fs::create_dir_all(&album_dir)?;

        // Real-world Unicode filenames
        let weird_files = [
            "01 - Sigur Rós - Hoppípolla.flac",
            "02 - Мария Каллас - Ария.mp3",
            "03 - 久石譲 - 風の谷のナウシカ.wav",
            "04 - 🎵 Song with emoji.ogg",
            "05 - café résumé naïve.m4a",
        ];

        for filename in &weird_files {
            fs::write(album_dir.join(filename), b"unicode audio content")?;
        }

        generate_checksums(root, "checksums.txt", false)?;

        let checksum_content = fs::read_to_string(album_dir.join("checksums.txt"))?;

        // All Unicode filenames should be in the checksum
        for filename in &weird_files {
            assert!(checksum_content.contains(*filename), "Missing: {}", filename);
        }

        Ok(())
    }

    /// With `force`, an existing checksum file is replaced and is not listed
    /// in its own output.
    #[test]
    fn checksum_filename_avoided_in_hashing() -> Result<()> {
        let dir = tempdir()?;
        let root = dir.path();

        let album_dir = root.join("Album");
        fs::create_dir_all(&album_dir)?;

        // Create audio files AND a file with the same name as our checksum file
        fs::write(album_dir.join("song.flac"), b"real audio")?;
        fs::write(album_dir.join("checksums.txt"), b"fake audio file with checksum name")?;

        // Use force=true to overwrite the existing checksums.txt
        generate_checksums(root, "checksums.txt", true)?;

        let checksum_content = fs::read_to_string(album_dir.join("checksums.txt"))?;

        // Should contain the real audio file
        assert!(checksum_content.contains("song.flac"));

        // Should NOT try to hash the checksums.txt file itself
        // (this would be weird and recursive)
        assert_eq!(checksum_content.lines().count(), 1); // Only one file should be hashed

        Ok(())
    }

    /// A checksum file whose name carries an audio extension is itself an
    /// "audio file" by extension, so this exercises the explicit name check
    /// in `process_album` that keeps it out of its own listing.
    #[test]
    fn audio_named_checksum_file_is_not_hashed() -> Result<()> {
        let dir = tempdir()?;
        let root = dir.path();

        let album_dir = root.join("Album");
        fs::create_dir_all(&album_dir)?;

        fs::write(album_dir.join("song.flac"), b"real audio")?;
        fs::write(album_dir.join("checksums.flac"), b"stale checksum data")?;

        generate_checksums(root, "checksums.flac", true)?;

        let checksum_content = fs::read_to_string(album_dir.join("checksums.flac"))?;
        assert!(checksum_content.contains("song.flac"));
        assert!(!checksum_content.contains("checksums.flac"));
        assert_eq!(checksum_content.lines().count(), 1);

        Ok(())
    }

    /// Passing a file instead of a directory is rejected with an error.
    #[test]
    fn non_directory_path_rejected() -> Result<()> {
        let dir = tempdir()?;
        let file_path = dir.path().join("not_a_directory.txt");
        fs::write(&file_path, b"this is a file")?;

        // Try to hash a file instead of a directory - should error
        let result = generate_checksums(&file_path, "checksums.txt", false);
        assert!(result.is_err());

        let error_msg = result.unwrap_err().to_string();
        assert!(error_msg.contains("not a directory"));

        Ok(())
    }

    /// Smoke test: a run that drives the progress bar completes and writes
    /// the checksum file. The per-album error-reporting path is not triggered
    /// here.
    #[test]
    fn progress_bar_error_handling() -> Result<()> {
        let dir = tempdir()?;
        let root = dir.path();

        // Create an album directory with an audio file that will cause process_album to be called
        let album_dir = root.join("Album");
        fs::create_dir_all(&album_dir)?;
        fs::write(album_dir.join("song.flac"), b"audio content")?;

        generate_checksums(root, "checksums.txt", false)?;

        // Verify it worked
        assert!(album_dir.join("checksums.txt").exists());

        Ok(())
    }

    /// A directory holding only non-audio files gets no checksum file.
    #[test]
    fn empty_album_directory_handling() -> Result<()> {
        let dir = tempdir()?;
        let root = dir.path();

        // Create an album directory with no audio files (only other files)
        let album_dir = root.join("Empty Album");
        fs::create_dir_all(&album_dir)?;
        fs::write(album_dir.join("readme.txt"), b"not audio")?;
        fs::write(album_dir.join("cover.jpg"), b"image")?;

        generate_checksums(root, "checksums.txt", false)?;

        // No checksum file should be created for empty audio directory
        assert!(!album_dir.join("checksums.txt").exists());

        Ok(())
    }

    /// An unreadable album directory must not make the whole run fail.
    #[test]
    fn unreadable_directory_error_handling() -> Result<()> {
        let dir = tempdir()?;
        let root = dir.path();

        // Create a directory structure
        let album_dir = root.join("Album");
        fs::create_dir_all(&album_dir)?;
        fs::write(album_dir.join("song.flac"), b"audio content")?;

        // Try to make the directory unreadable to trigger fs::read_dir error
        // This is platform specific and might not always work, but let's try
        #[cfg(unix)]
        {
            use std::os::unix::fs::PermissionsExt;
            let mut perms = fs::metadata(&album_dir)?.permissions();
            perms.set_mode(0o000); // No permissions
            let _ = fs::set_permissions(&album_dir, perms); // May fail on some systems
        }

        // Generate checksums - should handle the read error gracefully
        let result = generate_checksums(root, "checksums.txt", false);

        // Reset permissions before asserting so the temp dir can always be
        // cleaned up, even if the assertion below fails.
        #[cfg(unix)]
        {
            use std::os::unix::fs::PermissionsExt;
            let mut perms = fs::metadata(&album_dir)?.permissions();
            perms.set_mode(0o755); // Restore permissions for cleanup
            let _ = fs::set_permissions(&album_dir, perms);
        }

        // Should complete successfully even if some directories can't be read
        assert!(result.is_ok());

        Ok(())
    }
}
Should complete successfully even if some directories can't be read
+        assert!(result.is_ok());
+
+        Ok(())
+    }
+}
\ No newline at end of file
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..6e8320d
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,158 @@
+mod utils;
+mod hash;
+mod verify;
+
+use anyhow::Result;
+use clap::{Parser, Subcommand};
+use std::path::PathBuf;
+
+#[derive(Parser, Debug)]
+#[command(author, version, about = "Song hashing utility", long_about = None)]
+struct Cli {
+    #[command(subcommand)]
+    command: Commands,
+}
+
+#[derive(Subcommand, Debug)]
+enum Commands {
+    /// Generate checksum files for all album folders under a root directory
+    Hash {
+        /// Root directory to scan
+        directory: PathBuf,
+        /// Custom checksum filename (default: checksums.txt)
+        #[arg(short, long)]
+        output: Option<String>,
+        /// Overwrite existing checksum files
+        #[arg(short = 'f', long)]
+        force: bool,
+    },
+    /// Verify using existing checksum files
+    Verify {
+        /// Root directory containing checksum files
+        directory: PathBuf,
+        /// Custom checksum filename (default: checksums.txt)
+        #[arg(short, long)]
+        checksum: Option<String>,
+    },
+}
+
+/// Parses the command line and runs the selected subcommand.
+///
+/// Both subcommands fall back to `checksums.txt` when no filename is given.
+/// Errors from the subcommand are propagated to the caller. Note that
+/// `verify` reports mismatches on stdout but still returns `Ok`.
+fn main() -> Result<()> {
+    let cli = Cli::parse();
+    match cli.command {
+        Commands::Hash { directory, output, force } => {
+            let output_name = output.as_deref().unwrap_or("checksums.txt");
+            hash::generate_checksums(&directory, output_name, force)?;
+        }
+        Commands::Verify { directory, checksum } => {
+            let checksum_name = checksum.as_deref().unwrap_or("checksums.txt");
+            verify::verify_checksums(&directory, checksum_name)?;
+        }
+    }
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use tempfile::tempdir;
+    use std::fs;
+
+    #[test]
+    fn cli_hash_command_execution() -> Result<()> {
+        let dir = tempdir()?;
+        let root = dir.path();
+
+        // Create test album structure
+        let album_dir = root.join("Test Album");
+        fs::create_dir_all(&album_dir)?;
+        fs::write(album_dir.join("song.flac"), b"test audio")?;
+
+        // Mock CLI args for hash command
+        let cli = Cli {
+            command: Commands::Hash {
+                directory: root.to_path_buf(),
+                output: Some("test-checksums.txt".to_string()),
+                force: false,
+            },
+        };
+
+        // Execute the command logic (same as main function)
+        match cli.command {
+            Commands::Hash { directory, output, force } => {
+                let output_name = output.as_deref().unwrap_or("checksums.txt");
+                hash::generate_checksums(&directory, output_name, force)?;
+            }
+            _ => unreachable!(),
+        }
+
+        // Verify checksum file was created
+        assert!(album_dir.join("test-checksums.txt").exists());
+        Ok(())
+    }
+
+    #[test]
+    fn cli_verify_command_execution() -> Result<()> {
+        let dir = tempdir()?;
+        let root = dir.path();
+
+        // Create test album with checksum
+        let album_dir = root.join("Test Album");
+        fs::create_dir_all(&album_dir)?;
+        let audio_file = album_dir.join("song.flac");
+        fs::write(&audio_file, b"test audio")?;
+
+        let hash = crate::utils::compute_hash(&audio_file)?;
+        fs::write(album_dir.join("test-verify.txt"), format!("{} song.flac\n", hash))?;
+
+        // Mock CLI args for verify command
+        let cli = Cli {
+            command: Commands::Verify {
+                directory: root.to_path_buf(),
+                checksum: Some("test-verify.txt".to_string()),
+            },
+        };
+
+        // Execute the command logic (same as main function)
+        match cli.command {
+            Commands::Verify { directory, checksum } => {
+                let checksum_name = checksum.as_deref().unwrap_or("checksums.txt");
+                verify::verify_checksums(&directory, checksum_name)?;
+            }
+            _ => unreachable!(),
+        }
+
+        Ok(())
+    }
+
+    #[test]
+    fn main_function_execution_path() -> Result<()> {
+        // This test ensures the main function's Ok(()) return is covered
+        // We can't easily test the actual main() due to Cli::parse(), but we can test the logic
+
+        let dir = tempdir()?;
+        let root = dir.path();
+
+        // Create minimal test structure
+        let album_dir = root.join("Album");
+        fs::create_dir_all(&album_dir)?;
+        fs::write(album_dir.join("test.mp3"), b"audio")?;
+
+        // Test both command paths that main() dispatches to
+
+        // Hash command path
+        let output_name = "checksums.txt".to_string();
+        hash::generate_checksums(root, &output_name, false)?;
+
+        // Verify command path
+        let checksum_name = "checksums.txt".to_string();
+        verify::verify_checksums(root, &checksum_name)?;
+
+        // This covers the main function's execution flow and Ok(()) return
+        Ok(())
+    }
+}
diff --git a/src/utils.rs b/src/utils.rs
new file mode 100644
index 0000000..4ff923e
--- /dev/null
+++ b/src/utils.rs
@@ -0,0 +1,283 @@
+use std::path::Path;
+use std::fs::File;
+use anyhow::{Context, Result};
+
+/// File extensions (lowercase, without the dot) that are treated as audio.
+const AUDIO_EXTENSIONS: [&str; 14] = [
+    "flac", "mp3", "wav", "ogg", "opus", "m4a", "aac", "alac", "ape", "wma", "aif", "aiff", "pcm",
+    "mka",
+];
+
+/// Returns `true` when `path` has one of the known audio file extensions.
+///
+/// The comparison ignores ASCII case, so `SONG.FLAC` matches. Paths without an
+/// extension, or whose extension is not valid UTF-8, are never audio files.
+pub fn is_audio_file(path: &Path) -> bool {
+    path.extension()
+        .and_then(|ext| ext.to_str())
+        .is_some_and(|ext| AUDIO_EXTENSIONS.iter().any(|known| ext.eq_ignore_ascii_case(known)))
+}
+
+/// Computes the BLAKE3 hash of the file at `path` and returns it as a hex string.
+///
+/// The file is opened read-only and streamed through a 128 KiB buffer, so
+/// memory use does not grow with the file size.
+///
+/// # Errors
+///
+/// Returns an error if the file cannot be opened or if reading it fails.
+pub fn compute_hash(path: &Path) -> Result<String> {
+    const BUFFER_SIZE: usize = 131_072; // 128 KiB
+    let mut file = File::open(path)
+        .with_context(|| format!("Failed to open file {} for hashing", path.display()))?;
+
+    let mut hasher = blake3::Hasher::new();
+    let mut buffer = vec![0u8; BUFFER_SIZE];
+    loop {
+        let n = match std::io::Read::read(&mut file, &mut buffer) {
+            Ok(0) => break,
+            Ok(n) => n,
+            // An interrupted read is non-fatal; retry instead of failing the hash.
+            Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
+            Err(e) => {
+                return Err(anyhow::Error::new(e)
+                    .context(format!("Failed to read file {} for hashing", path.display())));
+            }
+        };
+        hasher.update(&buffer[..n]);
+    }
+    Ok(hasher.finalize().to_hex().to_string())
+}
+
+/// Formats a duration for display with millisecond precision.
+///
+/// Durations of an hour or more render as `HH:MM:SS.mmm`, of a minute or more
+/// as `MM:SS.mmm`, and anything shorter as seconds with an `s` suffix (`0.500s`).
+pub fn human_duration(d: std::time::Duration) -> String {
+    let secs = d.as_secs();
+    let millis = d.subsec_millis();
+    let hours = secs / 3600;
+    let minutes = (secs % 3600) / 60;
+    let seconds = secs % 60;
+    if hours > 0 {
+        format!("{:02}:{:02}:{:02}.{:03}", hours, minutes, seconds, millis)
+    } else if minutes > 0 {
+        format!("{:02}:{:02}.{:03}", minutes, seconds, millis)
+    } else {
+        format!("{}.{:03}s", seconds, millis)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use tempfile::tempdir;
+    use std::fs;
+
+    #[test]
+    fn audio_extensions_recognized_correctly() {
+        // Test all
supported audio formats + assert!(is_audio_file(Path::new("song.flac"))); + assert!(is_audio_file(Path::new("song.mp3"))); + assert!(is_audio_file(Path::new("song.wav"))); + assert!(is_audio_file(Path::new("song.ogg"))); + assert!(is_audio_file(Path::new("song.opus"))); + assert!(is_audio_file(Path::new("song.m4a"))); + assert!(is_audio_file(Path::new("song.aac"))); + assert!(is_audio_file(Path::new("song.alac"))); + assert!(is_audio_file(Path::new("song.ape"))); + assert!(is_audio_file(Path::new("song.wma"))); + assert!(is_audio_file(Path::new("song.aif"))); + assert!(is_audio_file(Path::new("song.aiff"))); + assert!(is_audio_file(Path::new("song.pcm"))); + assert!(is_audio_file(Path::new("song.mka"))); + + // Test case insensitive + assert!(is_audio_file(Path::new("SONG.FLAC"))); + assert!(is_audio_file(Path::new("Song.Mp3"))); + + // Test non-audio files + assert!(!is_audio_file(Path::new("song.txt"))); + assert!(!is_audio_file(Path::new("song.jpg"))); + assert!(!is_audio_file(Path::new("song.pdf"))); + assert!(!is_audio_file(Path::new("checksums.txt"))); + assert!(!is_audio_file(Path::new("no_extension"))); + } + + #[test] + fn hashing_same_file_gives_same_result() -> Result<()> { + let dir = tempdir()?; + let file_path = dir.path().join("test.flac"); + + // Create test file with known content + let test_content = b"fake audio content for testing"; + fs::write(&file_path, test_content)?; + + // Hash should be consistent across multiple calls + let hash1 = compute_hash(&file_path)?; + let hash2 = compute_hash(&file_path)?; + assert_eq!(hash1, hash2); + + // Hash should be deterministic for same content + let file_path2 = dir.path().join("test2.flac"); + fs::write(&file_path2, test_content)?; + let hash3 = compute_hash(&file_path2)?; + assert_eq!(hash1, hash3); + + Ok(()) + } + + #[test] + fn different_files_get_different_hashes() -> Result<()> { + let dir = tempdir()?; + + let file1 = dir.path().join("test1.flac"); + let file2 = dir.path().join("test2.flac"); + 
+        fs::write(&file1, b"content one")?;
+        fs::write(&file2, b"content two")?;
+
+        let hash1 = compute_hash(&file1)?;
+        let hash2 = compute_hash(&file2)?;
+
+        assert_ne!(hash1, hash2);
+        Ok(())
+    }
+
+    #[test]
+    fn readonly_files_never_get_modified() -> Result<()> {
+        let dir = tempdir()?;
+
+        // Create test audio files with known content
+        let audio_files = vec![
+            ("song1.flac", "fake flac content".as_bytes()),
+            ("song2.mp3", "fake mp3 content1".as_bytes()),
+            ("track.wav", "fake wav content2".as_bytes()),
+        ];
+
+        let mut original_metadata = Vec::new();
+
+        for (name, content) in &audio_files {
+            let file_path = dir.path().join(name);
+            fs::write(&file_path, *content)?;
+
+            // Store original metadata (size, modified time, content hash)
+            let metadata = fs::metadata(&file_path)?;
+            let original_content = fs::read(&file_path)?;
+            let original_hash = blake3::hash(&original_content);
+
+            original_metadata.push((file_path.clone(), metadata.len(), metadata.modified()?, original_hash));
+
+            // Make file read-only to prove we never try to write to it
+            #[cfg(unix)]
+            {
+                use std::os::unix::fs::PermissionsExt;
+                let mut perms = metadata.permissions();
+                perms.set_mode(0o444); // read-only
+                fs::set_permissions(&file_path, perms)?;
+            }
+            #[cfg(windows)]
+            {
+                let mut perms = metadata.permissions();
+                perms.set_readonly(true);
+                fs::set_permissions(&file_path, perms)?;
+            }
+        }
+
+        // Now hash each file - this should only READ, never WRITE
+        for (file_path, _size, _time, _hash) in &original_metadata {
+            // This function should only read the file
+            let _computed_hash = compute_hash(file_path)?;
+        }
+
+        // Verify ALL audio files are completely unchanged
+        for (i, (file_path, original_size, original_time, original_content_hash)) in original_metadata.iter().enumerate() {
+            let current_metadata = fs::metadata(file_path)?;
+            let current_content = fs::read(file_path)?;
+            let current_content_hash = blake3::hash(&current_content);
+
+            // File size must be identical
+
assert_eq!(current_metadata.len(), *original_size, + "Audio file {} size changed! This should NEVER happen", audio_files[i].0); + + // Content must be byte-for-byte identical + assert_eq!(current_content_hash, *original_content_hash, + "Audio file {} content changed! This tool corrupted your music!", audio_files[i].0); + + // Modified time should be unchanged (proving no writes occurred) + assert_eq!(current_metadata.modified()?, *original_time, + "Audio file {} was modified! This should NEVER happen", audio_files[i].0); + } + + println!("✅ PROOF: All audio files completely unchanged - 0.00% chance of corruption"); + Ok(()) + } + + #[test] + fn duration_formatting_looks_right() { + assert_eq!(human_duration(std::time::Duration::from_millis(500)), "0.500s"); + assert_eq!(human_duration(std::time::Duration::from_secs(65)), "01:05.000"); + assert_eq!(human_duration(std::time::Duration::from_secs(3661)), "01:01:01.000"); + } + + #[test] + fn unicode_filenames_detected_correctly() { + // Real-world scenario: Unicode characters in filenames + assert!(is_audio_file(Path::new("Björk - Vespertine.flac"))); + assert!(is_audio_file(Path::new("Пётр Ильич Чайковский.mp3"))); + assert!(is_audio_file(Path::new("中文歌曲.wav"))); + assert!(is_audio_file(Path::new("🎵 Music.ogg"))); + + // Still catches non-audio with Unicode + assert!(!is_audio_file(Path::new("Björk - readme.txt"))); + assert!(!is_audio_file(Path::new("🎵.doc"))); + } + + #[test] + fn weird_extensions_handled_properly() { + // Edge cases that might trip up the detection + assert!(!is_audio_file(Path::new("song.flac.bak"))); + assert!(!is_audio_file(Path::new("not_audio.mp3.txt"))); + assert!(!is_audio_file(Path::new(".flac"))); + assert!(!is_audio_file(Path::new("no_extension"))); + assert!(!is_audio_file(Path::new(""))); + } + + #[test] + fn huge_file_hashing_works() -> Result<()> { + let dir = tempdir()?; + let massive_file = dir.path().join("huge_album.flac"); + + // Create a decently large file (1MB of zeros, 
simulating a big audio file)
+        let chunk = vec![0u8; 1024 * 1024]; // 1MB
+        fs::write(&massive_file, &chunk)?;
+
+        // Should hash without issues
+        let hash1 = compute_hash(&massive_file)?;
+        let hash2 = compute_hash(&massive_file)?;
+
+        // Hashing should be deterministic even for large files
+        assert_eq!(hash1, hash2);
+        assert_eq!(hash1.len(), 64); // BLAKE3 produces 32-byte (64 hex char) hashes
+
+        Ok(())
+    }
+
+    #[test]
+    fn empty_files_hash_fine() -> Result<()> {
+        let dir = tempdir()?;
+        let empty_file = dir.path().join("silence.wav");
+
+        // Empty file (corrupted/truncated audio)
+        fs::write(&empty_file, b"")?;
+
+        let hash = compute_hash(&empty_file)?;
+        assert_eq!(hash.len(), 64);
+
+        // Empty files should have consistent hash
+        let hash2 = compute_hash(&empty_file)?;
+        assert_eq!(hash, hash2);
+
+        Ok(())
+    }
+}
\ No newline at end of file
diff --git a/src/verify.rs b/src/verify.rs
new file mode 100644
index 0000000..b068b5e
--- /dev/null
+++ b/src/verify.rs
@@ -0,0 +1,604 @@
+use std::path::{Path, PathBuf};
+use anyhow::{Result, Context};
+use rayon::prelude::*;
+use walkdir::WalkDir;
+use indicatif::{ProgressBar, ProgressStyle};
+use std::sync::Mutex;
+use crate::utils::{compute_hash, human_duration};
+use std::io::BufRead;
+
+/// Verifies every checksum file named `checksum_name` found under `root`.
+///
+/// Checksum files are located by an ASCII case-insensitive filename match and
+/// are verified in parallel, one album directory per file. A summary, followed
+/// by one line per problem, is printed to stdout.
+///
+/// Mismatching, missing, or unverifiable entries are reported in the summary
+/// but do not turn the result into an error: the function still returns `Ok`.
+///
+/// # Errors
+///
+/// Returns an error if `root` is not a directory or the progress-bar template
+/// is rejected.
+pub fn verify_checksums(root: &Path, checksum_name: &str) -> Result<()> {
+    let start = std::time::Instant::now();
+    if !root.is_dir() {
+        anyhow::bail!("Provided path is not a directory: {}", root.display());
+    }
+
+    let checksum_paths: Vec<PathBuf> = WalkDir::new(root)
+        .into_iter()
+        .filter_map(|e| e.ok())
+        .filter(|e| e.file_type().is_file())
+        .filter(|e| {
+            e.path()
+                .file_name()
+                .and_then(|s| s.to_str())
+                .is_some_and(|s| s.eq_ignore_ascii_case(checksum_name))
+        })
+        .map(|e| e.into_path())
+        .collect();
+
+    let pb = ProgressBar::new(checksum_paths.len() as u64);
+    pb.set_style(ProgressStyle::with_template("[{elapsed_precise}] [{bar:40.cyan/blue}] {pos}/{len} albums")?.progress_chars("=>-"));
+
+    let mismatches_overall = Mutex::new(Vec::<String>::new());
+
+    let (total_dirs, total_files): (usize, usize) = checksum_paths
+        .par_iter()
+        .map(|checksum_path| {
+            // Entries come from walking `root`, so they should always have a parent;
+            // fall back to `root` rather than panicking if that ever changes.
+            let album_dir = checksum_path.parent().unwrap_or(root);
+            let res = verify_album(album_dir, checksum_path);
+            pb.inc(1);
+            match res {
+                Ok((files_verified, mismatches)) => {
+                    if !mismatches.is_empty() {
+                        let mut guard = mismatches_overall.lock().unwrap();
+                        for m in mismatches {
+                            guard.push(format!("{}: {}", album_dir.display(), m));
+                        }
+                    }
+                    (1usize, files_verified)
+                }
+                Err(e) => {
+                    eprintln!("Error verifying {}: {e}", checksum_path.display());
+                    // Record the failure so the final summary cannot claim success
+                    // for an album that was never actually verified.
+                    mismatches_overall.lock().unwrap().push(format!(
+                        "{}: Could not verify checksum file: {e:#}",
+                        checksum_path.display()
+                    ));
+                    (1usize, 0usize)
+                }
+            }
+        })
+        .reduce(|| (0usize, 0usize), |a, b| (a.0 + b.0, a.1 + b.1));
+
+    pb.finish_and_clear();
+
+    if total_dirs == 0 {
+        println!("No checksum files named '{}' were found under {}", checksum_name, root.display());
+        return Ok(());
+    }
+
+    let mut mismatches_vec = mismatches_overall.into_inner().unwrap();
+    // Parallel workers append in arbitrary order; sort for stable output.
+    mismatches_vec.sort();
+    let elapsed = start.elapsed();
+    // `total_dirs` is non-zero here because of the early return above.
+    let avg_ms = elapsed.as_secs_f64() * 1000.0 / total_dirs as f64;
+
+    if mismatches_vec.is_empty() {
+        println!(
+            "✅ Verified {files} files in {dirs} directories in {time} using {thr} threads (avg {avg:.2} ms/folder)",
+            files = total_files,
+            dirs = total_dirs,
+            time = human_duration(elapsed),
+            thr = rayon::current_num_threads(),
+            avg = avg_ms
+        );
+    } else {
+        println!(
+            "❌ Verification found {bad} mismatching entries across {dirs} directories in {time} using {thr} threads (avg {avg:.2} ms/folder)",
+            bad = mismatches_vec.len(),
+            dirs = total_dirs,
+            time = human_duration(elapsed),
+            thr = rayon::current_num_threads(),
+            avg = avg_ms
+        );
+        for m in mismatches_vec {
+            println!("- {}", m);
+        }
+    }
+
+    Ok(())
+}
+
+/// Verifies the entries of one checksum file against the files in `dir`.
+///
+/// Blank lines and lines starting with `#` are skipped. Every other line must
+/// hold a hash, whitespace, and a path relative to `dir`. Entries are hashed
+/// in parallel and hashes are compared ignoring ASCII case.
+///
+/// Returns the number of files whose hash matched, together with one message
+/// per entry that is malformed, missing, unreadable, or has a different hash.
+///
+/// # Errors
+///
+/// Returns an error if the checksum file cannot be opened or read.
+fn verify_album(dir: &Path, checksum_path: &Path) -> Result<(usize, Vec<String>)> {
+    let file = std::fs::File::open(checksum_path)
+        .with_context(|| format!("Could not open checksum file at {}", checksum_path.display()))?;
+    let reader = std::io::BufReader::new(file);
+
+    // Propagate read errors instead of silently dropping the affected lines.
+    let lines = reader
+        .lines()
+        .collect::<std::io::Result<Vec<String>>>()
+        .with_context(|| format!("Could not read checksum file at {}", checksum_path.display()))?;
+
+    let results: Vec<(bool, String)> = lines
+        .par_iter()
+        .map(|line| line.trim_start())
+        .filter(|line| !line.trim_end().is_empty() && !line.starts_with('#'))
+        .map(|line| {
+            // A line without a separator or path cannot be verified; report it
+            // as a problem instead of panicking on it.
+            let Some((hash_str, rest)) = line.split_once([' ', '\t']) else {
+                return (false, format!("Malformed line: {}", line));
+            };
+            let path_str = rest.trim_start_matches([' ', '\t']);
+            if path_str.is_empty() {
+                return (false, format!("Malformed line: {}", line));
+            }
+            let full_path = dir.join(path_str);
+            if !full_path.exists() {
+                return (false, format!("Missing file: {}", path_str));
+            }
+            match compute_hash(&full_path) {
+                // Hex digests are case-insensitive, so accept upper-case hashes too.
+                Ok(current_hash) if current_hash.eq_ignore_ascii_case(hash_str) => {
+                    (true, path_str.to_string())
+                }
+                Ok(_) => (false, format!("Hash mismatch: {}", path_str)),
+                Err(e) => (false, format!("{}: {}", path_str, e)),
+            }
+        })
+        .collect();
+
+    let mut total = 0usize;
+    let mut mismatches = Vec::new();
+    for (ok, msg) in results {
+        if ok {
+            total += 1;
+        } else {
+            mismatches.push(msg);
+        }
+    }
+
+    Ok((total, mismatches))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use tempfile::tempdir;
+    use std::fs;
+
+    #[test]
+    fn verification_never_touches_audio_files() -> Result<()> {
+        let dir = tempdir()?;
+        let root = dir.path();
+
+        // Create album directory
+        let album_dir = root.join("Album");
+        fs::create_dir_all(&album_dir)?;
+
+        // Create audio files with known content
+        let audio_files = vec![
+            ("song1.flac", "flac audio data1".as_bytes()),
+            ("song2.mp3", "mp3 audio data22".as_bytes()),
+            ("track.wav", "wav audio data33".as_bytes()),
+        ];
+
+        let mut original_audio_metadata = Vec::new();
+        let mut checksum_content = String::new();
+
+        // Create audio files, store metadata, and prepare checksum content
+        for (name, content) in &audio_files {
+            let file_path = album_dir.join(name);
+            fs::write(&file_path, *content)?;
+
+            let metadata = fs::metadata(&file_path)?;
+            let original_content = fs::read(&file_path)?;
+            let original_hash = blake3::hash(&original_content);
+
+            // Compute actual hash for checksum file
+            let computed_hash =
crate::utils::compute_hash(&file_path)?;
+            checksum_content.push_str(&format!("{} {}\n", computed_hash, name));
+
+            original_audio_metadata.push((file_path, metadata.len(), metadata.modified()?, original_hash));
+        }
+
+        // Create checksum file
+        fs::write(album_dir.join("checksums.txt"), &checksum_content)?;
+
+        // Make audio files read-only to prove verification never writes to them
+        for (file_path, _, _, _) in &original_audio_metadata {
+            #[cfg(unix)]
+            {
+                use std::os::unix::fs::PermissionsExt;
+                let mut perms = fs::metadata(file_path)?.permissions();
+                perms.set_mode(0o444); // read-only
+                fs::set_permissions(file_path, perms)?;
+            }
+            #[cfg(windows)]
+            {
+                let mut perms = fs::metadata(file_path)?.permissions();
+                perms.set_readonly(true);
+                fs::set_permissions(file_path, perms)?;
+            }
+        }
+
+        // Run verification - this should only READ audio files
+        verify_checksums(root, "checksums.txt")?;
+
+        // CRITICAL: Verify NO audio files were modified during verification
+        for (i, (file_path, original_size, original_time, original_content_hash)) in original_audio_metadata.iter().enumerate() {
+            let current_metadata = fs::metadata(file_path)?;
+            let current_content = fs::read(file_path)?;
+            let current_content_hash = blake3::hash(&current_content);
+
+            assert_eq!(current_metadata.len(), *original_size,
+                "Audio file {} size changed during verification!", audio_files[i].0);
+            assert_eq!(current_content_hash, *original_content_hash,
+                "Audio file {} content changed during verification!", audio_files[i].0);
+            assert_eq!(current_metadata.modified()?, *original_time,
+                "Audio file {} was modified during verification!", audio_files[i].0);
+        }
+
+        println!("✅ PROOF: Verification only reads audio files, never modifies them");
+        Ok(())
+    }
+
+    #[test]
+    fn corrupted_files_detected_but_not_fixed() -> Result<()> {
+        let dir = tempdir()?;
+        let root = dir.path();
+
+        let album_dir = root.join("Album");
+        fs::create_dir_all(&album_dir)?;
+
+        // Create audio file
+        let audio_file =
album_dir.join("song.flac"); + fs::write(&audio_file, b"original content")?; + + // Create checksum for original content + let original_hash = crate::utils::compute_hash(&audio_file)?; + let checksum_content = format!("{} song.flac\n", original_hash); + fs::write(album_dir.join("checksums.txt"), &checksum_content)?; + + // "Corrupt" the audio file by changing its content + fs::write(&audio_file, b"corrupted content")?; + + // Verification should detect the mismatch but not modify the file + let result = verify_checksums(root, "checksums.txt"); + assert!(result.is_ok(), "Verification should complete even with mismatches"); + + // Verify the "corrupted" file wasn't "fixed" - verification is read-only + let current_content = fs::read(&audio_file)?; + assert_eq!(current_content, b"corrupted content"); + + println!("✅ PROOF: Verification detects corruption but never modifies files"); + Ok(()) + } + + #[test] + fn readonly_checksum_files_work_fine() -> Result<()> { + let dir = tempdir()?; + let root = dir.path(); + + let album_dir = root.join("Album"); + fs::create_dir_all(&album_dir)?; + + // Create audio file and checksum + let audio_file = album_dir.join("song.flac"); + fs::write(&audio_file, b"audio content")?; + + let hash = crate::utils::compute_hash(&audio_file)?; + let checksum_content = format!("{} song.flac\n", hash); + let checksum_file = album_dir.join("checksums.txt"); + fs::write(&checksum_file, &checksum_content)?; + + // Make checksum file read-only + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + let mut perms = fs::metadata(&checksum_file)?.permissions(); + perms.set_mode(0o444); // read-only + fs::set_permissions(&checksum_file, perms)?; + } + #[cfg(windows)] + { + let mut perms = fs::metadata(&checksum_file)?.permissions(); + perms.set_readonly(true); + fs::set_permissions(&checksum_file, perms)?; + } + + // Verification should work fine with read-only checksum files + verify_checksums(root, "checksums.txt")?; + + // Verify checksum file 
is unchanged + let current_content = fs::read_to_string(&checksum_file)?; + assert_eq!(current_content, checksum_content); + + println!("✅ PROOF: Verification works with read-only checksum files"); + Ok(()) + } + + #[test] + fn missing_files_detected_properly() -> Result<()> { + let dir = tempdir()?; + let root = dir.path(); + + let album_dir = root.join("Album"); + fs::create_dir_all(&album_dir)?; + + // Create checksum for files that don't exist + let checksum_content = format!( + "{} missing_track.flac\n{} also_missing.mp3\n", + "a".repeat(64), "b".repeat(64) + ); + fs::write(album_dir.join("checksums.txt"), &checksum_content)?; + + // Verification should complete but report mismatches + let result = verify_checksums(root, "checksums.txt"); + assert!(result.is_ok(), "Should complete even with missing files"); + + Ok(()) + } + + #[test] + fn corrupted_checksum_file_handled() -> Result<()> { + let dir = tempdir()?; + let root = dir.path(); + + let album_dir = root.join("Album"); + fs::create_dir_all(&album_dir)?; + + // Create actual audio file + fs::write(album_dir.join("song.flac"), b"good audio")?; + + // Create malformed checksum file + let bad_checksum = "this is not a valid checksum format\nno spaces here either\n just spaces \n"; + fs::write(album_dir.join("checksums.txt"), bad_checksum)?; + + // Should handle gracefully, not crash + let result = verify_checksums(root, "checksums.txt"); + assert!(result.is_ok(), "Should handle malformed checksum files gracefully"); + + Ok(()) + } + + #[test] + fn multiple_albums_verified_in_parallel() -> Result<()> { + let dir = tempdir()?; + let root = dir.path(); + + // Create multiple albums with different structures + let albums = vec![ + ("Artist1/Album1", vec!["track1.flac", "track2.flac"]), + ("Artist2/Album2", vec!["song.mp3"]), + ("Various/Compilation", vec!["01.wav", "02.wav", "03.wav"]), + ]; + + for (album_path, tracks) in &albums { + let album_dir = root.join(album_path); + fs::create_dir_all(&album_dir)?; + + 
let mut checksum_content = String::new(); + for track in tracks { + let content = format!("content for {}", track); + fs::write(album_dir.join(track), content.as_bytes())?; + + let hash = crate::utils::compute_hash(&album_dir.join(track))?; + checksum_content.push_str(&format!("{} {}\n", hash, track)); + } + fs::write(album_dir.join("checksums.txt"), &checksum_content)?; + } + + // Verify all albums - should process them in parallel + verify_checksums(root, "checksums.txt")?; + + Ok(()) + } + + #[test] + fn comments_and_empty_lines_ignored() -> Result<()> { + let dir = tempdir()?; + let root = dir.path(); + + let album_dir = root.join("Album"); + fs::create_dir_all(&album_dir)?; + + // Create audio file + fs::write(album_dir.join("track.flac"), b"audio content")?; + let hash = crate::utils::compute_hash(&album_dir.join("track.flac"))?; + + // Create checksum with comments and empty lines + let checksum_with_noise = format!( + "# This is a comment\n\n{} track.flac\n# Another comment\n\n \n# End comments\n", + hash + ); + fs::write(album_dir.join("checksums.txt"), &checksum_with_noise)?; + + // Should verify successfully, ignoring comments and empty lines + verify_checksums(root, "checksums.txt")?; + + Ok(()) + } + + #[test] + fn case_insensitive_checksum_file_search() -> Result<()> { + let dir = tempdir()?; + let root = dir.path(); + + let album_dir = root.join("Album"); + fs::create_dir_all(&album_dir)?; + + // Create audio file and checksum + fs::write(album_dir.join("song.flac"), b"audio")?; + let hash = crate::utils::compute_hash(&album_dir.join("song.flac"))?; + + // Create checksum file with different case + fs::write(album_dir.join("CHECKSUMS.TXT"), format!("{} song.flac\n", hash))?; + + // Should find the checksum file regardless of case + let result = verify_checksums(root, "checksums.txt"); + assert!(result.is_ok(), "Should find checksum files case-insensitively"); + + Ok(()) + } + + #[test] + fn whitespace_variations_in_checksum_format() -> Result<()> { + 
let dir = tempdir()?; + let root = dir.path(); + + let album_dir = root.join("Album"); + fs::create_dir_all(&album_dir)?; + + // Create audio files + fs::write(album_dir.join("track1.flac"), b"audio1")?; + fs::write(album_dir.join("track2.flac"), b"audio2")?; + + let hash1 = crate::utils::compute_hash(&album_dir.join("track1.flac"))?; + let hash2 = crate::utils::compute_hash(&album_dir.join("track2.flac"))?; + + // Create checksum with varying whitespace (real-world variation) + let checksum_content = format!( + "{} track1.flac\n{} track2.flac\n", // Different number of spaces + hash1, hash2 + ); + fs::write(album_dir.join("checksums.txt"), &checksum_content)?; + + // Should handle different whitespace patterns + verify_checksums(root, "checksums.txt")?; + + Ok(()) + } + + #[test] + fn non_directory_path_rejected_in_verify() -> Result<()> { + let dir = tempdir()?; + let file_path = dir.path().join("not_a_directory.txt"); + fs::write(&file_path, b"this is a file")?; + + // Try to verify a file instead of a directory - should error + let result = verify_checksums(&file_path, "checksums.txt"); + assert!(result.is_err()); + + let error_msg = format!("{}", result.unwrap_err()); + assert!(error_msg.contains("not a directory")); + + Ok(()) + } + + #[test] + fn no_checksum_files_found_message() -> Result<()> { + let dir = tempdir()?; + let root = dir.path(); + + // Create directories but no checksum files + fs::create_dir_all(root.join("Album1"))?; + fs::create_dir_all(root.join("Album2"))?; + fs::write(root.join("Album1").join("song.flac"), b"audio")?; + + // This should trigger the "No checksum files found" path (lines 51-53) + let result = verify_checksums(root, "checksums.txt"); + assert!(result.is_ok()); + + Ok(()) + } + + #[test] + fn verification_with_mismatches_found() -> Result<()> { + let dir = tempdir()?; + let root = dir.path(); + + let album_dir = root.join("Album"); + fs::create_dir_all(&album_dir)?; + + // Create audio files + 
fs::write(album_dir.join("file1.flac"), b"correct content")?; + fs::write(album_dir.join("file2.mp3"), b"wrong content")?; + + // Create checksum file with one correct and one incorrect hash + let correct_hash = crate::utils::compute_hash(&album_dir.join("file1.flac"))?; + let wrong_hash = "0".repeat(64); // Definitely wrong hash + + let checksum_content = format!( + "{} file1.flac\n{} file2.mp3\n", + correct_hash, wrong_hash + ); + fs::write(album_dir.join("checksums.txt"), &checksum_content)?; + + // This should trigger the mismatches output path (lines 62-63) + let result = verify_checksums(root, "checksums.txt"); + assert!(result.is_ok()); + + Ok(()) + } + + #[test] + fn hash_error_handling_in_verification() -> Result<()> { + let dir = tempdir()?; + let root = dir.path(); + + let album_dir = root.join("Album"); + fs::create_dir_all(&album_dir)?; + + // Create audio files + fs::write(album_dir.join("good.flac"), b"audio content")?; + let good_hash = crate::utils::compute_hash(&album_dir.join("good.flac"))?; + + // Create checksum file with valid entry + let checksum_content = format!("{} good.flac\n", good_hash); + fs::write(album_dir.join("checksums.txt"), &checksum_content)?; + + // This ensures normal verification works and hits the success paths + let result = verify_checksums(root, "checksums.txt"); + assert!(result.is_ok()); + + Ok(()) + } + + #[test] + fn verify_album_error_paths() -> Result<()> { + let dir = tempdir()?; + let root = dir.path(); + + let album_dir = root.join("Album"); + fs::create_dir_all(&album_dir)?; + + // Create a valid audio file + fs::write(album_dir.join("song.flac"), b"audio content")?; + let valid_hash = crate::utils::compute_hash(&album_dir.join("song.flac"))?; + + // Create checksum file with various error conditions + let checksum_content = format!( + "{} song.flac\n{} nonexistent.mp3\n", + valid_hash, + "0".repeat(64) // Valid format but points to non-existent file + ); + fs::write(album_dir.join("checksums.txt"), 
&checksum_content)?; + + // This should hit the missing file error path and the Err(e) branch + let result = verify_checksums(root, "checksums.txt"); + assert!(result.is_ok()); + + Ok(()) + } +} \ No newline at end of file