Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 101 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ object = "0.37"
opendal = { version = "0.55.0", optional = true, default-features = false }
openssl = { version = "0.10.75", optional = true }
rand = "0.8.4"
reflink-copy = "0.1"
regex = "1.10.3"
reqsign = { version = "0.18.0", optional = true }
reqwest = { version = "0.12", features = [
Expand Down
52 changes: 52 additions & 0 deletions docs/FileClone.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# FileClone Storage

## Overview

The `file_clone` option enables uncompressed cache storage with Copy-on-Write (CoW) filesystem support for faster cache hits.

## Configuration

Add to your sccache config file (e.g., `~/.config/sccache/config`):

```toml
[cache.disk]
file_clone = true
```

Or set via environment variable:

```bash
export SCCACHE_FILE_CLONE=true
```

## How it Works

When `file_clone` is enabled:

1. **Detection**: sccache checks if the cache directory is on a CoW filesystem (APFS on macOS, Btrfs/XFS on Linux)
2. **Uncompressed Storage**: Cache entries are stored as directories with raw files instead of ZIP+zstd
3. **Reflink Extraction**: On cache hit, files are copied using reflink (near-instant on CoW filesystems)
4. **Fallback**: If CoW is not supported, automatically falls back to traditional compressed storage

## Performance Benefits

On CoW filesystems:
- Near-zero copy time for cached files (reflink uses filesystem-level CoW)
- Reduced CPU usage (no decompression step)
- Trade-off: Slightly higher disk usage (uncompressed files)

## Compatibility

Works on:
- macOS with APFS
- Linux with Btrfs
- Linux with XFS
- Other filesystems with reflink support

If the filesystem doesn't support reflink, sccache automatically uses compressed storage and logs a warning.

## Implementation Details

- Cache entries stored as directories under `cache/a/b/{hash}/`
- Each directory contains: `{object_name}`, `stdout`, `stderr`
- Original ZIP+zstd format still supported for backwards compatibility
1 change: 1 addition & 0 deletions src/cache/cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -522,6 +522,7 @@ pub fn storage_from_config(
preprocessor_cache_mode_config,
rw_mode,
config.basedirs.clone(),
config.fallback_cache.file_clone,
)))
}

Expand Down
82 changes: 81 additions & 1 deletion src/cache/cache_io.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,10 @@ pub struct FileObjectSource {

/// Result of a cache lookup.
pub enum Cache {
/// Result was found in cache.
/// Result was found in cache (compressed ZIP format).
Hit(CacheRead),
/// Result was found in cache (uncompressed directory format).
UncompressedHit(UncompressedCacheEntry),
/// Result was not found in cache.
Miss,
/// Do not cache the results of the compilation.
Expand All @@ -48,6 +50,7 @@ impl fmt::Debug for Cache {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match *self {
Cache::Hit(_) => write!(f, "Cache::Hit(...)"),
Cache::UncompressedHit(_) => write!(f, "Cache::UncompressedHit(...)"),
Cache::Miss => write!(f, "Cache::Miss"),
Cache::None => write!(f, "Cache::None"),
Cache::Recache => write!(f, "Cache::Recache"),
Expand Down Expand Up @@ -268,3 +271,80 @@ impl Default for CacheWrite {
Self::new()
}
}

/// An uncompressed cache entry stored as a directory.
///
/// Holds only the path to the entry's directory on disk; file contents
/// are read lazily by the accessor methods on the `impl` block.
#[derive(Debug)]
pub struct UncompressedCacheEntry {
    /// Directory containing the entry's files (cached objects plus
    /// `stdout`/`stderr` capture files).
    pub(crate) dir: PathBuf,
}

impl UncompressedCacheEntry {
    /// Creates an entry rooted at `dir` (the directory holding the cached files).
    pub fn new(dir: PathBuf) -> Self {
        Self { dir }
    }

    /// Extracts the requested objects from this cache entry into their
    /// destination paths, consuming the entry.
    ///
    /// For each `FileObjectSource`, the file named `key` inside the entry
    /// directory is copied to `path` via reflink (with a plain-copy fallback —
    /// see `crate::reflink::reflink_or_copy`). The work runs on `pool` via
    /// `spawn_blocking` because it is synchronous filesystem I/O.
    ///
    /// Errors if a non-`optional` object is missing from the entry, if the
    /// destination has no parent directory, or on any I/O failure while
    /// copying/persisting. A failed copy of an `optional` object is skipped
    /// silently.
    pub async fn extract_objects<T>(self, objects: T, pool: &tokio::runtime::Handle) -> Result<()>
    where
        T: IntoIterator<Item = FileObjectSource> + Send + Sync + 'static,
    {
        pool.spawn_blocking(move || {
            for FileObjectSource {
                key,
                path,
                optional,
            } in objects
            {
                // Cached file for this object lives directly under the entry dir.
                let src = self.dir.join(&key);

                if !src.exists() {
                    if optional {
                        continue;
                    }
                    bail!("Required object '{}' not found in cache", key);
                }

                // Make sure the destination's parent directory exists before
                // we create the tempfile in it below.
                let dir = path
                    .parent()
                    .context("Output file without a parent directory!")?;
                fs::create_dir_all(dir)?;

                // Read permissions from the cached source file directly
                // (re-applied to the destination after the rename below).
                let mode = get_file_mode(&fs::File::open(&src)?);

                // Write to a tempfile and then atomically rename to the final path,
                // so parallel builds don't see partially-written files.
                let tmp_path = NamedTempFile::new_in(dir)?.into_temp_path();
                // Remove the empty temp file so reflink can create the destination
                // (reflink implementations typically refuse to overwrite an
                // existing file). `tmp_path` keeps its delete-on-drop guard, so
                // the temp name is still cleaned up if we bail out early.
                let _ = std::fs::remove_file(&tmp_path);

                if let Err(e) = crate::reflink::reflink_or_copy(&src, &tmp_path) {
                    if !optional {
                        bail!("Failed to copy object '{}' to {:?}: {}", key, path, e);
                    }
                    // Best-effort for optional objects: skip on copy failure.
                    continue;
                }

                // Atomic move into place; `persist` consumes the TempPath so it
                // is no longer deleted on drop once this succeeds.
                tmp_path.persist(&path).map_err(|e| {
                    anyhow::anyhow!("Failed to persist {:?} to {:?}: {}", e.path, path, e.error)
                })?;

                // Restore the source file's mode on the destination; a mode
                // that couldn't be read (Err/None) is silently left as-is.
                if let Ok(Some(mode)) = mode {
                    set_file_mode(&path, mode)?;
                }
            }

            Ok(())
        })
        .await?
    }

    /// Returns the captured compiler stdout for this entry.
    ///
    /// Best-effort: a missing or unreadable `stdout` file yields an empty Vec.
    pub fn get_stdout(&self) -> Vec<u8> {
        let path = self.dir.join("stdout");
        fs::read(&path).unwrap_or_default()
    }

    /// Returns the captured compiler stderr for this entry.
    ///
    /// Best-effort: a missing or unreadable `stderr` file yields an empty Vec.
    pub fn get_stderr(&self) -> Vec<u8> {
        let path = self.dir.join("stderr");
        fs::read(&path).unwrap_or_default()
    }
}
Loading