@@ -138,6 +138,20 @@ enum Command {
138138 #[ arg( short = 'x' , long, default_value_t = num_cpus( ) ) ]
139139 extract_threads : usize ,
140140 } ,
141+
142+ /// Manage the zerostart cache
143+ Cache {
144+ #[ command( subcommand) ]
145+ action : CacheAction ,
146+ } ,
147+ }
148+
// Actions of the `cache` subcommand; carried in the `action` field of `Command::Cache`.
// NOTE: the `///` comments on the variants are left untouched on purpose, since
// presumably clap surfaces them as CLI help text — confirm before rewording.
149+ #[ derive( Subcommand ) ]
150+ enum CacheAction {
151+ /// Show cache size and location
// Handled in `main`: prints the sizes of the `envs`, `shared_wheels` and `pylock`
// sub-directories of the cache directory, plus their total.
152+ Info ,
153+ /// Remove all cached data (environments, shared wheels, resolution cache)
// Handled in `main`: removes the whole cache directory recursively.
154+ Clean ,
141155}
142156
143157fn num_cpus ( ) -> usize {
@@ -462,46 +476,143 @@ fn restore_from_shared_cache(spec: &WheelSpec, site_packages: &Path) -> bool {
462476
463477/// Populate the shared cache from a freshly extracted wheel in site-packages.
464478///
465- /// We identify the wheel's files by looking for its . dist-info directory,
466- /// then copy the top-level dirs that belong to it into the cache .
479+ /// Uses the RECORD file from dist-info to get the exact list of installed files.
480+ /// Falls back to heuristic matching if RECORD is missing .
467481fn populate_shared_cache ( spec : & WheelSpec , site_packages : & Path ) {
468482 let cache_path = shared_wheel_cache_dir ( spec) ;
469483 if cache_path. join ( ".complete" ) . exists ( ) {
470484 return ; // already cached
471485 }
472486
473- if std:: fs:: create_dir_all ( & cache_path) . is_err ( ) {
487+ // Use a staging dir for atomic population
488+ let staging = cache_path. with_extension ( "staging" ) ;
489+ let _ = std:: fs:: remove_dir_all ( & staging) ; // clean stale staging
490+ if std:: fs:: create_dir_all ( & staging) . is_err ( ) {
474491 return ;
475492 }
476493
477- // Find this wheel's dist-info and import roots in site-packages
494+ // Find dist-info directory for this package
478495 let norm = spec. distribution . replace ( '-' , "_" ) . to_lowercase ( ) ;
496+ let dist_info = find_dist_info ( site_packages, & norm) ;
497+
498+ // Try RECORD-based populate first (ground truth)
499+ let populated = if let Some ( ref di) = dist_info {
500+ populate_from_record ( site_packages, di, & staging)
501+ } else {
502+ false
503+ } ;
504+
505+ // Fallback: copy dist-info + known import roots
506+ if !populated {
507+ populate_heuristic ( site_packages, & norm, & spec. import_roots , & staging) ;
508+ }
509+
510+ // Atomic commit: rename staging → final
511+ if let Ok ( ( ) ) = std:: fs:: create_dir_all ( cache_path. parent ( ) . unwrap_or ( Path :: new ( "." ) ) ) {
512+ let _ = std:: fs:: rename ( & staging, & cache_path) ;
513+ let _ = std:: fs:: File :: create ( cache_path. join ( ".complete" ) ) ;
514+ } else {
515+ let _ = std:: fs:: remove_dir_all ( & staging) ;
516+ }
517+ }
518+
/// Find the dist-info directory for a package in site-packages.
///
/// Scans the direct children of `site_packages` for an entry named
/// `<name>-<version>.dist-info` whose `<name>` part (everything before the
/// first `-`) denotes the same distribution as `norm_name`.
///
/// Names are compared after lowercasing and collapsing every run of `-`, `_`
/// and `.` into a single `_`, so `ruamel.yaml`, `ruamel-yaml` and
/// `Ruamel_Yaml` all match a `ruamel_yaml-0.18.6.dist-info` directory.
///
/// Returns the path of the first matching entry, or `None` when the directory
/// cannot be read or nothing matches.
fn find_dist_info(site_packages: &Path, norm_name: &str) -> Option<std::path::PathBuf> {
    /// Lowercase `name` and collapse each run of `-`, `_`, `.` into one `_`.
    fn canonical(name: &str) -> String {
        let lowered = name.to_lowercase();
        let mut out = String::with_capacity(lowered.len());
        let mut in_separator_run = false;
        for ch in lowered.chars() {
            if matches!(ch, '-' | '_' | '.') {
                if !in_separator_run {
                    out.push('_');
                }
                in_separator_run = true;
            } else {
                out.push(ch);
                in_separator_run = false;
            }
        }
        out
    }

    let wanted = canonical(norm_name);

    std::fs::read_dir(site_packages)
        .ok()?
        .flatten()
        .find(|entry| {
            let raw_name = entry.file_name();
            let entry_name = raw_name.to_string_lossy();
            entry_name
                // Strip the suffix exactly once.
                .strip_suffix(".dist-info")
                // The distribution part never contains '-', so the first
                // '-' separates it from the version.
                .and_then(|stem| stem.split('-').next())
                .map_or(false, |pkg| canonical(pkg) == wanted)
        })
        .map(|entry| entry.path())
}
535+
/// Populate cache from RECORD file — lists every file the wheel installed.
///
/// RECORD format: `path,hash,size` per line (CSV; a path containing a comma
/// is wrapped in double quotes). Paths are relative to site-packages.
///
/// Exactly the files listed in RECORD are copied from `site_packages` into
/// `staging`, preserving their relative paths. Copying file-by-file (instead
/// of copying each top-level directory wholesale) keeps files that belong to
/// *other* distributions out of the cache, e.g. siblings in a shared
/// namespace package directory or in a shared `__pycache__`.
///
/// Entries are skipped when they
/// - are empty,
/// - would leave site-packages (absolute paths or any `..` / `.` component),
/// - do not exist as a regular file under `site_packages`,
/// - cannot be copied.
///
/// Returns true if RECORD was found and at least one file was copied.
fn populate_from_record(site_packages: &Path, dist_info: &Path, staging: &Path) -> bool {
    /// Extract the first CSV field (the path) of one RECORD line.
    fn record_path(line: &str) -> std::borrow::Cow<'_, str> {
        let line = line.trim();
        match line.strip_prefix('"') {
            // Quoted field: runs to the closing quote, `""` is a literal quote.
            Some(quoted) => {
                let mut field = String::new();
                let mut chars = quoted.chars().peekable();
                while let Some(ch) = chars.next() {
                    if ch != '"' {
                        field.push(ch);
                    } else if chars.peek() == Some(&'"') {
                        chars.next();
                        field.push('"');
                    } else {
                        break;
                    }
                }
                std::borrow::Cow::Owned(field)
            }
            // Plain field: everything up to the first comma.
            None => std::borrow::Cow::Borrowed(line.split(',').next().unwrap_or("").trim()),
        }
    }

    let content = match std::fs::read_to_string(dist_info.join("RECORD")) {
        Ok(c) => c,
        Err(_) => return false,
    };

    let mut copied_any = false;
    for line in content.lines() {
        let rel = record_path(line);
        if rel.is_empty() {
            continue;
        }

        // Only plain `a/b/c` paths are accepted; anything with `..`, a root
        // or a drive prefix could read from / write to places outside
        // `site_packages` / `staging`.
        let rel_path = Path::new(rel.as_ref());
        let stays_inside = rel_path
            .components()
            .all(|part| matches!(part, std::path::Component::Normal(_)));
        if !stays_inside {
            continue;
        }

        let src = site_packages.join(rel_path);
        if !src.is_file() {
            continue;
        }

        let dst = staging.join(rel_path);
        if let Some(parent) = dst.parent() {
            if std::fs::create_dir_all(parent).is_err() {
                continue;
            }
        }
        if std::fs::copy(&src, &dst).is_ok() {
            copied_any = true;
        }
    }

    copied_any
}
584+
585+ /// Fallback: populate cache using heuristic name matching (dist-info + import roots).
586+ fn populate_heuristic (
587+ site_packages : & Path ,
588+ norm_name : & str ,
589+ import_roots : & [ String ] ,
590+ staging : & Path ,
591+ ) {
479592 if let Ok ( entries) = std:: fs:: read_dir ( site_packages) {
480593 for entry in entries. flatten ( ) {
481594 let name = entry. file_name ( ) ;
482595 let name_str = name. to_string_lossy ( ) . to_string ( ) ;
483596
484- // Match dist-info dir or import root dirs
485597 let is_dist_info = name_str. ends_with ( ".dist-info" ) && {
486598 let stem = name_str. trim_end_matches ( ".dist-info" ) ;
487599 let pkg = stem. split ( '-' ) . next ( ) . unwrap_or ( stem) ;
488- pkg. replace ( '-' , "_" ) . to_lowercase ( ) == norm
600+ pkg. replace ( '-' , "_" ) . to_lowercase ( ) == * norm_name
489601 } ;
490602
491603 let is_data_dir = name_str. ends_with ( ".data" ) && {
492604 let stem = name_str. trim_end_matches ( ".data" ) ;
493605 let pkg = stem. split ( '-' ) . next ( ) . unwrap_or ( stem) ;
494- pkg. replace ( '-' , "_" ) . to_lowercase ( ) == norm
606+ pkg. replace ( '-' , "_" ) . to_lowercase ( ) == * norm_name
495607 } ;
496608
497- let is_import_root = spec
498- . import_roots
609+ let is_import_root = import_roots
499610 . iter ( )
500- . any ( |r| r == & name_str || name_str == format ! ( "{norm }.py" ) ) ;
611+ . any ( |r| r == & name_str || name_str == format ! ( "{norm_name }.py" ) ) ;
501612
502613 if is_dist_info || is_data_dir || is_import_root {
503614 let src = entry. path ( ) ;
504- let dst = cache_path . join ( & name) ;
615+ let dst = staging . join ( & name) ;
505616 if src. is_dir ( ) {
506617 let _ = copy_dir_recursive ( & src, & dst) ;
507618 } else {
@@ -510,9 +621,6 @@ fn populate_shared_cache(spec: &WheelSpec, site_packages: &Path) {
510621 }
511622 }
512623 }
513-
514- // Mark cache as complete
515- let _ = std:: fs:: File :: create ( cache_path. join ( ".complete" ) ) ;
516624}
517625
518626/// Recursively hardlink all files from src tree into dst.
@@ -533,9 +641,14 @@ fn hardlink_tree(src: &Path, dst: &Path) -> Result<()> {
533641 if src_path. is_dir ( ) {
534642 std:: fs:: create_dir_all ( & dst_path) ?;
535643 hardlink_tree ( & src_path, & dst_path) ?;
536- } else {
644+ } else if !dst_path . exists ( ) {
537645 // Try hardlink first, fall back to copy (cross-device)
538- if std:: fs:: hard_link ( & src_path, & dst_path) . is_err ( ) {
646+ if let Err ( e) = std:: fs:: hard_link ( & src_path, & dst_path) {
647+ tracing:: debug!(
648+ "Hardlink failed ({}), falling back to copy: {}" ,
649+ e,
650+ src_path. display( )
651+ ) ;
539652 std:: fs:: copy ( & src_path, & dst_path) ?;
540653 }
541654 }
@@ -841,8 +954,83 @@ async fn main() -> Result<()> {
841954 eprintln ! ( " {} ({:.1} MB)" , w. distribution, w. size as f64 / 1024.0 / 1024.0 ) ;
842955 }
843956
844- run_engine ( wheels, site_packages, parallel_downloads, extract_threads) . await
957+ run_engine ( wheels. clone ( ) , site_packages. clone ( ) , parallel_downloads, extract_threads) . await ?;
958+
959+ // Populate shared cache for future environments
960+ eprintln ! ( "Populating shared cache..." ) ;
961+ for spec in & wheels {
962+ populate_shared_cache ( spec, & site_packages) ;
963+ }
964+ eprintln ! ( "Shared cache populated ({} wheels)" , wheels. len( ) ) ;
965+
966+ Ok ( ( ) )
845967 }
968+
969+ Command :: Cache { action } => {
970+ let base = cache_dir ( ) ;
971+ match action {
972+ CacheAction :: Info => {
973+ let envs = dir_size ( & base. join ( "envs" ) ) ;
974+ let shared = dir_size ( & base. join ( "shared_wheels" ) ) ;
975+ let pylock = dir_size ( & base. join ( "pylock" ) ) ;
976+ let total = envs + shared + pylock;
977+
978+ eprintln ! ( "Cache directory: {}" , base. display( ) ) ;
979+ eprintln ! ( " Environments: {}" , human_size( envs) ) ;
980+ eprintln ! ( " Shared wheels: {}" , human_size( shared) ) ;
981+ eprintln ! ( " Resolution: {}" , human_size( pylock) ) ;
982+ eprintln ! ( " Total: {}" , human_size( total) ) ;
983+ Ok ( ( ) )
984+ }
985+ CacheAction :: Clean => {
986+ let size = dir_size ( & base) ;
987+ if base. exists ( ) {
988+ std:: fs:: remove_dir_all ( & base)
989+ . context ( "failed to remove cache directory" ) ?;
990+ eprintln ! ( "Removed {} ({})" , base. display( ) , human_size( size) ) ;
991+ } else {
992+ eprintln ! ( "Cache directory does not exist: {}" , base. display( ) ) ;
993+ }
994+ Ok ( ( ) )
995+ }
996+ }
997+ }
998+ }
999+ }
1000+
/// Recursively compute directory size in bytes.
///
/// Sums the length of every non-directory entry below `path`, descending
/// into sub-directories. Entry types are taken from the directory entry
/// itself (falling back to `symlink_metadata`), i.e. without following
/// symlinks, so a symlink contributes its own metadata length and is not
/// descended into.
///
/// Returns 0 when `path` does not exist, is not a directory, or cannot be
/// read. Entries whose type or metadata cannot be determined count as 0
/// instead of aborting the walk.
fn dir_size(path: &Path) -> u64 {
    let entries = match std::fs::read_dir(path) {
        Ok(entries) => entries,
        // Missing path, plain file, permission error: nothing to count.
        Err(_) => return 0,
    };

    entries
        .flatten()
        .map(|entry| {
            let file_type = entry
                .file_type()
                .or_else(|_| std::fs::symlink_metadata(entry.path()).map(|m| m.file_type()));
            match file_type {
                Ok(ft) if ft.is_dir() => dir_size(&entry.path()),
                Ok(_) => entry.metadata().map(|m| m.len()).unwrap_or(0),
                // The entry vanished or is unreadable: skip it rather than panic.
                Err(_) => 0,
            }
        })
        .sum()
}
1023+
/// Render a byte count as a short human-readable string.
///
/// Uses binary multiples (1 KB = 1024 B). Values of at least 1 KB are shown
/// with one decimal place in the largest fitting unit among KB, MB and GB;
/// smaller values are printed as an exact integer followed by ` B`.
fn human_size(bytes: u64) -> String {
    const KIB: u64 = 1024;
    const MIB: u64 = 1024 * KIB;
    const GIB: u64 = 1024 * MIB;

    // Dividing by a power of two is exact, so this matches repeated `/ 1024.0`.
    let in_units_of = |unit: u64| bytes as f64 / unit as f64;

    match bytes {
        b if b >= GIB => format!("{:.1} GB", in_units_of(GIB)),
        b if b >= MIB => format!("{:.1} MB", in_units_of(MIB)),
        b if b >= KIB => format!("{:.1} KB", in_units_of(KIB)),
        b => format!("{b} B"),
    }
}
8481036
0 commit comments