diff --git a/src/authorship/authorship_log_serialization.rs b/src/authorship/authorship_log_serialization.rs index 62ff33ac2e..f48a2a41d7 100644 --- a/src/authorship/authorship_log_serialization.rs +++ b/src/authorship/authorship_log_serialization.rs @@ -1,7 +1,6 @@ use crate::authorship::authorship_log::{ Author, HumanRecord, LineRange, PromptRecord, SessionRecord, }; -use crate::authorship::working_log::CheckpointKind; use crate::git::repository::Repository; use rand::RngExt; use serde::{Deserialize, Serialize}; @@ -9,7 +8,6 @@ use sha2::{Digest, Sha256}; use std::collections::{BTreeMap, HashMap}; use std::fmt; use std::io::{BufRead, Write}; -use std::time::{SystemTime, UNIX_EPOCH}; /// Authorship log format version identifier pub const AUTHORSHIP_LOG_VERSION: &str = "authorship/3.0.0"; @@ -74,33 +72,6 @@ impl AttestationEntry { pub fn new(hash: String, line_ranges: Vec) -> Self { Self { hash, line_ranges } } - - #[allow(dead_code)] - pub fn remove_line_ranges(&mut self, to_remove: &[LineRange]) { - let mut current_ranges = self.line_ranges.clone(); - - for remove_range in to_remove { - let mut new_ranges = Vec::new(); - for existing_range in ¤t_ranges { - new_ranges.extend(existing_range.remove(remove_range)); - } - current_ranges = new_ranges; - } - - self.line_ranges = current_ranges; - } - - /// Shift line ranges by a given offset starting at insertion_point - #[allow(dead_code)] - pub fn shift_line_ranges(&mut self, insertion_point: u32, offset: i32) { - let mut shifted_ranges = Vec::new(); - for range in &self.line_ranges { - if let Some(shifted) = range.shift(insertion_point, offset) { - shifted_ranges.push(shifted); - } - } - self.line_ranges = shifted_ranges; - } } /// Per-file attestation data @@ -356,214 +327,6 @@ impl AuthorshipLog { } None } - - /// Convert authorship log to working log checkpoints for merge --squash - /// - /// Creates one checkpoint per file per session that touched that file. This ensures that: - /// - Each checkpoint has a single file entry - /// - Blobs can be saved individually per checkpoint without ordering issues - /// - Future diffs are computed against the correct base state - /// - /// # Arguments - /// * `_human_author` - Unused (human checkpoints are not created for squash merges) - /// - /// # Returns - /// Vector of checkpoints, one per file per session (no human checkpoint) - #[allow(dead_code)] - pub fn convert_to_checkpoints_for_squash( - &self, - file_contents: &HashMap, - ) -> Result, Box> { - use crate::authorship::attribution_tracker::{ - LineAttribution, line_attributions_to_attributions, - }; - use crate::authorship::authorship_log::PromptRecord; - use crate::authorship::working_log::{Checkpoint, WorkingLogEntry}; - use std::collections::{HashMap, HashSet}; - - let mut checkpoints = Vec::new(); - - // Get the current timestamp in milliseconds since the Unix epoch - let ts = SystemTime::now() - .duration_since(UNIX_EPOCH) - .unwrap_or_default() - .as_millis(); - - // Track all files that have attestations - let mut all_files: HashSet = HashSet::new(); - for file_attestation in &self.attestations { - all_files.insert(file_attestation.file_path.clone()); - } - - // Build AI checkpoints - one per file - // For each file, we need to collect all the sessions that contributed to it - for file_path in &all_files { - // Find the file attestation - let file_attestation = - match self.attestations.iter().find(|f| f.file_path == *file_path) { - Some(f) => f, - None => continue, - }; - - // Group entries by session hash to preserve prompt information - let mut session_lines: HashMap> = HashMap::new(); - for entry in &file_attestation.entries { - session_lines - .entry(entry.hash.clone()) - .or_default() - .extend(entry.line_ranges.clone()); - } - - if session_lines.is_empty() { - continue; - } - - let file_content = file_contents - .get(file_path) - .ok_or_else(|| format!("Missing file content for: {}", file_path))?; - - // Sort sessions for deterministic output - let mut session_entries: Vec<(String, Vec)> = - session_lines.into_iter().collect(); - session_entries.sort_by(|a, b| a.0.cmp(&b.0)); - - let mut combined_line_attributions: Vec = Vec::new(); - let mut session_prompt_records: Vec = Vec::new(); - - for (session_hash, ranges) in &session_entries { - // Skip known-human attestations — they don't have prompt records - if session_hash.starts_with("h_") { - continue; - } - - // s_-prefixed hashes are session attestations — look up in sessions map - let prompt_record = if session_hash.starts_with("s_") { - let session_key = session_hash.split("::").next().unwrap_or(session_hash); - self.metadata - .sessions - .get(session_key) - .ok_or_else(|| { - format!("Missing session record for hash: {}", session_hash) - })? - .to_prompt_record() - } else { - self.metadata - .prompts - .get(session_hash) - .ok_or_else(|| format!("Missing prompt record for hash: {}", session_hash))? - .clone() - }; - - // Expand ranges to individual lines, then compress to working log format - let mut all_lines: Vec = Vec::new(); - for range in ranges { - all_lines.extend(range.expand()); - } - if all_lines.is_empty() { - continue; - } - all_lines.sort_unstable(); - all_lines.dedup(); - - // IMPORTANT: Use the session_hash that will be regenerated from agent_id when applying checkpoint - // This ensures line attributions match the prompts in metadata after apply_checkpoint - let prompt_hash = - generate_short_hash(&prompt_record.agent_id.id, &prompt_record.agent_id.tool); - // TODO Update authorship to store overridden state for line ranges - let line_attributions = - compress_lines_to_working_log_format(&all_lines, &prompt_hash, None); - - combined_line_attributions.extend(line_attributions); - session_prompt_records.push(prompt_record); - } - - if combined_line_attributions.is_empty() { - continue; - } - - combined_line_attributions.sort_by(|a, b| { - a.start_line - .cmp(&b.start_line) - .then(a.end_line.cmp(&b.end_line)) - .then(a.author_id.cmp(&b.author_id)) - }); - - let attributions = line_attributions_to_attributions( - &combined_line_attributions, - file_content.as_str(), - ts, - ); - - for prompt_record in session_prompt_records { - let entry = WorkingLogEntry::new( - file_path.clone(), - String::new(), // Empty blob_sha - will be set by caller - attributions.clone(), - combined_line_attributions.clone(), - ); - - let mut ai_checkpoint = Checkpoint::new( - CheckpointKind::AiAgent, // TODO Pull exact from prompt record? - String::new(), // Empty diff hash - "ai".to_string(), - vec![entry], - ); - ai_checkpoint.agent_id = Some(prompt_record.agent_id.clone()); - - // TODO Fill in the LineStats - - // Reconstruct transcript from messages - // Transcript no longer stored in checkpoints - checkpoints.push(ai_checkpoint); - } - } - - Ok(checkpoints) - } -} - -/// Convert line numbers to working log Line format (Single/Range) -fn compress_lines_to_working_log_format( - lines: &[u32], - author_id: &str, - overrode: Option, -) -> Vec { - use crate::authorship::attribution_tracker::LineAttribution; - - if lines.is_empty() { - return vec![]; - } - - let mut result = Vec::new(); - let mut start = lines[0]; - let mut end = lines[0]; - - for &line in &lines[1..] { - if line == end + 1 { - // Consecutive line, extend range - end = line; - } else { - // Gap found, save current range and start new one - result.push(LineAttribution::new( - start, - end, - author_id.to_string(), - overrode.clone(), - )); - start = line; - end = line; - } - } - - // Add the final range - result.push(LineAttribution::new( - start, - end, - author_id.to_string(), - overrode.clone(), - )); - - result } impl Default for AuthorshipLog { @@ -1016,249 +779,6 @@ mod tests { assert_eq!(deserialized.attestations.len(), 0); } - #[test] - fn test_remove_line_ranges_complete_removal() { - let mut entry = - AttestationEntry::new("test_hash".to_string(), vec![LineRange::Range(2, 5)]); - - // Remove the exact same range - entry.remove_line_ranges(&[LineRange::Range(2, 5)]); - - // Should be empty after removing the exact range - assert!( - entry.line_ranges.is_empty(), - "Expected empty line_ranges after complete removal, got: {:?}", - entry.line_ranges - ); - } - - #[test] - fn test_remove_line_ranges_partial_removal() { - let mut entry = - AttestationEntry::new("test_hash".to_string(), vec![LineRange::Range(2, 10)]); - - // Remove middle part - entry.remove_line_ranges(&[LineRange::Range(5, 7)]); - - // Should have two ranges: [2-4] and [8-10] - assert_eq!(entry.line_ranges.len(), 2); - assert_eq!(entry.line_ranges[0], LineRange::Range(2, 4)); - assert_eq!(entry.line_ranges[1], LineRange::Range(8, 10)); - } - - #[test] - fn test_convert_authorship_log_to_checkpoints() { - use crate::authorship::transcript::{AiTranscript, Message}; - use crate::authorship::working_log::AgentId; - use std::collections::HashMap; - - // Create an authorship log with both AI and human-attributed lines - let mut log = AuthorshipLog::new(); - log.metadata.base_commit_sha = "base123".to_string(); - - // Add AI prompt session - let agent_id = AgentId { - tool: "cursor".to_string(), - id: "session_abc".to_string(), - model: "claude-3-sonnet".to_string(), - }; - let mut transcript = AiTranscript::new(); - transcript.add_message(Message::user("Add error handling".to_string(), None)); - transcript.add_message(Message::assistant("Added error handling".to_string(), None)); - - let session_hash = generate_short_hash(&agent_id.id, &agent_id.tool); - log.metadata.prompts.insert( - session_hash.clone(), - crate::authorship::authorship_log::PromptRecord { - agent_id: agent_id.clone(), - human_author: Some("alice@example.com".to_string()), - total_additions: 15, - total_deletions: 3, - accepted_lines: 11, - overriden_lines: 0, - custom_attributes: None, - messages_url: None, - }, - ); - - // Add file attestations - AI owns lines 1-5, 10-15 - let mut file1 = FileAttestation::new("src/main.rs".to_string()); - file1.add_entry(AttestationEntry::new( - session_hash.clone(), - vec![LineRange::Range(1, 5), LineRange::Range(10, 15)], - )); - log.attestations.push(file1); - - // Create file contents (11 lines total for AI-attributed lines) - let mut file_contents = HashMap::new(); - file_contents.insert( - "src/main.rs".to_string(), - "line1\nline2\nline3\nline4\nline5\nline6\nline7\nline8\nline9\nline10\nline11\nline12\nline13\nline14\nline15\n".to_string(), - ); - - // Convert to checkpoints - let result = log.convert_to_checkpoints_for_squash(&file_contents); - assert!(result.is_ok()); - let checkpoints = result.unwrap(); - - // Should have 1 checkpoint: 1 AI only (no human checkpoint) - assert_eq!(checkpoints.len(), 1); - - // Checkpoint should be AI with original lines - let ai_checkpoint = &checkpoints[0]; - assert_eq!(ai_checkpoint.author, "ai"); - assert!(ai_checkpoint.agent_id.is_some()); - assert_eq!(ai_checkpoint.agent_id.as_ref().unwrap().tool, "cursor"); - // Transcript field removed from Checkpoint - assert_eq!(ai_checkpoint.entries.len(), 1); - let ai_entry = &ai_checkpoint.entries[0]; - assert_eq!(ai_entry.file, "src/main.rs"); - - // Verify line attributions instead of added_lines/deleted_lines - assert!(!ai_entry.line_attributions.is_empty()); - // Should have line attributions for lines 1-5 and 10-15 - let total_lines: u32 = ai_entry - .line_attributions - .iter() - .map(|attr| attr.end_line - attr.start_line + 1) - .sum(); - assert_eq!(total_lines, 11); // 5 lines (1-5) + 6 lines (10-15) - } - - #[test] - fn test_convert_authorship_log_multiple_ai_sessions() { - use crate::authorship::transcript::{AiTranscript, Message}; - use crate::authorship::working_log::AgentId; - - // Create authorship log with 2 different AI sessions - let mut log = AuthorshipLog::new(); - log.metadata.base_commit_sha = "base456".to_string(); - - // First AI session - let agent1 = AgentId { - tool: "cursor".to_string(), - id: "session_1".to_string(), - model: "claude-3-sonnet".to_string(), - }; - let mut transcript1 = AiTranscript::new(); - transcript1.add_message(Message::user("Add function".to_string(), None)); - transcript1.add_message(Message::assistant("Added function".to_string(), None)); - let session1_hash = generate_short_hash(&agent1.id, &agent1.tool); - log.metadata.prompts.insert( - session1_hash.clone(), - crate::authorship::authorship_log::PromptRecord { - agent_id: agent1, - human_author: Some("bob@example.com".to_string()), - total_additions: 10, - total_deletions: 0, - accepted_lines: 10, - overriden_lines: 0, - custom_attributes: None, - messages_url: None, - }, - ); - - // Second AI session - let agent2 = AgentId { - tool: "cursor".to_string(), - id: "session_2".to_string(), - model: "claude-3-opus".to_string(), - }; - let mut transcript2 = AiTranscript::new(); - transcript2.add_message(Message::user("Add tests".to_string(), None)); - transcript2.add_message(Message::assistant("Added tests".to_string(), None)); - let session2_hash = generate_short_hash(&agent2.id, &agent2.tool); - log.metadata.prompts.insert( - session2_hash.clone(), - crate::authorship::authorship_log::PromptRecord { - agent_id: agent2, - human_author: Some("bob@example.com".to_string()), - total_additions: 20, - total_deletions: 0, - accepted_lines: 20, - overriden_lines: 0, - custom_attributes: None, - messages_url: None, - }, - ); - - // File with both sessions, plus some human lines - let mut file1 = FileAttestation::new("src/lib.rs".to_string()); - file1.add_entry(AttestationEntry::new( - session1_hash.clone(), - vec![LineRange::Range(1, 10)], - )); - file1.add_entry(AttestationEntry::new( - session2_hash.clone(), - vec![LineRange::Range(11, 30)], - )); - // Human owns lines 31-40 (implicitly, by not being in any AI attestation) - log.attestations.push(file1); - - // Create file contents - use std::collections::HashMap; - let mut file_contents = HashMap::new(); - let mut content = String::new(); - for i in 1..=30 { - content.push_str(&format!("line{}\n", i)); - } - file_contents.insert("src/lib.rs".to_string(), content); - - // Convert to checkpoints - let result = log.convert_to_checkpoints_for_squash(&file_contents); - assert!(result.is_ok()); - let checkpoints = result.unwrap(); - - // Should have 2 AI checkpoints (no human lines since we only have AI-attributed lines 1-30) - assert_eq!(checkpoints.len(), 2); - - // Both are AI sessions - let ai_checkpoints: Vec<_> = checkpoints - .iter() - .filter(|c| c.agent_id.is_some()) - .collect(); - assert_eq!(ai_checkpoints.len(), 2); - - // Verify that the AI sessions are distinct - assert_ne!( - ai_checkpoints[0].agent_id.as_ref().unwrap().id, - ai_checkpoints[1].agent_id.as_ref().unwrap().id - ); - - // Each checkpoint should contain the full attribution state for the file - assert_eq!(ai_checkpoints[0].entries.len(), 1); - assert_eq!(ai_checkpoints[1].entries.len(), 1); - let entry1 = &ai_checkpoints[0].entries[0]; - let entry2 = &ai_checkpoints[1].entries[0]; - assert_eq!(entry1.line_attributions, entry2.line_attributions); - assert_eq!(entry1.attributions, entry2.attributions); - assert!(!entry1.line_attributions.is_empty()); - assert!(!entry1.attributions.is_empty()); - - let total_lines: u32 = entry1 - .line_attributions - .iter() - .map(|attr| attr.end_line - attr.start_line + 1) - .sum(); - assert_eq!(total_lines, 30); - - let lines_session1: u32 = entry1 - .line_attributions - .iter() - .filter(|attr| attr.author_id.as_str() == session1_hash.as_str()) - .map(|attr| attr.end_line - attr.start_line + 1) - .sum(); - assert_eq!(lines_session1, 10); - - let lines_session2: u32 = entry1 - .line_attributions - .iter() - .filter(|attr| attr.author_id.as_str() == session2_hash.as_str()) - .map(|attr| attr.end_line - attr.start_line + 1) - .sum(); - assert_eq!(lines_session2, 20); - } - #[test] fn test_generate_human_short_hash() { let hash = generate_human_short_hash("Alice Smith "); @@ -1278,137 +798,6 @@ mod tests { ); } - /// Test that `convert_to_checkpoints_for_squash` correctly skips h_ attestation entries - /// rather than failing with "Missing prompt record". - #[test] - fn test_convert_to_checkpoints_skips_h_entries() { - use crate::authorship::transcript::{AiTranscript, Message}; - use crate::authorship::working_log::AgentId; - use std::collections::HashMap; - - let mut log = AuthorshipLog::new(); - log.metadata.base_commit_sha = "base123".to_string(); - - // AI session - let agent_id = AgentId { - tool: "cursor".to_string(), - id: "session_abc".to_string(), - model: "claude-3-sonnet".to_string(), - }; - let mut transcript = AiTranscript::new(); - transcript.add_message(Message::user("Write a helper".to_string(), None)); - transcript.add_message(Message::assistant("Here it is".to_string(), None)); - let ai_hash = generate_short_hash(&agent_id.id, &agent_id.tool); - log.metadata.prompts.insert( - ai_hash.clone(), - crate::authorship::authorship_log::PromptRecord { - agent_id, - human_author: None, - total_additions: 5, - total_deletions: 0, - accepted_lines: 5, - overriden_lines: 0, - custom_attributes: None, - messages_url: None, - }, - ); - - // Known-human attestation — h_ hash present in attestations but NOT in prompts. - let human_hash = generate_human_short_hash("Alice "); - log.metadata.humans.insert( - human_hash.clone(), - crate::authorship::authorship_log::HumanRecord { - author: "Alice".to_string(), - }, - ); - - // File: AI owns lines 1-5, human owns lines 6-10 - let mut file1 = FileAttestation::new("src/lib.rs".to_string()); - file1.add_entry(AttestationEntry::new( - ai_hash.clone(), - vec![LineRange::Range(1, 5)], - )); - file1.add_entry(AttestationEntry::new( - human_hash.clone(), - vec![LineRange::Range(6, 10)], - )); - log.attestations.push(file1); - - let mut file_contents = HashMap::new(); - let content: String = (1..=10).map(|i| format!("line{}\n", i)).collect(); - file_contents.insert("src/lib.rs".to_string(), content); - - // Must succeed — h_ entry must be silently skipped - let result = log.convert_to_checkpoints_for_squash(&file_contents); - assert!( - result.is_ok(), - "convert_to_checkpoints_for_squash should not fail on h_ entries: {:?}", - result.err() - ); - let checkpoints = result.unwrap(); - - // Only 1 AI checkpoint — the human entry has no corresponding prompt record - assert_eq!(checkpoints.len(), 1); - assert_eq!(checkpoints[0].author, "ai"); - } - - /// Test that `convert_to_checkpoints_for_squash` correctly handles s_ session attestations - /// by looking them up in the sessions map rather than the prompts map. - #[test] - fn test_convert_to_checkpoints_handles_s_session_entries() { - use crate::authorship::working_log::AgentId; - use std::collections::HashMap; - - let mut log = AuthorshipLog::new(); - log.metadata.base_commit_sha = "base456".to_string(); - - let agent_id = AgentId { - tool: "claude".to_string(), - id: "conv_abc123".to_string(), - model: "claude-sonnet-4-5-20250514".to_string(), - }; - - // Generate session ID the same way production code does - let session_key = generate_session_id(&agent_id.id, &agent_id.tool); - let trace_id = generate_trace_id(); - let attestation_hash = format!("{}::{}", session_key, trace_id); - - // Insert into sessions map (NOT prompts map) - log.metadata.sessions.insert( - session_key.clone(), - crate::authorship::authorship_log::SessionRecord { - agent_id: agent_id.clone(), - human_author: Some("dev@example.com".to_string()), - custom_attributes: None, - }, - ); - - // File with session-format attestation - let mut file1 = FileAttestation::new("src/main.rs".to_string()); - file1.add_entry(AttestationEntry::new( - attestation_hash.clone(), - vec![LineRange::Range(1, 3)], - )); - log.attestations.push(file1); - - let mut file_contents = HashMap::new(); - file_contents.insert( - "src/main.rs".to_string(), - "line1\nline2\nline3\n".to_string(), - ); - - let result = log.convert_to_checkpoints_for_squash(&file_contents); - assert!( - result.is_ok(), - "convert_to_checkpoints_for_squash must handle s_ session entries: {:?}", - result.err() - ); - let checkpoints = result.unwrap(); - assert_eq!(checkpoints.len(), 1); - assert_eq!(checkpoints[0].agent_id.as_ref().unwrap().tool, "claude"); - assert_eq!(checkpoints[0].agent_id.as_ref().unwrap().id, "conv_abc123"); - } - // TODO: `get_line_attribution` routing for h_ hashes requires a live `Repository` instance // and cannot be unit-tested here without significant mocking infrastructure. // The h_-routing path (returning HumanRecord data instead of PromptRecord) is covered by