// Summary compression (`rust/crates/runtime/src/summary_compression.rs`).
//
// Shrinks a multi-line conversation summary so that it fits a character/line
// budget: whitespace is collapsed, blank and duplicate lines are dropped,
// over-long lines are truncated, and the remaining lines are kept in order of
// importance (well-known summary fields first, free text last).
//
// Registration: `rust/crates/runtime/src/lib.rs` needs exactly one new line,
// `pub mod summary_compression;`, between `pub mod stale_branch;` and
// `pub mod task_registry;`.
// NOTE(review): the patch this file came from also added a `||||||| f76311f`
// line to lib.rs. That is a leftover merge-conflict marker and must not be
// committed.

use std::collections::BTreeSet;

const DEFAULT_MAX_CHARS: usize = 1_200;
const DEFAULT_MAX_LINES: usize = 24;
const DEFAULT_MAX_LINE_CHARS: usize = 160;

/// Numerically largest (least important) value returned by `line_priority`.
const LOWEST_LINE_PRIORITY: usize = 3;

/// Limits applied by [`compress_summary`]. All limits count `char`s, not bytes.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct SummaryCompressionBudget {
    /// Maximum size of the whole compressed summary, newlines included.
    /// `0` makes [`compress_summary`] return an empty summary.
    pub max_chars: usize,
    /// Maximum number of lines in the compressed summary.
    /// `0` makes [`compress_summary`] return an empty summary.
    pub max_lines: usize,
    /// Maximum length of a single line; longer lines end in `…`.
    /// `0` disables per-line truncation.
    pub max_line_chars: usize,
}

impl Default for SummaryCompressionBudget {
    fn default() -> Self {
        Self {
            max_chars: DEFAULT_MAX_CHARS,
            max_lines: DEFAULT_MAX_LINES,
            max_line_chars: DEFAULT_MAX_LINE_CHARS,
        }
    }
}

/// Outcome of [`compress_summary`]: the compressed text plus bookkeeping.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SummaryCompressionResult {
    /// The compressed summary, lines joined with `\n`.
    pub summary: String,
    /// `char` count of the input.
    pub original_chars: usize,
    /// `char` count of `summary`.
    pub compressed_chars: usize,
    /// Line count of the input (as reported by `str::lines`).
    pub original_lines: usize,
    /// Line count of `summary`, including the omission notice when present.
    pub compressed_lines: usize,
    /// Lines dropped because an ASCII-case-insensitive duplicate came earlier.
    pub removed_duplicate_lines: usize,
    /// Unique, non-blank lines that did not make it into `summary`.
    pub omitted_lines: usize,
    /// `true` when `summary` differs from the trimmed input. On the
    /// empty-output path it is `true` whenever the input was non-empty.
    pub truncated: bool,
}

/// Compresses `summary` so that it fits `budget`.
///
/// Lines are normalized (inline whitespace collapsed, blank lines and
/// duplicates removed, long lines truncated), then picked greedily by
/// priority while they fit the budget. Selected lines keep their original
/// relative order. If nothing fits, the first normalized line is kept,
/// truncated to `budget.max_chars`. When lines were omitted and there is still
/// room, a `- … N additional line(s) omitted.` notice is appended.
#[must_use]
pub fn compress_summary(
    summary: &str,
    budget: SummaryCompressionBudget,
) -> SummaryCompressionResult {
    let original_chars = summary.chars().count();
    let original_lines = summary.lines().count();

    let normalized = normalize_lines(summary, budget.max_line_chars);
    if normalized.lines.is_empty() || budget.max_chars == 0 || budget.max_lines == 0 {
        return SummaryCompressionResult {
            summary: String::new(),
            original_chars,
            compressed_chars: 0,
            original_lines,
            compressed_lines: 0,
            removed_duplicate_lines: normalized.removed_duplicate_lines,
            omitted_lines: normalized.lines.len(),
            truncated: original_chars > 0,
        };
    }

    let mut compressed_lines: Vec<String> = select_line_indexes(&normalized.lines, budget)
        .into_iter()
        .map(|index| normalized.lines[index].clone())
        .collect();
    if compressed_lines.is_empty() {
        // Not even one line fits `max_chars`; keep a clipped first line rather
        // than returning nothing for a non-empty summary.
        compressed_lines.push(truncate_line(&normalized.lines[0], budget.max_chars));
    }

    let omitted_lines = normalized
        .lines
        .len()
        .saturating_sub(compressed_lines.len());
    if omitted_lines > 0 {
        // Best effort: the notice is dropped silently when the budget is full.
        push_line_with_budget(&mut compressed_lines, omission_notice(omitted_lines), budget);
    }

    let compressed_summary = compressed_lines.join("\n");
    let truncated = compressed_summary != summary.trim();

    SummaryCompressionResult {
        compressed_chars: compressed_summary.chars().count(),
        compressed_lines: compressed_lines.len(),
        removed_duplicate_lines: normalized.removed_duplicate_lines,
        omitted_lines,
        truncated,
        summary: compressed_summary,
        original_chars,
        original_lines,
    }
}

/// Convenience wrapper: compresses `summary` with the default budget and
/// returns only the text.
#[must_use]
pub fn compress_summary_text(summary: &str) -> String {
    compress_summary(summary, SummaryCompressionBudget::default()).summary
}

/// Unique, non-blank, whitespace-collapsed lines plus the duplicate count.
#[derive(Debug, Default)]
struct NormalizedSummary {
    lines: Vec<String>,
    removed_duplicate_lines: usize,
}

/// Collapses whitespace, skips blank lines, truncates each line to
/// `max_line_chars` and removes later duplicates (compared after truncation,
/// ignoring ASCII case).
fn normalize_lines(summary: &str, max_line_chars: usize) -> NormalizedSummary {
    let mut seen = BTreeSet::new();
    let mut lines = Vec::new();
    let mut removed_duplicate_lines = 0;

    for raw_line in summary.lines() {
        let collapsed = collapse_inline_whitespace(raw_line);
        if collapsed.is_empty() {
            continue;
        }

        let truncated = truncate_line(&collapsed, max_line_chars);
        if !seen.insert(dedupe_key(&truncated)) {
            removed_duplicate_lines += 1;
            continue;
        }

        lines.push(truncated);
    }

    NormalizedSummary {
        lines,
        removed_duplicate_lines,
    }
}

/// Returns the indexes (ascending) of the lines to keep.
///
/// Lines are considered one priority class at a time, most important first,
/// and in document order within a class. A line is taken when adding it keeps
/// the joined text within `budget`; a line that is too long is skipped, and
/// shorter lines after it may still be taken.
fn select_line_indexes(lines: &[String], budget: SummaryCompressionBudget) -> Vec<usize> {
    let priorities: Vec<usize> = lines.iter().map(|line| line_priority(line)).collect();
    let mut selected: Vec<usize> = Vec::new();
    // Char count of the selected lines joined with single `\n` separators,
    // maintained incrementally instead of re-joining for every candidate.
    let mut used_chars = 0usize;

    'passes: for priority in 0..=LOWEST_LINE_PRIORITY {
        for (index, line) in lines.iter().enumerate() {
            if selected.len() >= budget.max_lines {
                break 'passes;
            }
            if priorities[index] != priority {
                continue;
            }

            let separator = usize::from(!selected.is_empty());
            let candidate_chars = used_chars + separator + line.chars().count();
            if candidate_chars > budget.max_chars {
                continue;
            }

            selected.push(index);
            used_chars = candidate_chars;
        }
    }

    // Restore document order; selection order was by priority.
    selected.sort_unstable();
    selected
}

/// Appends `line` to `lines` only if the result still fits `budget`.
fn push_line_with_budget(lines: &mut Vec<String>, line: String, budget: SummaryCompressionBudget) {
    let fits_line_budget = lines.len() < budget.max_lines;
    let joined_chars = joined_char_count(
        lines
            .iter()
            .map(String::as_str)
            .chain(std::iter::once(line.as_str())),
    );

    if fits_line_budget && joined_chars <= budget.max_chars {
        lines.push(line);
    }
}

/// Char count of `lines` joined with single-character separators.
fn joined_char_count<'a>(lines: impl IntoIterator<Item = &'a str>) -> usize {
    let (chars, count) = lines
        .into_iter()
        .fold((0usize, 0usize), |(chars, count), line| {
            (chars + line.chars().count(), count + 1)
        });
    chars + count.saturating_sub(1)
}

/// Importance class of a normalized line; lower is more important.
///
/// 0: summary title or well-known detail field, 1: any other line ending in
/// `:`, 2: bullet, 3: everything else.
fn line_priority(line: &str) -> usize {
    if line == "Summary:" || line == "Conversation summary:" || is_core_detail(line) {
        0
    } else if is_section_header(line) {
        1
    } else if line.trim_start().starts_with("- ") {
        // Normalized lines have no leading whitespace, so nested bullets land
        // here as well; `trim_start` keeps that true for un-normalized input.
        2
    } else {
        LOWEST_LINE_PRIORITY
    }
}

/// Whether `line` starts with one of the well-known summary field labels.
fn is_core_detail(line: &str) -> bool {
    [
        "- Scope:",
        "- Current work:",
        "- Pending work:",
        "- Key files referenced:",
        "- Tools mentioned:",
        "- Recent user requests:",
        "- Previously compacted context:",
        "- Newly compacted context:",
    ]
    .iter()
    .any(|prefix| line.starts_with(prefix))
}

/// Whether `line` looks like a section header (ends with `:`).
fn is_section_header(line: &str) -> bool {
    line.ends_with(':')
}

/// Text of the trailing notice reporting how many lines were left out.
fn omission_notice(omitted_lines: usize) -> String {
    format!("- … {omitted_lines} additional line(s) omitted.")
}

/// Trims `line` and replaces every whitespace run with a single space.
fn collapse_inline_whitespace(line: &str) -> String {
    line.split_whitespace().collect::<Vec<_>>().join(" ")
}

/// Shortens `line` to at most `max_chars` chars, ending in `…` when clipped.
/// `max_chars == 0` means "no limit" and returns the line unchanged.
fn truncate_line(line: &str, max_chars: usize) -> String {
    if max_chars == 0 || line.chars().count() <= max_chars {
        return line.to_string();
    }

    // Reserve one char for the ellipsis; with `max_chars == 1` this yields "…".
    let mut truncated: String = line.chars().take(max_chars - 1).collect();
    truncated.push('…');
    truncated
}

/// Key used to detect duplicate lines (ASCII-case-insensitive).
fn dedupe_key(line: &str) -> String {
    line.to_ascii_lowercase()
}

#[cfg(test)]
mod tests {
    use super::{compress_summary, compress_summary_text, SummaryCompressionBudget};

    #[test]
    fn collapses_whitespace_and_duplicate_lines() {
        // given
        let summary = "Conversation summary:\n\n- Scope:   compact   earlier messages.\n- Scope: compact earlier messages.\n- Current work: update runtime module.\n";

        // when
        let result = compress_summary(summary, SummaryCompressionBudget::default());

        // then
        assert_eq!(result.removed_duplicate_lines, 1);
        assert!(result
            .summary
            .contains("- Scope: compact earlier messages."));
        assert!(!result.summary.contains("  "));
    }

    #[test]
    fn keeps_core_lines_when_budget_is_tight() {
        // given
        let summary = [
            "Conversation summary:",
            "- Scope: 18 earlier messages compacted.",
            "- Current work: finish summary compression.",
            "- Key timeline:",
            "  - user: asked for a working implementation.",
            "  - assistant: inspected runtime compaction flow.",
            "  - tool: cargo check succeeded.",
        ]
        .join("\n");

        // when
        let result = compress_summary(
            &summary,
            SummaryCompressionBudget {
                max_chars: 120,
                max_lines: 3,
                max_line_chars: 80,
            },
        );

        // then
        assert!(result.summary.contains("Conversation summary:"));
        assert!(result
            .summary
            .contains("- Scope: 18 earlier messages compacted."));
        assert!(result
            .summary
            .contains("- Current work: finish summary compression."));
        assert!(result.omitted_lines > 0);
    }

    #[test]
    fn provides_a_default_text_only_helper() {
        // given
        let summary = "Summary:\n\nA short line.";

        // when
        let compressed = compress_summary_text(summary);

        // then
        assert_eq!(compressed, "Summary:\nA short line.");
    }
}