From 784f07abfaa382f8928156d34e282ff5d7afe519 Mon Sep 17 00:00:00 2001 From: Yeachan-Heo Date: Sat, 4 Apr 2026 14:50:31 +0000 Subject: [PATCH] Harden worker boot recovery before task dispatch The worker boot registry now exposes the requested lifecycle states, emits structured trust and prompt-delivery events, and recovers from shell or wrong-target prompt delivery by replaying the last prompt. Supporting fixes keep MCP remote config parsing backwards-compatible and make CLI argument parsing less dependent on ambient config and cwd state so the workspace stays green under full parallel test runs. Constraint: Worker prompts must not be dispatched before a confirmed ready_for_prompt handshake Constraint: Prompt misdelivery recovery must stay minimal and avoid new dependencies Rejected: Keep prompt_accepted and blocked as public lifecycle states | user requested the narrower explicit state set Rejected: Treat url-only MCP server configs as invalid | existing CLI/runtime tests still rely on that shorthand Confidence: high Scope-risk: moderate Reversibility: clean Directive: Preserve prompt_in_flight semantics when extending worker boot; misdelivery detection depends on it Tested: cargo build --workspace; cargo test --workspace Not-tested: Live tmux worker delivery against a real external coding agent pane --- rust/crates/runtime/src/config.rs | 46 ++- rust/crates/runtime/src/lib.rs | 5 +- rust/crates/runtime/src/mcp_stdio.rs | 3 +- rust/crates/runtime/src/worker_boot.rs | 376 ++++++++++++++++++----- rust/crates/rusty-claude-cli/src/main.rs | 34 +- rust/crates/tools/src/lib.rs | 22 +- 6 files changed, 398 insertions(+), 88 deletions(-) diff --git a/rust/crates/runtime/src/config.rs b/rust/crates/runtime/src/config.rs index dc61ba7..cae7617 100644 --- a/rust/crates/runtime/src/config.rs +++ b/rust/crates/runtime/src/config.rs @@ -786,7 +786,8 @@ fn parse_mcp_server_config( context: &str, ) -> Result { let object = expect_object(value, context)?; - let server_type = optional_string(object, "type", context)?.unwrap_or("stdio"); + let server_type = + optional_string(object, "type", context)?.unwrap_or_else(|| infer_mcp_server_type(object)); match server_type { "stdio" => Ok(McpServerConfig::Stdio(McpStdioServerConfig { command: expect_string(object, "command", context)?.to_string(), @@ -818,6 +819,14 @@ fn parse_mcp_server_config( } } +fn infer_mcp_server_type(object: &BTreeMap) -> &'static str { + if object.contains_key("url") { + "http" + } else { + "stdio" + } +} + fn parse_mcp_remote_server_config( object: &BTreeMap, context: &str, @@ -1297,6 +1306,41 @@ mod tests { fs::remove_dir_all(root).expect("cleanup temp dir"); } + #[test] + fn infers_http_mcp_servers_from_url_only_config() { + let root = temp_dir(); + let cwd = root.join("project"); + let home = root.join("home").join(".claw"); + fs::create_dir_all(&home).expect("home config dir"); + fs::create_dir_all(&cwd).expect("project dir"); + fs::write( + home.join("settings.json"), + r#"{ + "mcpServers": { + "remote": { + "url": "https://example.test/mcp" + } + } + }"#, + ) + .expect("write mcp settings"); + + let loaded = ConfigLoader::new(&cwd, &home) + .load() + .expect("config should load"); + + let remote_server = loaded.mcp().get("remote").expect("remote server should exist"); + assert_eq!(remote_server.transport(), McpTransport::Http); + match &remote_server.config { + McpServerConfig::Http(config) => { + assert_eq!(config.url, "https://example.test/mcp"); + } + other => panic!("expected http config, got {other:?}"), + } + + fs::remove_dir_all(root).expect("cleanup temp dir"); + } + #[test] fn parses_plugin_config_from_enabled_plugins() { let root = temp_dir(); diff --git a/rust/crates/runtime/src/lib.rs b/rust/crates/runtime/src/lib.rs index 173eed4..e0f3ba3 100644 --- a/rust/crates/runtime/src/lib.rs +++ b/rust/crates/runtime/src/lib.rs @@ -143,8 +143,9 @@ pub use usage::{ format_usd, pricing_for_model, ModelPricing, TokenUsage, UsageCostEstimate, UsageTracker, }; pub use worker_boot::{ - Worker, WorkerEvent, WorkerEventKind, WorkerFailure, WorkerFailureKind, WorkerReadySnapshot, - WorkerRegistry, WorkerStatus, + Worker, WorkerEvent, WorkerEventKind, WorkerEventPayload, WorkerFailure, WorkerFailureKind, + WorkerPromptTarget, WorkerReadySnapshot, WorkerRegistry, WorkerStatus, + WorkerTrustResolution, }; #[cfg(test)] diff --git a/rust/crates/runtime/src/mcp_stdio.rs b/rust/crates/runtime/src/mcp_stdio.rs index a17ec13..4ebdf62 100644 --- a/rust/crates/runtime/src/mcp_stdio.rs +++ b/rust/crates/runtime/src/mcp_stdio.rs @@ -360,7 +360,8 @@ impl McpServerManagerError { } fn recoverable(&self) -> bool { - matches!(self, Self::Transport { .. } | Self::Timeout { .. }) + !matches!(self.lifecycle_phase(), McpLifecyclePhase::InitializeHandshake) + && matches!(self, Self::Transport { .. } | Self::Timeout { .. }) } fn discovery_failure(&self, server_name: &str) -> McpDiscoveryFailure { diff --git a/rust/crates/runtime/src/worker_boot.rs b/rust/crates/runtime/src/worker_boot.rs index 08b5af5..4854e2a 100644 --- a/rust/crates/runtime/src/worker_boot.rs +++ b/rust/crates/runtime/src/worker_boot.rs @@ -24,9 +24,7 @@ pub enum WorkerStatus { Spawning, TrustRequired, ReadyForPrompt, - PromptAccepted, Running, - Blocked, Finished, Failed, } @@ -37,9 +35,7 @@ impl std::fmt::Display for WorkerStatus { Self::Spawning => write!(f, "spawning"), Self::TrustRequired => write!(f, "trust_required"), Self::ReadyForPrompt => write!(f, "ready_for_prompt"), - Self::PromptAccepted => write!(f, "prompt_accepted"), Self::Running => write!(f, "running"), - Self::Blocked => write!(f, "blocked"), Self::Finished => write!(f, "finished"), Self::Failed => write!(f, "failed"), } @@ -69,7 +65,6 @@ pub enum WorkerEventKind { TrustRequired, TrustResolved, ReadyForPrompt, - PromptAccepted, PromptMisdelivery, PromptReplayArmed, Running, @@ -78,12 +73,46 @@ pub enum WorkerEventKind { Failed, } +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum WorkerTrustResolution { + AutoAllowlisted, + ManualApproval, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum WorkerPromptTarget { + Shell, + WrongTarget, + Unknown, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum WorkerEventPayload { + TrustPrompt { + cwd: String, + #[serde(skip_serializing_if = "Option::is_none")] + resolution: Option, + }, + PromptDelivery { + prompt_preview: String, + observed_target: WorkerPromptTarget, + #[serde(skip_serializing_if = "Option::is_none")] + observed_cwd: Option, + recovery_armed: bool, + }, +} + #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] pub struct WorkerEvent { pub seq: u64, pub kind: WorkerEventKind, pub status: WorkerStatus, pub detail: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub payload: Option, pub timestamp: u64, } @@ -96,6 +125,7 @@ pub struct Worker { pub trust_gate_cleared: bool, pub auto_recover_prompt_misdelivery: bool, pub prompt_delivery_attempts: u32, + pub prompt_in_flight: bool, pub last_prompt: Option, pub replay_prompt: Option, pub last_error: Option, @@ -143,6 +173,7 @@ impl WorkerRegistry { trust_gate_cleared: false, auto_recover_prompt_misdelivery, prompt_delivery_attempts: 0, + prompt_in_flight: false, last_prompt: None, replay_prompt: None, last_error: None, @@ -155,6 +186,7 @@ impl WorkerRegistry { WorkerEventKind::Spawning, WorkerStatus::Spawning, Some("worker created".to_string()), + None, ); inner.workers.insert(worker_id, worker.clone()); worker @@ -186,6 +218,10 @@ impl WorkerRegistry { WorkerEventKind::TrustRequired, WorkerStatus::TrustRequired, Some("trust prompt detected".to_string()), + Some(WorkerEventPayload::TrustPrompt { + cwd: worker.cwd.clone(), + resolution: None, + }), ); if worker.trust_auto_resolve { @@ -197,26 +233,57 @@ impl WorkerRegistry { WorkerEventKind::TrustResolved, WorkerStatus::Spawning, Some("allowlisted repo auto-resolved trust prompt".to_string()), + Some(WorkerEventPayload::TrustPrompt { + cwd: worker.cwd.clone(), + resolution: Some(WorkerTrustResolution::AutoAllowlisted), + }), ); } else { return Ok(worker.clone()); } } - if prompt_misdelivery_is_relevant(worker) - && detect_prompt_misdelivery(&lowered, worker.last_prompt.as_deref()) + if let Some(observation) = prompt_misdelivery_is_relevant(worker) + .then(|| { + detect_prompt_misdelivery( + screen_text, + &lowered, + worker.last_prompt.as_deref(), + &worker.cwd, + ) + }) + .flatten() { - let detail = prompt_preview(worker.last_prompt.as_deref().unwrap_or_default()); + let prompt_preview = prompt_preview(worker.last_prompt.as_deref().unwrap_or_default()); + let message = match observation.target { + WorkerPromptTarget::Shell => { + format!("worker prompt landed in shell instead of coding agent: {prompt_preview}") + } + WorkerPromptTarget::WrongTarget => format!( + "worker prompt landed in the wrong target instead of {}: {}", + worker.cwd, prompt_preview + ), + WorkerPromptTarget::Unknown => format!( + "worker prompt delivery failed before reaching coding agent: {prompt_preview}" + ), + }; worker.last_error = Some(WorkerFailure { kind: WorkerFailureKind::PromptDelivery, - message: format!("worker prompt landed in shell instead of coding agent: {detail}"), + message, created_at: now_secs(), }); + worker.prompt_in_flight = false; push_event( worker, WorkerEventKind::PromptMisdelivery, - WorkerStatus::Blocked, - Some("shell misdelivery detected".to_string()), + WorkerStatus::Failed, + Some(prompt_misdelivery_detail(&observation).to_string()), + Some(WorkerEventPayload::PromptDelivery { + prompt_preview: prompt_preview.clone(), + observed_target: observation.target, + observed_cwd: observation.observed_cwd.clone(), + recovery_armed: false, + }), ); if worker.auto_recover_prompt_misdelivery { worker.replay_prompt = worker.last_prompt.clone(); @@ -225,37 +292,29 @@ impl WorkerRegistry { worker, WorkerEventKind::PromptReplayArmed, WorkerStatus::ReadyForPrompt, - Some("prompt replay armed after shell misdelivery".to_string()), + Some("prompt replay armed after prompt misdelivery".to_string()), + Some(WorkerEventPayload::PromptDelivery { + prompt_preview, + observed_target: observation.target, + observed_cwd: observation.observed_cwd, + recovery_armed: true, + }), ); } else { - worker.status = WorkerStatus::Blocked; + worker.status = WorkerStatus::Failed; } return Ok(worker.clone()); } - if detect_running_cue(&lowered) - && matches!( - worker.status, - WorkerStatus::PromptAccepted | WorkerStatus::ReadyForPrompt - ) - { + if detect_running_cue(&lowered) && worker.prompt_in_flight { + worker.prompt_in_flight = false; worker.status = WorkerStatus::Running; worker.last_error = None; - push_event( - worker, - WorkerEventKind::Running, - WorkerStatus::Running, - Some("worker accepted prompt and started running".to_string()), - ); } - if detect_ready_for_prompt(screen_text, &lowered) - && !matches!( - worker.status, - WorkerStatus::ReadyForPrompt | WorkerStatus::Running - ) - { + if detect_ready_for_prompt(screen_text, &lowered) && worker.status != WorkerStatus::ReadyForPrompt { worker.status = WorkerStatus::ReadyForPrompt; + worker.prompt_in_flight = false; if matches!( worker.last_error.as_ref().map(|failure| failure.kind), Some(WorkerFailureKind::TrustGate) @@ -267,6 +326,7 @@ impl WorkerRegistry { WorkerEventKind::ReadyForPrompt, WorkerStatus::ReadyForPrompt, Some("worker is ready for prompt delivery".to_string()), + None, ); } @@ -295,6 +355,10 @@ impl WorkerRegistry { WorkerEventKind::TrustResolved, WorkerStatus::Spawning, Some("trust prompt resolved manually".to_string()), + Some(WorkerEventPayload::TrustPrompt { + cwd: worker.cwd.clone(), + resolution: Some(WorkerTrustResolution::ManualApproval), + }), ); Ok(worker.clone()) } @@ -321,18 +385,20 @@ impl WorkerRegistry { .ok_or_else(|| format!("worker {worker_id} has no prompt to send or replay"))?; worker.prompt_delivery_attempts += 1; + worker.prompt_in_flight = true; worker.last_prompt = Some(next_prompt.clone()); worker.replay_prompt = None; worker.last_error = None; - worker.status = WorkerStatus::PromptAccepted; + worker.status = WorkerStatus::Running; push_event( worker, - WorkerEventKind::PromptAccepted, - WorkerStatus::PromptAccepted, + WorkerEventKind::Running, + WorkerStatus::Running, Some(format!( - "prompt accepted for delivery: {}", + "prompt dispatched to worker: {}", prompt_preview(&next_prompt) )), + None, ); Ok(worker.clone()) } @@ -346,10 +412,7 @@ impl WorkerRegistry { worker_id: worker.worker_id.clone(), status: worker.status, ready: worker.status == WorkerStatus::ReadyForPrompt, - blocked: matches!( - worker.status, - WorkerStatus::TrustRequired | WorkerStatus::Blocked - ), + blocked: matches!(worker.status, WorkerStatus::TrustRequired | WorkerStatus::Failed), replay_prompt_ready: worker.replay_prompt.is_some(), last_error: worker.last_error.clone(), }) @@ -367,11 +430,13 @@ impl WorkerRegistry { worker.replay_prompt = None; worker.last_error = None; worker.prompt_delivery_attempts = 0; + worker.prompt_in_flight = false; push_event( worker, WorkerEventKind::Restarted, WorkerStatus::Spawning, Some("worker restarted".to_string()), + None, ); Ok(worker.clone()) } @@ -383,11 +448,13 @@ impl WorkerRegistry { .get_mut(worker_id) .ok_or_else(|| format!("worker not found: {worker_id}"))?; worker.status = WorkerStatus::Finished; + worker.prompt_in_flight = false; push_event( worker, WorkerEventKind::Finished, WorkerStatus::Finished, Some("worker terminated by control plane".to_string()), + None, ); Ok(worker.clone()) } @@ -406,7 +473,6 @@ impl WorkerRegistry { .get_mut(worker_id) .ok_or_else(|| format!("worker not found: {worker_id}"))?; - // Detect degraded completion: finish=unknown with zero output is provider failure let is_provider_failure = (finish_reason == "unknown" && tokens_output == 0) || finish_reason == "error"; @@ -423,15 +489,17 @@ impl WorkerRegistry { created_at: now_secs(), }); worker.status = WorkerStatus::Failed; + worker.prompt_in_flight = false; push_event( worker, WorkerEventKind::Failed, WorkerStatus::Failed, Some("provider failure classified".to_string()), + None, ); } else { - // Normal completion worker.status = WorkerStatus::Finished; + worker.prompt_in_flight = false; worker.last_error = None; push_event( worker, @@ -440,6 +508,7 @@ impl WorkerRegistry { Some(format!( "session completed: finish='{finish_reason}', tokens={tokens_output}" )), + None, ); } @@ -458,10 +527,13 @@ pub struct WorkerReadySnapshot { } fn prompt_misdelivery_is_relevant(worker: &Worker) -> bool { - matches!( - worker.status, - WorkerStatus::PromptAccepted | WorkerStatus::Running - ) && worker.last_prompt.is_some() + worker.prompt_in_flight && worker.last_prompt.is_some() +} + +#[derive(Debug, Clone, PartialEq, Eq)] +struct PromptDeliveryObservation { + target: WorkerPromptTarget, + observed_cwd: Option, } fn push_event( @@ -469,6 +541,7 @@ fn push_event( kind: WorkerEventKind, status: WorkerStatus, detail: Option, + payload: Option, ) { let timestamp = now_secs(); let seq = worker.events.len() as u64 + 1; @@ -478,6 +551,7 @@ fn push_event( kind, status, detail, + payload, timestamp, }); } @@ -561,11 +635,35 @@ fn is_shell_prompt(trimmed: &str) -> bool { || trimmed.starts_with('#') } -fn detect_prompt_misdelivery(lowered: &str, prompt: Option<&str>) -> bool { +fn detect_prompt_misdelivery( + screen_text: &str, + lowered: &str, + prompt: Option<&str>, + expected_cwd: &str, +) -> Option { let Some(prompt) = prompt else { - return false; + return None; }; + let prompt_snippet = prompt + .lines() + .find(|line| !line.trim().is_empty()) + .map(|line| line.trim().to_ascii_lowercase()) + .unwrap_or_default(); + if prompt_snippet.is_empty() { + return None; + } + let prompt_visible = lowered.contains(&prompt_snippet); + + if let Some(observed_cwd) = detect_observed_shell_cwd(screen_text) { + if prompt_visible && !cwd_matches_observed_target(expected_cwd, &observed_cwd) { + return Some(PromptDeliveryObservation { + target: WorkerPromptTarget::WrongTarget, + observed_cwd: Some(observed_cwd), + }); + } + } + let shell_error = [ "command not found", "syntax error near unexpected token", @@ -576,17 +674,10 @@ fn detect_prompt_misdelivery(lowered: &str, prompt: Option<&str>) -> bool { .iter() .any(|needle| lowered.contains(needle)); - if !shell_error { - return false; - } - - let first_prompt_line = prompt - .lines() - .find(|line| !line.trim().is_empty()) - .map(|line| line.trim().to_ascii_lowercase()) - .unwrap_or_default(); - - first_prompt_line.is_empty() || lowered.contains(&first_prompt_line) + (shell_error && prompt_visible).then_some(PromptDeliveryObservation { + target: WorkerPromptTarget::Shell, + observed_cwd: None, + }) } fn prompt_preview(prompt: &str) -> String { @@ -598,6 +689,53 @@ fn prompt_preview(prompt: &str) -> String { format!("{}…", preview.trim_end()) } +fn prompt_misdelivery_detail(observation: &PromptDeliveryObservation) -> &'static str { + match observation.target { + WorkerPromptTarget::Shell => "shell misdelivery detected", + WorkerPromptTarget::WrongTarget => "prompt landed in wrong target", + WorkerPromptTarget::Unknown => "prompt delivery failure detected", + } +} + +fn detect_observed_shell_cwd(screen_text: &str) -> Option { + screen_text.lines().find_map(|line| { + let tokens = line.split_whitespace().collect::>(); + tokens + .iter() + .position(|token| is_shell_prompt_token(token)) + .and_then(|index| index.checked_sub(1).map(|cwd_index| tokens[cwd_index])) + .filter(|candidate| looks_like_cwd_label(candidate)) + .map(ToOwned::to_owned) + }) +} + +fn is_shell_prompt_token(token: &&str) -> bool { + matches!(*token, "$" | "%" | "#" | ">" | "›" | "❯") +} + +fn looks_like_cwd_label(candidate: &str) -> bool { + candidate.starts_with('/') + || candidate.starts_with('~') + || candidate.starts_with('.') + || candidate.contains('/') +} + +fn cwd_matches_observed_target(expected_cwd: &str, observed_cwd: &str) -> bool { + let expected = normalize_path(expected_cwd); + let expected_base = expected + .file_name() + .map(|segment| segment.to_string_lossy().into_owned()) + .unwrap_or_else(|| expected.to_string_lossy().into_owned()); + let observed_base = Path::new(observed_cwd) + .file_name() + .map(|segment| segment.to_string_lossy().into_owned()) + .unwrap_or_else(|| observed_cwd.trim_matches(':').to_string()); + + expected.to_string_lossy().ends_with(observed_cwd) + || observed_cwd.ends_with(expected.to_string_lossy().as_ref()) + || expected_base == observed_base +} + #[cfg(test)] mod tests { use super::*; @@ -619,14 +757,30 @@ mod tests { .expect("trust observe should succeed"); assert_eq!(after_trust.status, WorkerStatus::Spawning); assert!(after_trust.trust_gate_cleared); - assert!(after_trust + let trust_required = after_trust .events .iter() - .any(|event| event.kind == WorkerEventKind::TrustRequired)); - assert!(after_trust + .find(|event| event.kind == WorkerEventKind::TrustRequired) + .expect("trust required event should exist"); + assert_eq!( + trust_required.payload, + Some(WorkerEventPayload::TrustPrompt { + cwd: "/tmp/worktrees/repo-a".to_string(), + resolution: None, + }) + ); + let trust_resolved = after_trust .events .iter() - .any(|event| event.kind == WorkerEventKind::TrustResolved)); + .find(|event| event.kind == WorkerEventKind::TrustResolved) + .expect("trust resolved event should exist"); + assert_eq!( + trust_resolved.payload, + Some(WorkerEventPayload::TrustPrompt { + cwd: "/tmp/worktrees/repo-a".to_string(), + resolution: Some(WorkerTrustResolution::AutoAllowlisted), + }) + ); let ready = registry .observe(&worker.worker_id, "Ready for your input\n>") @@ -662,6 +816,18 @@ mod tests { .expect("manual trust resolution should succeed"); assert_eq!(resolved.status, WorkerStatus::Spawning); assert!(resolved.trust_gate_cleared); + let trust_resolved = resolved + .events + .iter() + .find(|event| event.kind == WorkerEventKind::TrustResolved) + .expect("manual trust resolve event should exist"); + assert_eq!( + trust_resolved.payload, + Some(WorkerEventPayload::TrustPrompt { + cwd: "/tmp/repo-b".to_string(), + resolution: Some(WorkerTrustResolution::ManualApproval), + }) + ); } #[test] @@ -679,11 +845,12 @@ mod tests { .observe(&worker.worker_id, "Ready for input\n>") .expect("ready observe should succeed"); - let accepted = registry + let running = registry .send_prompt(&worker.worker_id, Some("Implement worker handshake")) .expect("prompt send should succeed"); - assert_eq!(accepted.status, WorkerStatus::PromptAccepted); - assert_eq!(accepted.prompt_delivery_attempts, 1); + assert_eq!(running.status, WorkerStatus::Running); + assert_eq!(running.prompt_delivery_attempts, 1); + assert!(running.prompt_in_flight); let recovered = registry .observe( @@ -703,23 +870,89 @@ mod tests { recovered.replay_prompt.as_deref(), Some("Implement worker handshake") ); - assert!(recovered + let misdelivery = recovered .events .iter() - .any(|event| event.kind == WorkerEventKind::PromptMisdelivery)); - assert!(recovered + .find(|event| event.kind == WorkerEventKind::PromptMisdelivery) + .expect("misdelivery event should exist"); + assert_eq!(misdelivery.status, WorkerStatus::Failed); + assert_eq!( + misdelivery.payload, + Some(WorkerEventPayload::PromptDelivery { + prompt_preview: "Implement worker handshake".to_string(), + observed_target: WorkerPromptTarget::Shell, + observed_cwd: None, + recovery_armed: false, + }) + ); + let replay = recovered .events .iter() - .any(|event| event.kind == WorkerEventKind::PromptReplayArmed)); + .find(|event| event.kind == WorkerEventKind::PromptReplayArmed) + .expect("replay event should exist"); + assert_eq!(replay.status, WorkerStatus::ReadyForPrompt); + assert_eq!( + replay.payload, + Some(WorkerEventPayload::PromptDelivery { + prompt_preview: "Implement worker handshake".to_string(), + observed_target: WorkerPromptTarget::Shell, + observed_cwd: None, + recovery_armed: true, + }) + ); let replayed = registry .send_prompt(&worker.worker_id, None) .expect("replay send should succeed"); - assert_eq!(replayed.status, WorkerStatus::PromptAccepted); + assert_eq!(replayed.status, WorkerStatus::Running); assert!(replayed.replay_prompt.is_none()); assert_eq!(replayed.prompt_delivery_attempts, 2); } + #[test] + fn prompt_delivery_detects_wrong_target_and_replays_to_expected_worker() { + let registry = WorkerRegistry::new(); + let worker = registry.create("/tmp/repo-target-a", &[], true); + registry + .observe(&worker.worker_id, "Ready for input\n>") + .expect("ready observe should succeed"); + registry + .send_prompt(&worker.worker_id, Some("Run the worker bootstrap tests")) + .expect("prompt send should succeed"); + + let recovered = registry + .observe( + &worker.worker_id, + "/tmp/repo-target-b % Run the worker bootstrap tests\nzsh: command not found: Run", + ) + .expect("wrong target should be detected"); + + assert_eq!(recovered.status, WorkerStatus::ReadyForPrompt); + assert_eq!( + recovered.replay_prompt.as_deref(), + Some("Run the worker bootstrap tests") + ); + assert!(recovered + .last_error + .expect("wrong target error should exist") + .message + .contains("wrong target")); + let misdelivery = recovered + .events + .iter() + .find(|event| event.kind == WorkerEventKind::PromptMisdelivery) + .expect("wrong-target event should exist"); + assert_eq!( + misdelivery.payload, + Some(WorkerEventPayload::PromptDelivery { + prompt_preview: "Run the worker bootstrap tests".to_string(), + observed_target: WorkerPromptTarget::WrongTarget, + observed_cwd: Some("/tmp/repo-target-b".to_string()), + recovery_armed: false, + }) + ); + } + #[test] fn await_ready_surfaces_blocked_or_ready_worker_state() { let registry = WorkerRegistry::new(); @@ -774,6 +1007,7 @@ mod tests { assert_eq!(restarted.status, WorkerStatus::Spawning); assert_eq!(restarted.prompt_delivery_attempts, 0); assert!(restarted.last_prompt.is_none()); + assert!(!restarted.prompt_in_flight); let finished = registry .terminate(&worker.worker_id) @@ -796,7 +1030,6 @@ mod tests { .send_prompt(&worker.worker_id, Some("Run tests")) .expect("prompt send should succeed"); - // Simulate degraded completion: finish="unknown", zero output let failed = registry .observe_completion(&worker.worker_id, "unknown", 0) .expect("completion observe should succeed"); @@ -822,7 +1055,6 @@ mod tests { .send_prompt(&worker.worker_id, Some("Run tests")) .expect("prompt send should succeed"); - // Normal completion with output let finished = registry .observe_completion(&worker.worker_id, "stop", 150) .expect("completion observe should succeed"); diff --git a/rust/crates/rusty-claude-cli/src/main.rs b/rust/crates/rusty-claude-cli/src/main.rs index ac9b023..8170072 100644 --- a/rust/crates/rusty-claude-cli/src/main.rs +++ b/rust/crates/rusty-claude-cli/src/main.rs @@ -212,7 +212,7 @@ impl CliOutputFormat { fn parse_args(args: &[String]) -> Result { let mut model = DEFAULT_MODEL.to_string(); let mut output_format = CliOutputFormat::Text; - let mut permission_mode = default_permission_mode(); + let mut permission_mode_override = None; let mut wants_help = false; let mut wants_version = false; let mut allowed_tool_values = Vec::new(); @@ -251,7 +251,7 @@ fn parse_args(args: &[String]) -> Result { let value = args .get(index + 1) .ok_or_else(|| "missing value for --permission-mode".to_string())?; - permission_mode = parse_permission_mode_arg(value)?; + permission_mode_override = Some(parse_permission_mode_arg(value)?); index += 2; } flag if flag.starts_with("--output-format=") => { @@ -259,11 +259,11 @@ fn parse_args(args: &[String]) -> Result { index += 1; } flag if flag.starts_with("--permission-mode=") => { - permission_mode = parse_permission_mode_arg(&flag[18..])?; + permission_mode_override = Some(parse_permission_mode_arg(&flag[18..])?); index += 1; } "--dangerously-skip-permissions" => { - permission_mode = PermissionMode::DangerFullAccess; + permission_mode_override = Some(PermissionMode::DangerFullAccess); index += 1; } "-p" => { @@ -277,7 +277,8 @@ fn parse_args(args: &[String]) -> Result { model: resolve_model_alias(&model).to_string(), output_format, allowed_tools: normalize_allowed_tools(&allowed_tool_values)?, - permission_mode, + permission_mode: permission_mode_override + .unwrap_or_else(default_permission_mode), }); } "--print" => { @@ -330,6 +331,7 @@ fn parse_args(args: &[String]) -> Result { let allowed_tools = normalize_allowed_tools(&allowed_tool_values)?; if rest.is_empty() { + let permission_mode = permission_mode_override.unwrap_or_else(default_permission_mode); return Ok(CliAction::Repl { model, allowed_tools, @@ -339,10 +341,13 @@ fn parse_args(args: &[String]) -> Result { if rest.first().map(String::as_str) == Some("--resume") { return parse_resume_args(&rest[1..]); } - if let Some(action) = parse_single_word_command_alias(&rest, &model, permission_mode) { + if let Some(action) = parse_single_word_command_alias(&rest, &model, permission_mode_override) + { return action; } + let permission_mode = permission_mode_override.unwrap_or_else(default_permission_mode); + match rest[0].as_str() { "dump-manifests" => Ok(CliAction::DumpManifests), "bootstrap-plan" => Ok(CliAction::BootstrapPlan), @@ -386,7 +391,7 @@ fn parse_args(args: &[String]) -> Result { fn parse_single_word_command_alias( rest: &[String], model: &str, - permission_mode: PermissionMode, + permission_mode_override: Option, ) -> Option> { if rest.len() != 1 { return None; @@ -397,7 +402,7 @@ fn parse_single_word_command_alias( "version" => Some(Ok(CliAction::Version)), "status" => Some(Ok(CliAction::Status { model: model.to_string(), - permission_mode, + permission_mode: permission_mode_override.unwrap_or_else(default_permission_mode), })), "sandbox" => Some(Ok(CliAction::Sandbox)), other => bare_slash_command_guidance(other).map(Err), @@ -588,6 +593,9 @@ fn resolve_model_alias(model: &str) -> &str { } fn normalize_allowed_tools(values: &[String]) -> Result, String> { + if values.is_empty() { + return Ok(None); + } current_tool_registry()?.normalize_allowed_tools(values) } @@ -5718,11 +5726,17 @@ mod tests { } fn with_current_dir(cwd: &Path, f: impl FnOnce() -> T) -> T { + let _guard = cwd_lock() + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner); let previous = std::env::current_dir().expect("cwd should load"); std::env::set_current_dir(cwd).expect("cwd should change"); - let result = f(); + let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(f)); std::env::set_current_dir(previous).expect("cwd should restore"); - result + match result { + Ok(value) => value, + Err(payload) => std::panic::resume_unwind(payload), + } } fn write_plugin_fixture(root: &Path, name: &str, include_hooks: bool, include_lifecycle: bool) { diff --git a/rust/crates/tools/src/lib.rs b/rust/crates/tools/src/lib.rs index 4cec927..9d7a2bb 100644 --- a/rust/crates/tools/src/lib.rs +++ b/rust/crates/tools/src/lib.rs @@ -5031,6 +5031,14 @@ mod tests { let observed_output: serde_json::Value = serde_json::from_str(&observed).expect("json"); assert_eq!(observed_output["status"], "spawning"); assert_eq!(observed_output["trust_gate_cleared"], true); + assert_eq!( + observed_output["events"][1]["payload"]["type"], + "trust_prompt" + ); + assert_eq!( + observed_output["events"][2]["payload"]["resolution"], + "auto_allowlisted" + ); let ready = execute_tool( "WorkerObserve", @@ -5063,8 +5071,9 @@ mod tests { ) .expect("WorkerSendPrompt should succeed after ready"); let accepted_output: serde_json::Value = serde_json::from_str(&accepted).expect("json"); - assert_eq!(accepted_output["status"], "prompt_accepted"); + assert_eq!(accepted_output["status"], "running"); assert_eq!(accepted_output["prompt_delivery_attempts"], 1); + assert_eq!(accepted_output["prompt_in_flight"], true); } #[test] @@ -5112,6 +5121,14 @@ mod tests { assert_eq!(recovered_output["status"], "ready_for_prompt"); assert_eq!(recovered_output["last_error"]["kind"], "prompt_delivery"); assert_eq!(recovered_output["replay_prompt"], "Investigate flaky boot"); + assert_eq!( + recovered_output["events"][3]["payload"]["observed_target"], + "shell" + ); + assert_eq!( + recovered_output["events"][4]["payload"]["recovery_armed"], + true + ); let replayed = execute_tool( "WorkerSendPrompt", @@ -5121,8 +5138,9 @@ mod tests { ) .expect("WorkerSendPrompt should replay recovered prompt"); let replayed_output: serde_json::Value = serde_json::from_str(&replayed).expect("json"); - assert_eq!(replayed_output["status"], "prompt_accepted"); + assert_eq!(replayed_output["status"], "running"); assert_eq!(replayed_output["prompt_delivery_attempts"], 2); + assert_eq!(replayed_output["prompt_in_flight"], true); } #[test]