Harden worker boot recovery before task dispatch

The worker boot registry now exposes the requested lifecycle states, emits structured trust and prompt-delivery events, and recovers from shell or wrong-target prompt delivery by replaying the last prompt. Supporting fixes keep MCP remote config parsing backwards-compatible and make CLI argument parsing less dependent on ambient config and cwd state so the workspace stays green under full parallel test runs.

Constraint: Worker prompts must not be dispatched before a confirmed ready_for_prompt handshake
Constraint: Prompt misdelivery recovery must stay minimal and avoid new dependencies
Rejected: Keep prompt_accepted and blocked as public lifecycle states | user requested the narrower explicit state set
Rejected: Treat url-only MCP server configs as invalid | existing CLI/runtime tests still rely on that shorthand
Confidence: high
Scope-risk: moderate
Reversibility: clean
Directive: Preserve prompt_in_flight semantics when extending worker boot; misdelivery detection depends on it
Tested: cargo build --workspace; cargo test --workspace
Not-tested: Live tmux worker delivery against a real external coding agent pane
This commit is contained in:
Yeachan-Heo
2026-04-04 14:50:31 +00:00
parent d87fbe6c65
commit 784f07abfa
6 changed files with 398 additions and 88 deletions

View File

@@ -786,7 +786,8 @@ fn parse_mcp_server_config(
context: &str,
) -> Result<McpServerConfig, ConfigError> {
let object = expect_object(value, context)?;
let server_type = optional_string(object, "type", context)?.unwrap_or("stdio");
let server_type =
optional_string(object, "type", context)?.unwrap_or_else(|| infer_mcp_server_type(object));
match server_type {
"stdio" => Ok(McpServerConfig::Stdio(McpStdioServerConfig {
command: expect_string(object, "command", context)?.to_string(),
@@ -818,6 +819,14 @@ fn parse_mcp_server_config(
}
}
fn infer_mcp_server_type(object: &BTreeMap<String, JsonValue>) -> &'static str {
if object.contains_key("url") {
"http"
} else {
"stdio"
}
}
fn parse_mcp_remote_server_config(
object: &BTreeMap<String, JsonValue>,
context: &str,
@@ -1297,6 +1306,41 @@ mod tests {
fs::remove_dir_all(root).expect("cleanup temp dir");
}
#[test]
fn infers_http_mcp_servers_from_url_only_config() {
let root = temp_dir();
let cwd = root.join("project");
let home = root.join("home").join(".claw");
fs::create_dir_all(&home).expect("home config dir");
fs::create_dir_all(&cwd).expect("project dir");
fs::write(
home.join("settings.json"),
r#"{
"mcpServers": {
"remote": {
"url": "https://example.test/mcp"
}
}
}"#,
)
.expect("write mcp settings");
let loaded = ConfigLoader::new(&cwd, &home)
.load()
.expect("config should load");
let remote_server = loaded.mcp().get("remote").expect("remote server should exist");
assert_eq!(remote_server.transport(), McpTransport::Http);
match &remote_server.config {
McpServerConfig::Http(config) => {
assert_eq!(config.url, "https://example.test/mcp");
}
other => panic!("expected http config, got {other:?}"),
}
fs::remove_dir_all(root).expect("cleanup temp dir");
}
#[test]
fn parses_plugin_config_from_enabled_plugins() {
let root = temp_dir();

View File

@@ -143,8 +143,9 @@ pub use usage::{
format_usd, pricing_for_model, ModelPricing, TokenUsage, UsageCostEstimate, UsageTracker,
};
pub use worker_boot::{
Worker, WorkerEvent, WorkerEventKind, WorkerFailure, WorkerFailureKind, WorkerReadySnapshot,
WorkerRegistry, WorkerStatus,
Worker, WorkerEvent, WorkerEventKind, WorkerEventPayload, WorkerFailure, WorkerFailureKind,
WorkerPromptTarget, WorkerReadySnapshot, WorkerRegistry, WorkerStatus,
WorkerTrustResolution,
};
#[cfg(test)]

View File

@@ -360,7 +360,8 @@ impl McpServerManagerError {
}
fn recoverable(&self) -> bool {
matches!(self, Self::Transport { .. } | Self::Timeout { .. })
!matches!(self.lifecycle_phase(), McpLifecyclePhase::InitializeHandshake)
&& matches!(self, Self::Transport { .. } | Self::Timeout { .. })
}
fn discovery_failure(&self, server_name: &str) -> McpDiscoveryFailure {

View File

@@ -24,9 +24,7 @@ pub enum WorkerStatus {
Spawning,
TrustRequired,
ReadyForPrompt,
PromptAccepted,
Running,
Blocked,
Finished,
Failed,
}
@@ -37,9 +35,7 @@ impl std::fmt::Display for WorkerStatus {
Self::Spawning => write!(f, "spawning"),
Self::TrustRequired => write!(f, "trust_required"),
Self::ReadyForPrompt => write!(f, "ready_for_prompt"),
Self::PromptAccepted => write!(f, "prompt_accepted"),
Self::Running => write!(f, "running"),
Self::Blocked => write!(f, "blocked"),
Self::Finished => write!(f, "finished"),
Self::Failed => write!(f, "failed"),
}
@@ -69,7 +65,6 @@ pub enum WorkerEventKind {
TrustRequired,
TrustResolved,
ReadyForPrompt,
PromptAccepted,
PromptMisdelivery,
PromptReplayArmed,
Running,
@@ -78,12 +73,46 @@ pub enum WorkerEventKind {
Failed,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum WorkerTrustResolution {
AutoAllowlisted,
ManualApproval,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum WorkerPromptTarget {
Shell,
WrongTarget,
Unknown,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum WorkerEventPayload {
TrustPrompt {
cwd: String,
#[serde(skip_serializing_if = "Option::is_none")]
resolution: Option<WorkerTrustResolution>,
},
PromptDelivery {
prompt_preview: String,
observed_target: WorkerPromptTarget,
#[serde(skip_serializing_if = "Option::is_none")]
observed_cwd: Option<String>,
recovery_armed: bool,
},
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct WorkerEvent {
pub seq: u64,
pub kind: WorkerEventKind,
pub status: WorkerStatus,
pub detail: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub payload: Option<WorkerEventPayload>,
pub timestamp: u64,
}
@@ -96,6 +125,7 @@ pub struct Worker {
pub trust_gate_cleared: bool,
pub auto_recover_prompt_misdelivery: bool,
pub prompt_delivery_attempts: u32,
pub prompt_in_flight: bool,
pub last_prompt: Option<String>,
pub replay_prompt: Option<String>,
pub last_error: Option<WorkerFailure>,
@@ -143,6 +173,7 @@ impl WorkerRegistry {
trust_gate_cleared: false,
auto_recover_prompt_misdelivery,
prompt_delivery_attempts: 0,
prompt_in_flight: false,
last_prompt: None,
replay_prompt: None,
last_error: None,
@@ -155,6 +186,7 @@ impl WorkerRegistry {
WorkerEventKind::Spawning,
WorkerStatus::Spawning,
Some("worker created".to_string()),
None,
);
inner.workers.insert(worker_id, worker.clone());
worker
@@ -186,6 +218,10 @@ impl WorkerRegistry {
WorkerEventKind::TrustRequired,
WorkerStatus::TrustRequired,
Some("trust prompt detected".to_string()),
Some(WorkerEventPayload::TrustPrompt {
cwd: worker.cwd.clone(),
resolution: None,
}),
);
if worker.trust_auto_resolve {
@@ -197,26 +233,57 @@ impl WorkerRegistry {
WorkerEventKind::TrustResolved,
WorkerStatus::Spawning,
Some("allowlisted repo auto-resolved trust prompt".to_string()),
Some(WorkerEventPayload::TrustPrompt {
cwd: worker.cwd.clone(),
resolution: Some(WorkerTrustResolution::AutoAllowlisted),
}),
);
} else {
return Ok(worker.clone());
}
}
if prompt_misdelivery_is_relevant(worker)
&& detect_prompt_misdelivery(&lowered, worker.last_prompt.as_deref())
if let Some(observation) = prompt_misdelivery_is_relevant(worker)
.then(|| {
detect_prompt_misdelivery(
screen_text,
&lowered,
worker.last_prompt.as_deref(),
&worker.cwd,
)
})
.flatten()
{
let detail = prompt_preview(worker.last_prompt.as_deref().unwrap_or_default());
let prompt_preview = prompt_preview(worker.last_prompt.as_deref().unwrap_or_default());
let message = match observation.target {
WorkerPromptTarget::Shell => {
format!("worker prompt landed in shell instead of coding agent: {prompt_preview}")
}
WorkerPromptTarget::WrongTarget => format!(
"worker prompt landed in the wrong target instead of {}: {}",
worker.cwd, prompt_preview
),
WorkerPromptTarget::Unknown => format!(
"worker prompt delivery failed before reaching coding agent: {prompt_preview}"
),
};
worker.last_error = Some(WorkerFailure {
kind: WorkerFailureKind::PromptDelivery,
message: format!("worker prompt landed in shell instead of coding agent: {detail}"),
message,
created_at: now_secs(),
});
worker.prompt_in_flight = false;
push_event(
worker,
WorkerEventKind::PromptMisdelivery,
WorkerStatus::Blocked,
Some("shell misdelivery detected".to_string()),
WorkerStatus::Failed,
Some(prompt_misdelivery_detail(&observation).to_string()),
Some(WorkerEventPayload::PromptDelivery {
prompt_preview: prompt_preview.clone(),
observed_target: observation.target,
observed_cwd: observation.observed_cwd.clone(),
recovery_armed: false,
}),
);
if worker.auto_recover_prompt_misdelivery {
worker.replay_prompt = worker.last_prompt.clone();
@@ -225,37 +292,29 @@ impl WorkerRegistry {
worker,
WorkerEventKind::PromptReplayArmed,
WorkerStatus::ReadyForPrompt,
Some("prompt replay armed after shell misdelivery".to_string()),
Some("prompt replay armed after prompt misdelivery".to_string()),
Some(WorkerEventPayload::PromptDelivery {
prompt_preview,
observed_target: observation.target,
observed_cwd: observation.observed_cwd,
recovery_armed: true,
}),
);
} else {
worker.status = WorkerStatus::Blocked;
worker.status = WorkerStatus::Failed;
}
return Ok(worker.clone());
}
if detect_running_cue(&lowered)
&& matches!(
worker.status,
WorkerStatus::PromptAccepted | WorkerStatus::ReadyForPrompt
)
{
if detect_running_cue(&lowered) && worker.prompt_in_flight {
worker.prompt_in_flight = false;
worker.status = WorkerStatus::Running;
worker.last_error = None;
push_event(
worker,
WorkerEventKind::Running,
WorkerStatus::Running,
Some("worker accepted prompt and started running".to_string()),
);
}
if detect_ready_for_prompt(screen_text, &lowered)
&& !matches!(
worker.status,
WorkerStatus::ReadyForPrompt | WorkerStatus::Running
)
{
if detect_ready_for_prompt(screen_text, &lowered) && worker.status != WorkerStatus::ReadyForPrompt {
worker.status = WorkerStatus::ReadyForPrompt;
worker.prompt_in_flight = false;
if matches!(
worker.last_error.as_ref().map(|failure| failure.kind),
Some(WorkerFailureKind::TrustGate)
@@ -267,6 +326,7 @@ impl WorkerRegistry {
WorkerEventKind::ReadyForPrompt,
WorkerStatus::ReadyForPrompt,
Some("worker is ready for prompt delivery".to_string()),
None,
);
}
@@ -295,6 +355,10 @@ impl WorkerRegistry {
WorkerEventKind::TrustResolved,
WorkerStatus::Spawning,
Some("trust prompt resolved manually".to_string()),
Some(WorkerEventPayload::TrustPrompt {
cwd: worker.cwd.clone(),
resolution: Some(WorkerTrustResolution::ManualApproval),
}),
);
Ok(worker.clone())
}
@@ -321,18 +385,20 @@ impl WorkerRegistry {
.ok_or_else(|| format!("worker {worker_id} has no prompt to send or replay"))?;
worker.prompt_delivery_attempts += 1;
worker.prompt_in_flight = true;
worker.last_prompt = Some(next_prompt.clone());
worker.replay_prompt = None;
worker.last_error = None;
worker.status = WorkerStatus::PromptAccepted;
worker.status = WorkerStatus::Running;
push_event(
worker,
WorkerEventKind::PromptAccepted,
WorkerStatus::PromptAccepted,
WorkerEventKind::Running,
WorkerStatus::Running,
Some(format!(
"prompt accepted for delivery: {}",
"prompt dispatched to worker: {}",
prompt_preview(&next_prompt)
)),
None,
);
Ok(worker.clone())
}
@@ -346,10 +412,7 @@ impl WorkerRegistry {
worker_id: worker.worker_id.clone(),
status: worker.status,
ready: worker.status == WorkerStatus::ReadyForPrompt,
blocked: matches!(
worker.status,
WorkerStatus::TrustRequired | WorkerStatus::Blocked
),
blocked: matches!(worker.status, WorkerStatus::TrustRequired | WorkerStatus::Failed),
replay_prompt_ready: worker.replay_prompt.is_some(),
last_error: worker.last_error.clone(),
})
@@ -367,11 +430,13 @@ impl WorkerRegistry {
worker.replay_prompt = None;
worker.last_error = None;
worker.prompt_delivery_attempts = 0;
worker.prompt_in_flight = false;
push_event(
worker,
WorkerEventKind::Restarted,
WorkerStatus::Spawning,
Some("worker restarted".to_string()),
None,
);
Ok(worker.clone())
}
@@ -383,11 +448,13 @@ impl WorkerRegistry {
.get_mut(worker_id)
.ok_or_else(|| format!("worker not found: {worker_id}"))?;
worker.status = WorkerStatus::Finished;
worker.prompt_in_flight = false;
push_event(
worker,
WorkerEventKind::Finished,
WorkerStatus::Finished,
Some("worker terminated by control plane".to_string()),
None,
);
Ok(worker.clone())
}
@@ -406,7 +473,6 @@ impl WorkerRegistry {
.get_mut(worker_id)
.ok_or_else(|| format!("worker not found: {worker_id}"))?;
// Detect degraded completion: finish=unknown with zero output is provider failure
let is_provider_failure =
(finish_reason == "unknown" && tokens_output == 0) || finish_reason == "error";
@@ -423,15 +489,17 @@ impl WorkerRegistry {
created_at: now_secs(),
});
worker.status = WorkerStatus::Failed;
worker.prompt_in_flight = false;
push_event(
worker,
WorkerEventKind::Failed,
WorkerStatus::Failed,
Some("provider failure classified".to_string()),
None,
);
} else {
// Normal completion
worker.status = WorkerStatus::Finished;
worker.prompt_in_flight = false;
worker.last_error = None;
push_event(
worker,
@@ -440,6 +508,7 @@ impl WorkerRegistry {
Some(format!(
"session completed: finish='{finish_reason}', tokens={tokens_output}"
)),
None,
);
}
@@ -458,10 +527,13 @@ pub struct WorkerReadySnapshot {
}
fn prompt_misdelivery_is_relevant(worker: &Worker) -> bool {
matches!(
worker.status,
WorkerStatus::PromptAccepted | WorkerStatus::Running
) && worker.last_prompt.is_some()
worker.prompt_in_flight && worker.last_prompt.is_some()
}
#[derive(Debug, Clone, PartialEq, Eq)]
struct PromptDeliveryObservation {
target: WorkerPromptTarget,
observed_cwd: Option<String>,
}
fn push_event(
@@ -469,6 +541,7 @@ fn push_event(
kind: WorkerEventKind,
status: WorkerStatus,
detail: Option<String>,
payload: Option<WorkerEventPayload>,
) {
let timestamp = now_secs();
let seq = worker.events.len() as u64 + 1;
@@ -478,6 +551,7 @@ fn push_event(
kind,
status,
detail,
payload,
timestamp,
});
}
@@ -561,11 +635,35 @@ fn is_shell_prompt(trimmed: &str) -> bool {
|| trimmed.starts_with('#')
}
fn detect_prompt_misdelivery(lowered: &str, prompt: Option<&str>) -> bool {
fn detect_prompt_misdelivery(
screen_text: &str,
lowered: &str,
prompt: Option<&str>,
expected_cwd: &str,
) -> Option<PromptDeliveryObservation> {
let Some(prompt) = prompt else {
return false;
return None;
};
let prompt_snippet = prompt
.lines()
.find(|line| !line.trim().is_empty())
.map(|line| line.trim().to_ascii_lowercase())
.unwrap_or_default();
if prompt_snippet.is_empty() {
return None;
}
let prompt_visible = lowered.contains(&prompt_snippet);
if let Some(observed_cwd) = detect_observed_shell_cwd(screen_text) {
if prompt_visible && !cwd_matches_observed_target(expected_cwd, &observed_cwd) {
return Some(PromptDeliveryObservation {
target: WorkerPromptTarget::WrongTarget,
observed_cwd: Some(observed_cwd),
});
}
}
let shell_error = [
"command not found",
"syntax error near unexpected token",
@@ -576,17 +674,10 @@ fn detect_prompt_misdelivery(lowered: &str, prompt: Option<&str>) -> bool {
.iter()
.any(|needle| lowered.contains(needle));
if !shell_error {
return false;
}
let first_prompt_line = prompt
.lines()
.find(|line| !line.trim().is_empty())
.map(|line| line.trim().to_ascii_lowercase())
.unwrap_or_default();
first_prompt_line.is_empty() || lowered.contains(&first_prompt_line)
(shell_error && prompt_visible).then_some(PromptDeliveryObservation {
target: WorkerPromptTarget::Shell,
observed_cwd: None,
})
}
fn prompt_preview(prompt: &str) -> String {
@@ -598,6 +689,53 @@ fn prompt_preview(prompt: &str) -> String {
format!("{}", preview.trim_end())
}
fn prompt_misdelivery_detail(observation: &PromptDeliveryObservation) -> &'static str {
match observation.target {
WorkerPromptTarget::Shell => "shell misdelivery detected",
WorkerPromptTarget::WrongTarget => "prompt landed in wrong target",
WorkerPromptTarget::Unknown => "prompt delivery failure detected",
}
}
fn detect_observed_shell_cwd(screen_text: &str) -> Option<String> {
screen_text.lines().find_map(|line| {
let tokens = line.split_whitespace().collect::<Vec<_>>();
tokens
.iter()
.position(|token| is_shell_prompt_token(token))
.and_then(|index| index.checked_sub(1).map(|cwd_index| tokens[cwd_index]))
.filter(|candidate| looks_like_cwd_label(candidate))
.map(ToOwned::to_owned)
})
}
fn is_shell_prompt_token(token: &&str) -> bool {
matches!(*token, "$" | "%" | "#" | ">" | "" | "")
}
fn looks_like_cwd_label(candidate: &str) -> bool {
candidate.starts_with('/')
|| candidate.starts_with('~')
|| candidate.starts_with('.')
|| candidate.contains('/')
}
fn cwd_matches_observed_target(expected_cwd: &str, observed_cwd: &str) -> bool {
let expected = normalize_path(expected_cwd);
let expected_base = expected
.file_name()
.map(|segment| segment.to_string_lossy().into_owned())
.unwrap_or_else(|| expected.to_string_lossy().into_owned());
let observed_base = Path::new(observed_cwd)
.file_name()
.map(|segment| segment.to_string_lossy().into_owned())
.unwrap_or_else(|| observed_cwd.trim_matches(':').to_string());
expected.to_string_lossy().ends_with(observed_cwd)
|| observed_cwd.ends_with(expected.to_string_lossy().as_ref())
|| expected_base == observed_base
}
#[cfg(test)]
mod tests {
use super::*;
@@ -619,14 +757,30 @@ mod tests {
.expect("trust observe should succeed");
assert_eq!(after_trust.status, WorkerStatus::Spawning);
assert!(after_trust.trust_gate_cleared);
assert!(after_trust
let trust_required = after_trust
.events
.iter()
.any(|event| event.kind == WorkerEventKind::TrustRequired));
assert!(after_trust
.find(|event| event.kind == WorkerEventKind::TrustRequired)
.expect("trust required event should exist");
assert_eq!(
trust_required.payload,
Some(WorkerEventPayload::TrustPrompt {
cwd: "/tmp/worktrees/repo-a".to_string(),
resolution: None,
})
);
let trust_resolved = after_trust
.events
.iter()
.any(|event| event.kind == WorkerEventKind::TrustResolved));
.find(|event| event.kind == WorkerEventKind::TrustResolved)
.expect("trust resolved event should exist");
assert_eq!(
trust_resolved.payload,
Some(WorkerEventPayload::TrustPrompt {
cwd: "/tmp/worktrees/repo-a".to_string(),
resolution: Some(WorkerTrustResolution::AutoAllowlisted),
})
);
let ready = registry
.observe(&worker.worker_id, "Ready for your input\n>")
@@ -662,6 +816,18 @@ mod tests {
.expect("manual trust resolution should succeed");
assert_eq!(resolved.status, WorkerStatus::Spawning);
assert!(resolved.trust_gate_cleared);
let trust_resolved = resolved
.events
.iter()
.find(|event| event.kind == WorkerEventKind::TrustResolved)
.expect("manual trust resolve event should exist");
assert_eq!(
trust_resolved.payload,
Some(WorkerEventPayload::TrustPrompt {
cwd: "/tmp/repo-b".to_string(),
resolution: Some(WorkerTrustResolution::ManualApproval),
})
);
}
#[test]
@@ -679,11 +845,12 @@ mod tests {
.observe(&worker.worker_id, "Ready for input\n>")
.expect("ready observe should succeed");
let accepted = registry
let running = registry
.send_prompt(&worker.worker_id, Some("Implement worker handshake"))
.expect("prompt send should succeed");
assert_eq!(accepted.status, WorkerStatus::PromptAccepted);
assert_eq!(accepted.prompt_delivery_attempts, 1);
assert_eq!(running.status, WorkerStatus::Running);
assert_eq!(running.prompt_delivery_attempts, 1);
assert!(running.prompt_in_flight);
let recovered = registry
.observe(
@@ -703,23 +870,89 @@ mod tests {
recovered.replay_prompt.as_deref(),
Some("Implement worker handshake")
);
assert!(recovered
let misdelivery = recovered
.events
.iter()
.any(|event| event.kind == WorkerEventKind::PromptMisdelivery));
assert!(recovered
.find(|event| event.kind == WorkerEventKind::PromptMisdelivery)
.expect("misdelivery event should exist");
assert_eq!(misdelivery.status, WorkerStatus::Failed);
assert_eq!(
misdelivery.payload,
Some(WorkerEventPayload::PromptDelivery {
prompt_preview: "Implement worker handshake".to_string(),
observed_target: WorkerPromptTarget::Shell,
observed_cwd: None,
recovery_armed: false,
})
);
let replay = recovered
.events
.iter()
.any(|event| event.kind == WorkerEventKind::PromptReplayArmed));
.find(|event| event.kind == WorkerEventKind::PromptReplayArmed)
.expect("replay event should exist");
assert_eq!(replay.status, WorkerStatus::ReadyForPrompt);
assert_eq!(
replay.payload,
Some(WorkerEventPayload::PromptDelivery {
prompt_preview: "Implement worker handshake".to_string(),
observed_target: WorkerPromptTarget::Shell,
observed_cwd: None,
recovery_armed: true,
})
);
let replayed = registry
.send_prompt(&worker.worker_id, None)
.expect("replay send should succeed");
assert_eq!(replayed.status, WorkerStatus::PromptAccepted);
assert_eq!(replayed.status, WorkerStatus::Running);
assert!(replayed.replay_prompt.is_none());
assert_eq!(replayed.prompt_delivery_attempts, 2);
}
#[test]
fn prompt_delivery_detects_wrong_target_and_replays_to_expected_worker() {
let registry = WorkerRegistry::new();
let worker = registry.create("/tmp/repo-target-a", &[], true);
registry
.observe(&worker.worker_id, "Ready for input\n>")
.expect("ready observe should succeed");
registry
.send_prompt(&worker.worker_id, Some("Run the worker bootstrap tests"))
.expect("prompt send should succeed");
let recovered = registry
.observe(
&worker.worker_id,
"/tmp/repo-target-b % Run the worker bootstrap tests\nzsh: command not found: Run",
)
.expect("wrong target should be detected");
assert_eq!(recovered.status, WorkerStatus::ReadyForPrompt);
assert_eq!(
recovered.replay_prompt.as_deref(),
Some("Run the worker bootstrap tests")
);
assert!(recovered
.last_error
.expect("wrong target error should exist")
.message
.contains("wrong target"));
let misdelivery = recovered
.events
.iter()
.find(|event| event.kind == WorkerEventKind::PromptMisdelivery)
.expect("wrong-target event should exist");
assert_eq!(
misdelivery.payload,
Some(WorkerEventPayload::PromptDelivery {
prompt_preview: "Run the worker bootstrap tests".to_string(),
observed_target: WorkerPromptTarget::WrongTarget,
observed_cwd: Some("/tmp/repo-target-b".to_string()),
recovery_armed: false,
})
);
}
#[test]
fn await_ready_surfaces_blocked_or_ready_worker_state() {
let registry = WorkerRegistry::new();
@@ -774,6 +1007,7 @@ mod tests {
assert_eq!(restarted.status, WorkerStatus::Spawning);
assert_eq!(restarted.prompt_delivery_attempts, 0);
assert!(restarted.last_prompt.is_none());
assert!(!restarted.prompt_in_flight);
let finished = registry
.terminate(&worker.worker_id)
@@ -796,7 +1030,6 @@ mod tests {
.send_prompt(&worker.worker_id, Some("Run tests"))
.expect("prompt send should succeed");
// Simulate degraded completion: finish="unknown", zero output
let failed = registry
.observe_completion(&worker.worker_id, "unknown", 0)
.expect("completion observe should succeed");
@@ -822,7 +1055,6 @@ mod tests {
.send_prompt(&worker.worker_id, Some("Run tests"))
.expect("prompt send should succeed");
// Normal completion with output
let finished = registry
.observe_completion(&worker.worker_id, "stop", 150)
.expect("completion observe should succeed");

View File

@@ -212,7 +212,7 @@ impl CliOutputFormat {
fn parse_args(args: &[String]) -> Result<CliAction, String> {
let mut model = DEFAULT_MODEL.to_string();
let mut output_format = CliOutputFormat::Text;
let mut permission_mode = default_permission_mode();
let mut permission_mode_override = None;
let mut wants_help = false;
let mut wants_version = false;
let mut allowed_tool_values = Vec::new();
@@ -251,7 +251,7 @@ fn parse_args(args: &[String]) -> Result<CliAction, String> {
let value = args
.get(index + 1)
.ok_or_else(|| "missing value for --permission-mode".to_string())?;
permission_mode = parse_permission_mode_arg(value)?;
permission_mode_override = Some(parse_permission_mode_arg(value)?);
index += 2;
}
flag if flag.starts_with("--output-format=") => {
@@ -259,11 +259,11 @@ fn parse_args(args: &[String]) -> Result<CliAction, String> {
index += 1;
}
flag if flag.starts_with("--permission-mode=") => {
permission_mode = parse_permission_mode_arg(&flag[18..])?;
permission_mode_override = Some(parse_permission_mode_arg(&flag[18..])?);
index += 1;
}
"--dangerously-skip-permissions" => {
permission_mode = PermissionMode::DangerFullAccess;
permission_mode_override = Some(PermissionMode::DangerFullAccess);
index += 1;
}
"-p" => {
@@ -277,7 +277,8 @@ fn parse_args(args: &[String]) -> Result<CliAction, String> {
model: resolve_model_alias(&model).to_string(),
output_format,
allowed_tools: normalize_allowed_tools(&allowed_tool_values)?,
permission_mode,
permission_mode: permission_mode_override
.unwrap_or_else(default_permission_mode),
});
}
"--print" => {
@@ -330,6 +331,7 @@ fn parse_args(args: &[String]) -> Result<CliAction, String> {
let allowed_tools = normalize_allowed_tools(&allowed_tool_values)?;
if rest.is_empty() {
let permission_mode = permission_mode_override.unwrap_or_else(default_permission_mode);
return Ok(CliAction::Repl {
model,
allowed_tools,
@@ -339,10 +341,13 @@ fn parse_args(args: &[String]) -> Result<CliAction, String> {
if rest.first().map(String::as_str) == Some("--resume") {
return parse_resume_args(&rest[1..]);
}
if let Some(action) = parse_single_word_command_alias(&rest, &model, permission_mode) {
if let Some(action) = parse_single_word_command_alias(&rest, &model, permission_mode_override)
{
return action;
}
let permission_mode = permission_mode_override.unwrap_or_else(default_permission_mode);
match rest[0].as_str() {
"dump-manifests" => Ok(CliAction::DumpManifests),
"bootstrap-plan" => Ok(CliAction::BootstrapPlan),
@@ -386,7 +391,7 @@ fn parse_args(args: &[String]) -> Result<CliAction, String> {
fn parse_single_word_command_alias(
rest: &[String],
model: &str,
permission_mode: PermissionMode,
permission_mode_override: Option<PermissionMode>,
) -> Option<Result<CliAction, String>> {
if rest.len() != 1 {
return None;
@@ -397,7 +402,7 @@ fn parse_single_word_command_alias(
"version" => Some(Ok(CliAction::Version)),
"status" => Some(Ok(CliAction::Status {
model: model.to_string(),
permission_mode,
permission_mode: permission_mode_override.unwrap_or_else(default_permission_mode),
})),
"sandbox" => Some(Ok(CliAction::Sandbox)),
other => bare_slash_command_guidance(other).map(Err),
@@ -588,6 +593,9 @@ fn resolve_model_alias(model: &str) -> &str {
}
fn normalize_allowed_tools(values: &[String]) -> Result<Option<AllowedToolSet>, String> {
if values.is_empty() {
return Ok(None);
}
current_tool_registry()?.normalize_allowed_tools(values)
}
@@ -5718,11 +5726,17 @@ mod tests {
}
fn with_current_dir<T>(cwd: &Path, f: impl FnOnce() -> T) -> T {
let _guard = cwd_lock()
.lock()
.unwrap_or_else(std::sync::PoisonError::into_inner);
let previous = std::env::current_dir().expect("cwd should load");
std::env::set_current_dir(cwd).expect("cwd should change");
let result = f();
let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(f));
std::env::set_current_dir(previous).expect("cwd should restore");
result
match result {
Ok(value) => value,
Err(payload) => std::panic::resume_unwind(payload),
}
}
fn write_plugin_fixture(root: &Path, name: &str, include_hooks: bool, include_lifecycle: bool) {

View File

@@ -5031,6 +5031,14 @@ mod tests {
let observed_output: serde_json::Value = serde_json::from_str(&observed).expect("json");
assert_eq!(observed_output["status"], "spawning");
assert_eq!(observed_output["trust_gate_cleared"], true);
assert_eq!(
observed_output["events"][1]["payload"]["type"],
"trust_prompt"
);
assert_eq!(
observed_output["events"][2]["payload"]["resolution"],
"auto_allowlisted"
);
let ready = execute_tool(
"WorkerObserve",
@@ -5063,8 +5071,9 @@ mod tests {
)
.expect("WorkerSendPrompt should succeed after ready");
let accepted_output: serde_json::Value = serde_json::from_str(&accepted).expect("json");
assert_eq!(accepted_output["status"], "prompt_accepted");
assert_eq!(accepted_output["status"], "running");
assert_eq!(accepted_output["prompt_delivery_attempts"], 1);
assert_eq!(accepted_output["prompt_in_flight"], true);
}
#[test]
@@ -5112,6 +5121,14 @@ mod tests {
assert_eq!(recovered_output["status"], "ready_for_prompt");
assert_eq!(recovered_output["last_error"]["kind"], "prompt_delivery");
assert_eq!(recovered_output["replay_prompt"], "Investigate flaky boot");
assert_eq!(
recovered_output["events"][3]["payload"]["observed_target"],
"shell"
);
assert_eq!(
recovered_output["events"][4]["payload"]["recovery_armed"],
true
);
let replayed = execute_tool(
"WorkerSendPrompt",
@@ -5121,8 +5138,9 @@ mod tests {
)
.expect("WorkerSendPrompt should replay recovered prompt");
let replayed_output: serde_json::Value = serde_json::from_str(&replayed).expect("json");
assert_eq!(replayed_output["status"], "prompt_accepted");
assert_eq!(replayed_output["status"], "running");
assert_eq!(replayed_output["prompt_delivery_attempts"], 2);
assert_eq!(replayed_output["prompt_in_flight"], true);
}
#[test]