mirror of
https://github.com/instructkr/claude-code.git
synced 2026-04-05 18:58:48 +03:00
Harden worker boot recovery before task dispatch
The worker boot registry now exposes the requested lifecycle states, emits structured trust and prompt-delivery events, and recovers from shell or wrong-target prompt delivery by replaying the last prompt. Supporting fixes keep MCP remote config parsing backwards-compatible and make CLI argument parsing less dependent on ambient config and cwd state so the workspace stays green under full parallel test runs. Constraint: Worker prompts must not be dispatched before a confirmed ready_for_prompt handshake Constraint: Prompt misdelivery recovery must stay minimal and avoid new dependencies Rejected: Keep prompt_accepted and blocked as public lifecycle states | user requested the narrower explicit state set Rejected: Treat url-only MCP server configs as invalid | existing CLI/runtime tests still rely on that shorthand Confidence: high Scope-risk: moderate Reversibility: clean Directive: Preserve prompt_in_flight semantics when extending worker boot; misdelivery detection depends on it Tested: cargo build --workspace; cargo test --workspace Not-tested: Live tmux worker delivery against a real external coding agent pane
This commit is contained in:
@@ -786,7 +786,8 @@ fn parse_mcp_server_config(
|
||||
context: &str,
|
||||
) -> Result<McpServerConfig, ConfigError> {
|
||||
let object = expect_object(value, context)?;
|
||||
let server_type = optional_string(object, "type", context)?.unwrap_or("stdio");
|
||||
let server_type =
|
||||
optional_string(object, "type", context)?.unwrap_or_else(|| infer_mcp_server_type(object));
|
||||
match server_type {
|
||||
"stdio" => Ok(McpServerConfig::Stdio(McpStdioServerConfig {
|
||||
command: expect_string(object, "command", context)?.to_string(),
|
||||
@@ -818,6 +819,14 @@ fn parse_mcp_server_config(
|
||||
}
|
||||
}
|
||||
|
||||
fn infer_mcp_server_type(object: &BTreeMap<String, JsonValue>) -> &'static str {
|
||||
if object.contains_key("url") {
|
||||
"http"
|
||||
} else {
|
||||
"stdio"
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_mcp_remote_server_config(
|
||||
object: &BTreeMap<String, JsonValue>,
|
||||
context: &str,
|
||||
@@ -1297,6 +1306,41 @@ mod tests {
|
||||
fs::remove_dir_all(root).expect("cleanup temp dir");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn infers_http_mcp_servers_from_url_only_config() {
|
||||
let root = temp_dir();
|
||||
let cwd = root.join("project");
|
||||
let home = root.join("home").join(".claw");
|
||||
fs::create_dir_all(&home).expect("home config dir");
|
||||
fs::create_dir_all(&cwd).expect("project dir");
|
||||
fs::write(
|
||||
home.join("settings.json"),
|
||||
r#"{
|
||||
"mcpServers": {
|
||||
"remote": {
|
||||
"url": "https://example.test/mcp"
|
||||
}
|
||||
}
|
||||
}"#,
|
||||
)
|
||||
.expect("write mcp settings");
|
||||
|
||||
let loaded = ConfigLoader::new(&cwd, &home)
|
||||
.load()
|
||||
.expect("config should load");
|
||||
|
||||
let remote_server = loaded.mcp().get("remote").expect("remote server should exist");
|
||||
assert_eq!(remote_server.transport(), McpTransport::Http);
|
||||
match &remote_server.config {
|
||||
McpServerConfig::Http(config) => {
|
||||
assert_eq!(config.url, "https://example.test/mcp");
|
||||
}
|
||||
other => panic!("expected http config, got {other:?}"),
|
||||
}
|
||||
|
||||
fs::remove_dir_all(root).expect("cleanup temp dir");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parses_plugin_config_from_enabled_plugins() {
|
||||
let root = temp_dir();
|
||||
|
||||
@@ -143,8 +143,9 @@ pub use usage::{
|
||||
format_usd, pricing_for_model, ModelPricing, TokenUsage, UsageCostEstimate, UsageTracker,
|
||||
};
|
||||
pub use worker_boot::{
|
||||
Worker, WorkerEvent, WorkerEventKind, WorkerFailure, WorkerFailureKind, WorkerReadySnapshot,
|
||||
WorkerRegistry, WorkerStatus,
|
||||
Worker, WorkerEvent, WorkerEventKind, WorkerEventPayload, WorkerFailure, WorkerFailureKind,
|
||||
WorkerPromptTarget, WorkerReadySnapshot, WorkerRegistry, WorkerStatus,
|
||||
WorkerTrustResolution,
|
||||
};
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -360,7 +360,8 @@ impl McpServerManagerError {
|
||||
}
|
||||
|
||||
fn recoverable(&self) -> bool {
|
||||
matches!(self, Self::Transport { .. } | Self::Timeout { .. })
|
||||
!matches!(self.lifecycle_phase(), McpLifecyclePhase::InitializeHandshake)
|
||||
&& matches!(self, Self::Transport { .. } | Self::Timeout { .. })
|
||||
}
|
||||
|
||||
fn discovery_failure(&self, server_name: &str) -> McpDiscoveryFailure {
|
||||
|
||||
@@ -24,9 +24,7 @@ pub enum WorkerStatus {
|
||||
Spawning,
|
||||
TrustRequired,
|
||||
ReadyForPrompt,
|
||||
PromptAccepted,
|
||||
Running,
|
||||
Blocked,
|
||||
Finished,
|
||||
Failed,
|
||||
}
|
||||
@@ -37,9 +35,7 @@ impl std::fmt::Display for WorkerStatus {
|
||||
Self::Spawning => write!(f, "spawning"),
|
||||
Self::TrustRequired => write!(f, "trust_required"),
|
||||
Self::ReadyForPrompt => write!(f, "ready_for_prompt"),
|
||||
Self::PromptAccepted => write!(f, "prompt_accepted"),
|
||||
Self::Running => write!(f, "running"),
|
||||
Self::Blocked => write!(f, "blocked"),
|
||||
Self::Finished => write!(f, "finished"),
|
||||
Self::Failed => write!(f, "failed"),
|
||||
}
|
||||
@@ -69,7 +65,6 @@ pub enum WorkerEventKind {
|
||||
TrustRequired,
|
||||
TrustResolved,
|
||||
ReadyForPrompt,
|
||||
PromptAccepted,
|
||||
PromptMisdelivery,
|
||||
PromptReplayArmed,
|
||||
Running,
|
||||
@@ -78,12 +73,46 @@ pub enum WorkerEventKind {
|
||||
Failed,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum WorkerTrustResolution {
|
||||
AutoAllowlisted,
|
||||
ManualApproval,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum WorkerPromptTarget {
|
||||
Shell,
|
||||
WrongTarget,
|
||||
Unknown,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
#[serde(tag = "type", rename_all = "snake_case")]
|
||||
pub enum WorkerEventPayload {
|
||||
TrustPrompt {
|
||||
cwd: String,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
resolution: Option<WorkerTrustResolution>,
|
||||
},
|
||||
PromptDelivery {
|
||||
prompt_preview: String,
|
||||
observed_target: WorkerPromptTarget,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
observed_cwd: Option<String>,
|
||||
recovery_armed: bool,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
pub struct WorkerEvent {
|
||||
pub seq: u64,
|
||||
pub kind: WorkerEventKind,
|
||||
pub status: WorkerStatus,
|
||||
pub detail: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub payload: Option<WorkerEventPayload>,
|
||||
pub timestamp: u64,
|
||||
}
|
||||
|
||||
@@ -96,6 +125,7 @@ pub struct Worker {
|
||||
pub trust_gate_cleared: bool,
|
||||
pub auto_recover_prompt_misdelivery: bool,
|
||||
pub prompt_delivery_attempts: u32,
|
||||
pub prompt_in_flight: bool,
|
||||
pub last_prompt: Option<String>,
|
||||
pub replay_prompt: Option<String>,
|
||||
pub last_error: Option<WorkerFailure>,
|
||||
@@ -143,6 +173,7 @@ impl WorkerRegistry {
|
||||
trust_gate_cleared: false,
|
||||
auto_recover_prompt_misdelivery,
|
||||
prompt_delivery_attempts: 0,
|
||||
prompt_in_flight: false,
|
||||
last_prompt: None,
|
||||
replay_prompt: None,
|
||||
last_error: None,
|
||||
@@ -155,6 +186,7 @@ impl WorkerRegistry {
|
||||
WorkerEventKind::Spawning,
|
||||
WorkerStatus::Spawning,
|
||||
Some("worker created".to_string()),
|
||||
None,
|
||||
);
|
||||
inner.workers.insert(worker_id, worker.clone());
|
||||
worker
|
||||
@@ -186,6 +218,10 @@ impl WorkerRegistry {
|
||||
WorkerEventKind::TrustRequired,
|
||||
WorkerStatus::TrustRequired,
|
||||
Some("trust prompt detected".to_string()),
|
||||
Some(WorkerEventPayload::TrustPrompt {
|
||||
cwd: worker.cwd.clone(),
|
||||
resolution: None,
|
||||
}),
|
||||
);
|
||||
|
||||
if worker.trust_auto_resolve {
|
||||
@@ -197,26 +233,57 @@ impl WorkerRegistry {
|
||||
WorkerEventKind::TrustResolved,
|
||||
WorkerStatus::Spawning,
|
||||
Some("allowlisted repo auto-resolved trust prompt".to_string()),
|
||||
Some(WorkerEventPayload::TrustPrompt {
|
||||
cwd: worker.cwd.clone(),
|
||||
resolution: Some(WorkerTrustResolution::AutoAllowlisted),
|
||||
}),
|
||||
);
|
||||
} else {
|
||||
return Ok(worker.clone());
|
||||
}
|
||||
}
|
||||
|
||||
if prompt_misdelivery_is_relevant(worker)
|
||||
&& detect_prompt_misdelivery(&lowered, worker.last_prompt.as_deref())
|
||||
if let Some(observation) = prompt_misdelivery_is_relevant(worker)
|
||||
.then(|| {
|
||||
detect_prompt_misdelivery(
|
||||
screen_text,
|
||||
&lowered,
|
||||
worker.last_prompt.as_deref(),
|
||||
&worker.cwd,
|
||||
)
|
||||
})
|
||||
.flatten()
|
||||
{
|
||||
let detail = prompt_preview(worker.last_prompt.as_deref().unwrap_or_default());
|
||||
let prompt_preview = prompt_preview(worker.last_prompt.as_deref().unwrap_or_default());
|
||||
let message = match observation.target {
|
||||
WorkerPromptTarget::Shell => {
|
||||
format!("worker prompt landed in shell instead of coding agent: {prompt_preview}")
|
||||
}
|
||||
WorkerPromptTarget::WrongTarget => format!(
|
||||
"worker prompt landed in the wrong target instead of {}: {}",
|
||||
worker.cwd, prompt_preview
|
||||
),
|
||||
WorkerPromptTarget::Unknown => format!(
|
||||
"worker prompt delivery failed before reaching coding agent: {prompt_preview}"
|
||||
),
|
||||
};
|
||||
worker.last_error = Some(WorkerFailure {
|
||||
kind: WorkerFailureKind::PromptDelivery,
|
||||
message: format!("worker prompt landed in shell instead of coding agent: {detail}"),
|
||||
message,
|
||||
created_at: now_secs(),
|
||||
});
|
||||
worker.prompt_in_flight = false;
|
||||
push_event(
|
||||
worker,
|
||||
WorkerEventKind::PromptMisdelivery,
|
||||
WorkerStatus::Blocked,
|
||||
Some("shell misdelivery detected".to_string()),
|
||||
WorkerStatus::Failed,
|
||||
Some(prompt_misdelivery_detail(&observation).to_string()),
|
||||
Some(WorkerEventPayload::PromptDelivery {
|
||||
prompt_preview: prompt_preview.clone(),
|
||||
observed_target: observation.target,
|
||||
observed_cwd: observation.observed_cwd.clone(),
|
||||
recovery_armed: false,
|
||||
}),
|
||||
);
|
||||
if worker.auto_recover_prompt_misdelivery {
|
||||
worker.replay_prompt = worker.last_prompt.clone();
|
||||
@@ -225,37 +292,29 @@ impl WorkerRegistry {
|
||||
worker,
|
||||
WorkerEventKind::PromptReplayArmed,
|
||||
WorkerStatus::ReadyForPrompt,
|
||||
Some("prompt replay armed after shell misdelivery".to_string()),
|
||||
Some("prompt replay armed after prompt misdelivery".to_string()),
|
||||
Some(WorkerEventPayload::PromptDelivery {
|
||||
prompt_preview,
|
||||
observed_target: observation.target,
|
||||
observed_cwd: observation.observed_cwd,
|
||||
recovery_armed: true,
|
||||
}),
|
||||
);
|
||||
} else {
|
||||
worker.status = WorkerStatus::Blocked;
|
||||
worker.status = WorkerStatus::Failed;
|
||||
}
|
||||
return Ok(worker.clone());
|
||||
}
|
||||
|
||||
if detect_running_cue(&lowered)
|
||||
&& matches!(
|
||||
worker.status,
|
||||
WorkerStatus::PromptAccepted | WorkerStatus::ReadyForPrompt
|
||||
)
|
||||
{
|
||||
if detect_running_cue(&lowered) && worker.prompt_in_flight {
|
||||
worker.prompt_in_flight = false;
|
||||
worker.status = WorkerStatus::Running;
|
||||
worker.last_error = None;
|
||||
push_event(
|
||||
worker,
|
||||
WorkerEventKind::Running,
|
||||
WorkerStatus::Running,
|
||||
Some("worker accepted prompt and started running".to_string()),
|
||||
);
|
||||
}
|
||||
|
||||
if detect_ready_for_prompt(screen_text, &lowered)
|
||||
&& !matches!(
|
||||
worker.status,
|
||||
WorkerStatus::ReadyForPrompt | WorkerStatus::Running
|
||||
)
|
||||
{
|
||||
if detect_ready_for_prompt(screen_text, &lowered) && worker.status != WorkerStatus::ReadyForPrompt {
|
||||
worker.status = WorkerStatus::ReadyForPrompt;
|
||||
worker.prompt_in_flight = false;
|
||||
if matches!(
|
||||
worker.last_error.as_ref().map(|failure| failure.kind),
|
||||
Some(WorkerFailureKind::TrustGate)
|
||||
@@ -267,6 +326,7 @@ impl WorkerRegistry {
|
||||
WorkerEventKind::ReadyForPrompt,
|
||||
WorkerStatus::ReadyForPrompt,
|
||||
Some("worker is ready for prompt delivery".to_string()),
|
||||
None,
|
||||
);
|
||||
}
|
||||
|
||||
@@ -295,6 +355,10 @@ impl WorkerRegistry {
|
||||
WorkerEventKind::TrustResolved,
|
||||
WorkerStatus::Spawning,
|
||||
Some("trust prompt resolved manually".to_string()),
|
||||
Some(WorkerEventPayload::TrustPrompt {
|
||||
cwd: worker.cwd.clone(),
|
||||
resolution: Some(WorkerTrustResolution::ManualApproval),
|
||||
}),
|
||||
);
|
||||
Ok(worker.clone())
|
||||
}
|
||||
@@ -321,18 +385,20 @@ impl WorkerRegistry {
|
||||
.ok_or_else(|| format!("worker {worker_id} has no prompt to send or replay"))?;
|
||||
|
||||
worker.prompt_delivery_attempts += 1;
|
||||
worker.prompt_in_flight = true;
|
||||
worker.last_prompt = Some(next_prompt.clone());
|
||||
worker.replay_prompt = None;
|
||||
worker.last_error = None;
|
||||
worker.status = WorkerStatus::PromptAccepted;
|
||||
worker.status = WorkerStatus::Running;
|
||||
push_event(
|
||||
worker,
|
||||
WorkerEventKind::PromptAccepted,
|
||||
WorkerStatus::PromptAccepted,
|
||||
WorkerEventKind::Running,
|
||||
WorkerStatus::Running,
|
||||
Some(format!(
|
||||
"prompt accepted for delivery: {}",
|
||||
"prompt dispatched to worker: {}",
|
||||
prompt_preview(&next_prompt)
|
||||
)),
|
||||
None,
|
||||
);
|
||||
Ok(worker.clone())
|
||||
}
|
||||
@@ -346,10 +412,7 @@ impl WorkerRegistry {
|
||||
worker_id: worker.worker_id.clone(),
|
||||
status: worker.status,
|
||||
ready: worker.status == WorkerStatus::ReadyForPrompt,
|
||||
blocked: matches!(
|
||||
worker.status,
|
||||
WorkerStatus::TrustRequired | WorkerStatus::Blocked
|
||||
),
|
||||
blocked: matches!(worker.status, WorkerStatus::TrustRequired | WorkerStatus::Failed),
|
||||
replay_prompt_ready: worker.replay_prompt.is_some(),
|
||||
last_error: worker.last_error.clone(),
|
||||
})
|
||||
@@ -367,11 +430,13 @@ impl WorkerRegistry {
|
||||
worker.replay_prompt = None;
|
||||
worker.last_error = None;
|
||||
worker.prompt_delivery_attempts = 0;
|
||||
worker.prompt_in_flight = false;
|
||||
push_event(
|
||||
worker,
|
||||
WorkerEventKind::Restarted,
|
||||
WorkerStatus::Spawning,
|
||||
Some("worker restarted".to_string()),
|
||||
None,
|
||||
);
|
||||
Ok(worker.clone())
|
||||
}
|
||||
@@ -383,11 +448,13 @@ impl WorkerRegistry {
|
||||
.get_mut(worker_id)
|
||||
.ok_or_else(|| format!("worker not found: {worker_id}"))?;
|
||||
worker.status = WorkerStatus::Finished;
|
||||
worker.prompt_in_flight = false;
|
||||
push_event(
|
||||
worker,
|
||||
WorkerEventKind::Finished,
|
||||
WorkerStatus::Finished,
|
||||
Some("worker terminated by control plane".to_string()),
|
||||
None,
|
||||
);
|
||||
Ok(worker.clone())
|
||||
}
|
||||
@@ -406,7 +473,6 @@ impl WorkerRegistry {
|
||||
.get_mut(worker_id)
|
||||
.ok_or_else(|| format!("worker not found: {worker_id}"))?;
|
||||
|
||||
// Detect degraded completion: finish=unknown with zero output is provider failure
|
||||
let is_provider_failure =
|
||||
(finish_reason == "unknown" && tokens_output == 0) || finish_reason == "error";
|
||||
|
||||
@@ -423,15 +489,17 @@ impl WorkerRegistry {
|
||||
created_at: now_secs(),
|
||||
});
|
||||
worker.status = WorkerStatus::Failed;
|
||||
worker.prompt_in_flight = false;
|
||||
push_event(
|
||||
worker,
|
||||
WorkerEventKind::Failed,
|
||||
WorkerStatus::Failed,
|
||||
Some("provider failure classified".to_string()),
|
||||
None,
|
||||
);
|
||||
} else {
|
||||
// Normal completion
|
||||
worker.status = WorkerStatus::Finished;
|
||||
worker.prompt_in_flight = false;
|
||||
worker.last_error = None;
|
||||
push_event(
|
||||
worker,
|
||||
@@ -440,6 +508,7 @@ impl WorkerRegistry {
|
||||
Some(format!(
|
||||
"session completed: finish='{finish_reason}', tokens={tokens_output}"
|
||||
)),
|
||||
None,
|
||||
);
|
||||
}
|
||||
|
||||
@@ -458,10 +527,13 @@ pub struct WorkerReadySnapshot {
|
||||
}
|
||||
|
||||
fn prompt_misdelivery_is_relevant(worker: &Worker) -> bool {
|
||||
matches!(
|
||||
worker.status,
|
||||
WorkerStatus::PromptAccepted | WorkerStatus::Running
|
||||
) && worker.last_prompt.is_some()
|
||||
worker.prompt_in_flight && worker.last_prompt.is_some()
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
struct PromptDeliveryObservation {
|
||||
target: WorkerPromptTarget,
|
||||
observed_cwd: Option<String>,
|
||||
}
|
||||
|
||||
fn push_event(
|
||||
@@ -469,6 +541,7 @@ fn push_event(
|
||||
kind: WorkerEventKind,
|
||||
status: WorkerStatus,
|
||||
detail: Option<String>,
|
||||
payload: Option<WorkerEventPayload>,
|
||||
) {
|
||||
let timestamp = now_secs();
|
||||
let seq = worker.events.len() as u64 + 1;
|
||||
@@ -478,6 +551,7 @@ fn push_event(
|
||||
kind,
|
||||
status,
|
||||
detail,
|
||||
payload,
|
||||
timestamp,
|
||||
});
|
||||
}
|
||||
@@ -561,11 +635,35 @@ fn is_shell_prompt(trimmed: &str) -> bool {
|
||||
|| trimmed.starts_with('#')
|
||||
}
|
||||
|
||||
fn detect_prompt_misdelivery(lowered: &str, prompt: Option<&str>) -> bool {
|
||||
fn detect_prompt_misdelivery(
|
||||
screen_text: &str,
|
||||
lowered: &str,
|
||||
prompt: Option<&str>,
|
||||
expected_cwd: &str,
|
||||
) -> Option<PromptDeliveryObservation> {
|
||||
let Some(prompt) = prompt else {
|
||||
return false;
|
||||
return None;
|
||||
};
|
||||
|
||||
let prompt_snippet = prompt
|
||||
.lines()
|
||||
.find(|line| !line.trim().is_empty())
|
||||
.map(|line| line.trim().to_ascii_lowercase())
|
||||
.unwrap_or_default();
|
||||
if prompt_snippet.is_empty() {
|
||||
return None;
|
||||
}
|
||||
let prompt_visible = lowered.contains(&prompt_snippet);
|
||||
|
||||
if let Some(observed_cwd) = detect_observed_shell_cwd(screen_text) {
|
||||
if prompt_visible && !cwd_matches_observed_target(expected_cwd, &observed_cwd) {
|
||||
return Some(PromptDeliveryObservation {
|
||||
target: WorkerPromptTarget::WrongTarget,
|
||||
observed_cwd: Some(observed_cwd),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
let shell_error = [
|
||||
"command not found",
|
||||
"syntax error near unexpected token",
|
||||
@@ -576,17 +674,10 @@ fn detect_prompt_misdelivery(lowered: &str, prompt: Option<&str>) -> bool {
|
||||
.iter()
|
||||
.any(|needle| lowered.contains(needle));
|
||||
|
||||
if !shell_error {
|
||||
return false;
|
||||
}
|
||||
|
||||
let first_prompt_line = prompt
|
||||
.lines()
|
||||
.find(|line| !line.trim().is_empty())
|
||||
.map(|line| line.trim().to_ascii_lowercase())
|
||||
.unwrap_or_default();
|
||||
|
||||
first_prompt_line.is_empty() || lowered.contains(&first_prompt_line)
|
||||
(shell_error && prompt_visible).then_some(PromptDeliveryObservation {
|
||||
target: WorkerPromptTarget::Shell,
|
||||
observed_cwd: None,
|
||||
})
|
||||
}
|
||||
|
||||
fn prompt_preview(prompt: &str) -> String {
|
||||
@@ -598,6 +689,53 @@ fn prompt_preview(prompt: &str) -> String {
|
||||
format!("{}…", preview.trim_end())
|
||||
}
|
||||
|
||||
fn prompt_misdelivery_detail(observation: &PromptDeliveryObservation) -> &'static str {
|
||||
match observation.target {
|
||||
WorkerPromptTarget::Shell => "shell misdelivery detected",
|
||||
WorkerPromptTarget::WrongTarget => "prompt landed in wrong target",
|
||||
WorkerPromptTarget::Unknown => "prompt delivery failure detected",
|
||||
}
|
||||
}
|
||||
|
||||
fn detect_observed_shell_cwd(screen_text: &str) -> Option<String> {
|
||||
screen_text.lines().find_map(|line| {
|
||||
let tokens = line.split_whitespace().collect::<Vec<_>>();
|
||||
tokens
|
||||
.iter()
|
||||
.position(|token| is_shell_prompt_token(token))
|
||||
.and_then(|index| index.checked_sub(1).map(|cwd_index| tokens[cwd_index]))
|
||||
.filter(|candidate| looks_like_cwd_label(candidate))
|
||||
.map(ToOwned::to_owned)
|
||||
})
|
||||
}
|
||||
|
||||
fn is_shell_prompt_token(token: &&str) -> bool {
|
||||
matches!(*token, "$" | "%" | "#" | ">" | "›" | "❯")
|
||||
}
|
||||
|
||||
fn looks_like_cwd_label(candidate: &str) -> bool {
|
||||
candidate.starts_with('/')
|
||||
|| candidate.starts_with('~')
|
||||
|| candidate.starts_with('.')
|
||||
|| candidate.contains('/')
|
||||
}
|
||||
|
||||
fn cwd_matches_observed_target(expected_cwd: &str, observed_cwd: &str) -> bool {
|
||||
let expected = normalize_path(expected_cwd);
|
||||
let expected_base = expected
|
||||
.file_name()
|
||||
.map(|segment| segment.to_string_lossy().into_owned())
|
||||
.unwrap_or_else(|| expected.to_string_lossy().into_owned());
|
||||
let observed_base = Path::new(observed_cwd)
|
||||
.file_name()
|
||||
.map(|segment| segment.to_string_lossy().into_owned())
|
||||
.unwrap_or_else(|| observed_cwd.trim_matches(':').to_string());
|
||||
|
||||
expected.to_string_lossy().ends_with(observed_cwd)
|
||||
|| observed_cwd.ends_with(expected.to_string_lossy().as_ref())
|
||||
|| expected_base == observed_base
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
@@ -619,14 +757,30 @@ mod tests {
|
||||
.expect("trust observe should succeed");
|
||||
assert_eq!(after_trust.status, WorkerStatus::Spawning);
|
||||
assert!(after_trust.trust_gate_cleared);
|
||||
assert!(after_trust
|
||||
let trust_required = after_trust
|
||||
.events
|
||||
.iter()
|
||||
.any(|event| event.kind == WorkerEventKind::TrustRequired));
|
||||
assert!(after_trust
|
||||
.find(|event| event.kind == WorkerEventKind::TrustRequired)
|
||||
.expect("trust required event should exist");
|
||||
assert_eq!(
|
||||
trust_required.payload,
|
||||
Some(WorkerEventPayload::TrustPrompt {
|
||||
cwd: "/tmp/worktrees/repo-a".to_string(),
|
||||
resolution: None,
|
||||
})
|
||||
);
|
||||
let trust_resolved = after_trust
|
||||
.events
|
||||
.iter()
|
||||
.any(|event| event.kind == WorkerEventKind::TrustResolved));
|
||||
.find(|event| event.kind == WorkerEventKind::TrustResolved)
|
||||
.expect("trust resolved event should exist");
|
||||
assert_eq!(
|
||||
trust_resolved.payload,
|
||||
Some(WorkerEventPayload::TrustPrompt {
|
||||
cwd: "/tmp/worktrees/repo-a".to_string(),
|
||||
resolution: Some(WorkerTrustResolution::AutoAllowlisted),
|
||||
})
|
||||
);
|
||||
|
||||
let ready = registry
|
||||
.observe(&worker.worker_id, "Ready for your input\n>")
|
||||
@@ -662,6 +816,18 @@ mod tests {
|
||||
.expect("manual trust resolution should succeed");
|
||||
assert_eq!(resolved.status, WorkerStatus::Spawning);
|
||||
assert!(resolved.trust_gate_cleared);
|
||||
let trust_resolved = resolved
|
||||
.events
|
||||
.iter()
|
||||
.find(|event| event.kind == WorkerEventKind::TrustResolved)
|
||||
.expect("manual trust resolve event should exist");
|
||||
assert_eq!(
|
||||
trust_resolved.payload,
|
||||
Some(WorkerEventPayload::TrustPrompt {
|
||||
cwd: "/tmp/repo-b".to_string(),
|
||||
resolution: Some(WorkerTrustResolution::ManualApproval),
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -679,11 +845,12 @@ mod tests {
|
||||
.observe(&worker.worker_id, "Ready for input\n>")
|
||||
.expect("ready observe should succeed");
|
||||
|
||||
let accepted = registry
|
||||
let running = registry
|
||||
.send_prompt(&worker.worker_id, Some("Implement worker handshake"))
|
||||
.expect("prompt send should succeed");
|
||||
assert_eq!(accepted.status, WorkerStatus::PromptAccepted);
|
||||
assert_eq!(accepted.prompt_delivery_attempts, 1);
|
||||
assert_eq!(running.status, WorkerStatus::Running);
|
||||
assert_eq!(running.prompt_delivery_attempts, 1);
|
||||
assert!(running.prompt_in_flight);
|
||||
|
||||
let recovered = registry
|
||||
.observe(
|
||||
@@ -703,23 +870,89 @@ mod tests {
|
||||
recovered.replay_prompt.as_deref(),
|
||||
Some("Implement worker handshake")
|
||||
);
|
||||
assert!(recovered
|
||||
let misdelivery = recovered
|
||||
.events
|
||||
.iter()
|
||||
.any(|event| event.kind == WorkerEventKind::PromptMisdelivery));
|
||||
assert!(recovered
|
||||
.find(|event| event.kind == WorkerEventKind::PromptMisdelivery)
|
||||
.expect("misdelivery event should exist");
|
||||
assert_eq!(misdelivery.status, WorkerStatus::Failed);
|
||||
assert_eq!(
|
||||
misdelivery.payload,
|
||||
Some(WorkerEventPayload::PromptDelivery {
|
||||
prompt_preview: "Implement worker handshake".to_string(),
|
||||
observed_target: WorkerPromptTarget::Shell,
|
||||
observed_cwd: None,
|
||||
recovery_armed: false,
|
||||
})
|
||||
);
|
||||
let replay = recovered
|
||||
.events
|
||||
.iter()
|
||||
.any(|event| event.kind == WorkerEventKind::PromptReplayArmed));
|
||||
.find(|event| event.kind == WorkerEventKind::PromptReplayArmed)
|
||||
.expect("replay event should exist");
|
||||
assert_eq!(replay.status, WorkerStatus::ReadyForPrompt);
|
||||
assert_eq!(
|
||||
replay.payload,
|
||||
Some(WorkerEventPayload::PromptDelivery {
|
||||
prompt_preview: "Implement worker handshake".to_string(),
|
||||
observed_target: WorkerPromptTarget::Shell,
|
||||
observed_cwd: None,
|
||||
recovery_armed: true,
|
||||
})
|
||||
);
|
||||
|
||||
let replayed = registry
|
||||
.send_prompt(&worker.worker_id, None)
|
||||
.expect("replay send should succeed");
|
||||
assert_eq!(replayed.status, WorkerStatus::PromptAccepted);
|
||||
assert_eq!(replayed.status, WorkerStatus::Running);
|
||||
assert!(replayed.replay_prompt.is_none());
|
||||
assert_eq!(replayed.prompt_delivery_attempts, 2);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn prompt_delivery_detects_wrong_target_and_replays_to_expected_worker() {
|
||||
let registry = WorkerRegistry::new();
|
||||
let worker = registry.create("/tmp/repo-target-a", &[], true);
|
||||
registry
|
||||
.observe(&worker.worker_id, "Ready for input\n>")
|
||||
.expect("ready observe should succeed");
|
||||
registry
|
||||
.send_prompt(&worker.worker_id, Some("Run the worker bootstrap tests"))
|
||||
.expect("prompt send should succeed");
|
||||
|
||||
let recovered = registry
|
||||
.observe(
|
||||
&worker.worker_id,
|
||||
"/tmp/repo-target-b % Run the worker bootstrap tests\nzsh: command not found: Run",
|
||||
)
|
||||
.expect("wrong target should be detected");
|
||||
|
||||
assert_eq!(recovered.status, WorkerStatus::ReadyForPrompt);
|
||||
assert_eq!(
|
||||
recovered.replay_prompt.as_deref(),
|
||||
Some("Run the worker bootstrap tests")
|
||||
);
|
||||
assert!(recovered
|
||||
.last_error
|
||||
.expect("wrong target error should exist")
|
||||
.message
|
||||
.contains("wrong target"));
|
||||
let misdelivery = recovered
|
||||
.events
|
||||
.iter()
|
||||
.find(|event| event.kind == WorkerEventKind::PromptMisdelivery)
|
||||
.expect("wrong-target event should exist");
|
||||
assert_eq!(
|
||||
misdelivery.payload,
|
||||
Some(WorkerEventPayload::PromptDelivery {
|
||||
prompt_preview: "Run the worker bootstrap tests".to_string(),
|
||||
observed_target: WorkerPromptTarget::WrongTarget,
|
||||
observed_cwd: Some("/tmp/repo-target-b".to_string()),
|
||||
recovery_armed: false,
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn await_ready_surfaces_blocked_or_ready_worker_state() {
|
||||
let registry = WorkerRegistry::new();
|
||||
@@ -774,6 +1007,7 @@ mod tests {
|
||||
assert_eq!(restarted.status, WorkerStatus::Spawning);
|
||||
assert_eq!(restarted.prompt_delivery_attempts, 0);
|
||||
assert!(restarted.last_prompt.is_none());
|
||||
assert!(!restarted.prompt_in_flight);
|
||||
|
||||
let finished = registry
|
||||
.terminate(&worker.worker_id)
|
||||
@@ -796,7 +1030,6 @@ mod tests {
|
||||
.send_prompt(&worker.worker_id, Some("Run tests"))
|
||||
.expect("prompt send should succeed");
|
||||
|
||||
// Simulate degraded completion: finish="unknown", zero output
|
||||
let failed = registry
|
||||
.observe_completion(&worker.worker_id, "unknown", 0)
|
||||
.expect("completion observe should succeed");
|
||||
@@ -822,7 +1055,6 @@ mod tests {
|
||||
.send_prompt(&worker.worker_id, Some("Run tests"))
|
||||
.expect("prompt send should succeed");
|
||||
|
||||
// Normal completion with output
|
||||
let finished = registry
|
||||
.observe_completion(&worker.worker_id, "stop", 150)
|
||||
.expect("completion observe should succeed");
|
||||
|
||||
@@ -212,7 +212,7 @@ impl CliOutputFormat {
|
||||
fn parse_args(args: &[String]) -> Result<CliAction, String> {
|
||||
let mut model = DEFAULT_MODEL.to_string();
|
||||
let mut output_format = CliOutputFormat::Text;
|
||||
let mut permission_mode = default_permission_mode();
|
||||
let mut permission_mode_override = None;
|
||||
let mut wants_help = false;
|
||||
let mut wants_version = false;
|
||||
let mut allowed_tool_values = Vec::new();
|
||||
@@ -251,7 +251,7 @@ fn parse_args(args: &[String]) -> Result<CliAction, String> {
|
||||
let value = args
|
||||
.get(index + 1)
|
||||
.ok_or_else(|| "missing value for --permission-mode".to_string())?;
|
||||
permission_mode = parse_permission_mode_arg(value)?;
|
||||
permission_mode_override = Some(parse_permission_mode_arg(value)?);
|
||||
index += 2;
|
||||
}
|
||||
flag if flag.starts_with("--output-format=") => {
|
||||
@@ -259,11 +259,11 @@ fn parse_args(args: &[String]) -> Result<CliAction, String> {
|
||||
index += 1;
|
||||
}
|
||||
flag if flag.starts_with("--permission-mode=") => {
|
||||
permission_mode = parse_permission_mode_arg(&flag[18..])?;
|
||||
permission_mode_override = Some(parse_permission_mode_arg(&flag[18..])?);
|
||||
index += 1;
|
||||
}
|
||||
"--dangerously-skip-permissions" => {
|
||||
permission_mode = PermissionMode::DangerFullAccess;
|
||||
permission_mode_override = Some(PermissionMode::DangerFullAccess);
|
||||
index += 1;
|
||||
}
|
||||
"-p" => {
|
||||
@@ -277,7 +277,8 @@ fn parse_args(args: &[String]) -> Result<CliAction, String> {
|
||||
model: resolve_model_alias(&model).to_string(),
|
||||
output_format,
|
||||
allowed_tools: normalize_allowed_tools(&allowed_tool_values)?,
|
||||
permission_mode,
|
||||
permission_mode: permission_mode_override
|
||||
.unwrap_or_else(default_permission_mode),
|
||||
});
|
||||
}
|
||||
"--print" => {
|
||||
@@ -330,6 +331,7 @@ fn parse_args(args: &[String]) -> Result<CliAction, String> {
|
||||
let allowed_tools = normalize_allowed_tools(&allowed_tool_values)?;
|
||||
|
||||
if rest.is_empty() {
|
||||
let permission_mode = permission_mode_override.unwrap_or_else(default_permission_mode);
|
||||
return Ok(CliAction::Repl {
|
||||
model,
|
||||
allowed_tools,
|
||||
@@ -339,10 +341,13 @@ fn parse_args(args: &[String]) -> Result<CliAction, String> {
|
||||
if rest.first().map(String::as_str) == Some("--resume") {
|
||||
return parse_resume_args(&rest[1..]);
|
||||
}
|
||||
if let Some(action) = parse_single_word_command_alias(&rest, &model, permission_mode) {
|
||||
if let Some(action) = parse_single_word_command_alias(&rest, &model, permission_mode_override)
|
||||
{
|
||||
return action;
|
||||
}
|
||||
|
||||
let permission_mode = permission_mode_override.unwrap_or_else(default_permission_mode);
|
||||
|
||||
match rest[0].as_str() {
|
||||
"dump-manifests" => Ok(CliAction::DumpManifests),
|
||||
"bootstrap-plan" => Ok(CliAction::BootstrapPlan),
|
||||
@@ -386,7 +391,7 @@ fn parse_args(args: &[String]) -> Result<CliAction, String> {
|
||||
fn parse_single_word_command_alias(
|
||||
rest: &[String],
|
||||
model: &str,
|
||||
permission_mode: PermissionMode,
|
||||
permission_mode_override: Option<PermissionMode>,
|
||||
) -> Option<Result<CliAction, String>> {
|
||||
if rest.len() != 1 {
|
||||
return None;
|
||||
@@ -397,7 +402,7 @@ fn parse_single_word_command_alias(
|
||||
"version" => Some(Ok(CliAction::Version)),
|
||||
"status" => Some(Ok(CliAction::Status {
|
||||
model: model.to_string(),
|
||||
permission_mode,
|
||||
permission_mode: permission_mode_override.unwrap_or_else(default_permission_mode),
|
||||
})),
|
||||
"sandbox" => Some(Ok(CliAction::Sandbox)),
|
||||
other => bare_slash_command_guidance(other).map(Err),
|
||||
@@ -588,6 +593,9 @@ fn resolve_model_alias(model: &str) -> &str {
|
||||
}
|
||||
|
||||
fn normalize_allowed_tools(values: &[String]) -> Result<Option<AllowedToolSet>, String> {
|
||||
if values.is_empty() {
|
||||
return Ok(None);
|
||||
}
|
||||
current_tool_registry()?.normalize_allowed_tools(values)
|
||||
}
|
||||
|
||||
@@ -5718,11 +5726,17 @@ mod tests {
|
||||
}
|
||||
|
||||
fn with_current_dir<T>(cwd: &Path, f: impl FnOnce() -> T) -> T {
|
||||
let _guard = cwd_lock()
|
||||
.lock()
|
||||
.unwrap_or_else(std::sync::PoisonError::into_inner);
|
||||
let previous = std::env::current_dir().expect("cwd should load");
|
||||
std::env::set_current_dir(cwd).expect("cwd should change");
|
||||
let result = f();
|
||||
let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(f));
|
||||
std::env::set_current_dir(previous).expect("cwd should restore");
|
||||
result
|
||||
match result {
|
||||
Ok(value) => value,
|
||||
Err(payload) => std::panic::resume_unwind(payload),
|
||||
}
|
||||
}
|
||||
|
||||
fn write_plugin_fixture(root: &Path, name: &str, include_hooks: bool, include_lifecycle: bool) {
|
||||
|
||||
@@ -5031,6 +5031,14 @@ mod tests {
|
||||
let observed_output: serde_json::Value = serde_json::from_str(&observed).expect("json");
|
||||
assert_eq!(observed_output["status"], "spawning");
|
||||
assert_eq!(observed_output["trust_gate_cleared"], true);
|
||||
assert_eq!(
|
||||
observed_output["events"][1]["payload"]["type"],
|
||||
"trust_prompt"
|
||||
);
|
||||
assert_eq!(
|
||||
observed_output["events"][2]["payload"]["resolution"],
|
||||
"auto_allowlisted"
|
||||
);
|
||||
|
||||
let ready = execute_tool(
|
||||
"WorkerObserve",
|
||||
@@ -5063,8 +5071,9 @@ mod tests {
|
||||
)
|
||||
.expect("WorkerSendPrompt should succeed after ready");
|
||||
let accepted_output: serde_json::Value = serde_json::from_str(&accepted).expect("json");
|
||||
assert_eq!(accepted_output["status"], "prompt_accepted");
|
||||
assert_eq!(accepted_output["status"], "running");
|
||||
assert_eq!(accepted_output["prompt_delivery_attempts"], 1);
|
||||
assert_eq!(accepted_output["prompt_in_flight"], true);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -5112,6 +5121,14 @@ mod tests {
|
||||
assert_eq!(recovered_output["status"], "ready_for_prompt");
|
||||
assert_eq!(recovered_output["last_error"]["kind"], "prompt_delivery");
|
||||
assert_eq!(recovered_output["replay_prompt"], "Investigate flaky boot");
|
||||
assert_eq!(
|
||||
recovered_output["events"][3]["payload"]["observed_target"],
|
||||
"shell"
|
||||
);
|
||||
assert_eq!(
|
||||
recovered_output["events"][4]["payload"]["recovery_armed"],
|
||||
true
|
||||
);
|
||||
|
||||
let replayed = execute_tool(
|
||||
"WorkerSendPrompt",
|
||||
@@ -5121,8 +5138,9 @@ mod tests {
|
||||
)
|
||||
.expect("WorkerSendPrompt should replay recovered prompt");
|
||||
let replayed_output: serde_json::Value = serde_json::from_str(&replayed).expect("json");
|
||||
assert_eq!(replayed_output["status"], "prompt_accepted");
|
||||
assert_eq!(replayed_output["status"], "running");
|
||||
assert_eq!(replayed_output["prompt_delivery_attempts"], 2);
|
||||
assert_eq!(replayed_output["prompt_in_flight"], true);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
Reference in New Issue
Block a user