mirror of
https://github.com/instructkr/claude-code.git
synced 2026-04-05 18:58:48 +03:00
merge: ultraclaw/recovery-recipes into main
This commit is contained in:
@@ -17,6 +17,8 @@ pub mod mcp_tool_bridge;
|
||||
mod oauth;
|
||||
pub mod permission_enforcer;
|
||||
mod policy_engine;
|
||||
||||||| f76311f
|
||||
pub mod recovery_recipes;
|
||||
mod permissions;
|
||||
pub mod plugin_lifecycle;
|
||||
mod prompt;
|
||||
@@ -102,6 +104,11 @@ pub use plugin_lifecycle::{
|
||||
DegradedMode, DiscoveryResult, PluginHealthcheck, PluginLifecycle, PluginLifecycleEvent,
|
||||
PluginState, ResourceInfo, ServerHealth, ServerStatus, ToolInfo,
|
||||
};
|
||||
||||||| f76311f
|
||||
pub use recovery_recipes::{
|
||||
attempt_recovery, recipe_for, EscalationPolicy, FailureScenario, RecoveryContext,
|
||||
RecoveryEvent, RecoveryRecipe, RecoveryResult, RecoveryStep,
|
||||
};
|
||||
pub use prompt::{
|
||||
load_system_prompt, prepend_bullets, ContextFile, ProjectContext, PromptBuildError,
|
||||
SystemPromptBuilder, FRONTIER_MODEL_NAME, SYSTEM_PROMPT_DYNAMIC_BOUNDARY,
|
||||
|
||||
554
rust/crates/runtime/src/recovery_recipes.rs
Normal file
554
rust/crates/runtime/src/recovery_recipes.rs
Normal file
@@ -0,0 +1,554 @@
|
||||
//! Recovery recipes for common failure scenarios.
|
||||
//!
|
||||
//! Encodes known automatic recoveries for the six failure scenarios
|
||||
//! listed in ROADMAP item 8, and enforces one automatic recovery
|
||||
//! attempt before escalation. Each attempt is emitted as a structured
|
||||
//! recovery event.
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// The six failure scenarios that have known recovery recipes.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum FailureScenario {
|
||||
TrustPromptUnresolved,
|
||||
PromptMisdelivery,
|
||||
StaleBranch,
|
||||
CompileRedCrossCrate,
|
||||
McpHandshakeFailure,
|
||||
PartialPluginStartup,
|
||||
}
|
||||
|
||||
impl FailureScenario {
|
||||
/// Returns all known failure scenarios.
|
||||
#[must_use]
|
||||
pub fn all() -> &'static [FailureScenario] {
|
||||
&[
|
||||
Self::TrustPromptUnresolved,
|
||||
Self::PromptMisdelivery,
|
||||
Self::StaleBranch,
|
||||
Self::CompileRedCrossCrate,
|
||||
Self::McpHandshakeFailure,
|
||||
Self::PartialPluginStartup,
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for FailureScenario {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Self::TrustPromptUnresolved => write!(f, "trust_prompt_unresolved"),
|
||||
Self::PromptMisdelivery => write!(f, "prompt_misdelivery"),
|
||||
Self::StaleBranch => write!(f, "stale_branch"),
|
||||
Self::CompileRedCrossCrate => write!(f, "compile_red_cross_crate"),
|
||||
Self::McpHandshakeFailure => write!(f, "mcp_handshake_failure"),
|
||||
Self::PartialPluginStartup => write!(f, "partial_plugin_startup"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Individual step that can be executed as part of a recovery recipe.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum RecoveryStep {
|
||||
AcceptTrustPrompt,
|
||||
RedirectPromptToAgent,
|
||||
RebaseBranch,
|
||||
CleanBuild,
|
||||
RetryMcpHandshake { timeout: u64 },
|
||||
RestartPlugin { name: String },
|
||||
EscalateToHuman { reason: String },
|
||||
}
|
||||
|
||||
/// Policy governing what happens when automatic recovery is exhausted.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum EscalationPolicy {
|
||||
AlertHuman,
|
||||
LogAndContinue,
|
||||
Abort,
|
||||
}
|
||||
|
||||
/// A recovery recipe encodes the sequence of steps to attempt for a
|
||||
/// given failure scenario, along with the maximum number of automatic
|
||||
/// attempts and the escalation policy.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct RecoveryRecipe {
|
||||
pub scenario: FailureScenario,
|
||||
pub steps: Vec<RecoveryStep>,
|
||||
pub max_attempts: u32,
|
||||
pub escalation_policy: EscalationPolicy,
|
||||
}
|
||||
|
||||
/// Outcome of a recovery attempt.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum RecoveryResult {
|
||||
Recovered {
|
||||
steps_taken: u32,
|
||||
},
|
||||
PartialRecovery {
|
||||
recovered: Vec<RecoveryStep>,
|
||||
remaining: Vec<RecoveryStep>,
|
||||
},
|
||||
EscalationRequired {
|
||||
reason: String,
|
||||
},
|
||||
}
|
||||
|
||||
/// Structured event emitted during recovery.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum RecoveryEvent {
|
||||
RecoveryAttempted {
|
||||
scenario: FailureScenario,
|
||||
recipe: RecoveryRecipe,
|
||||
result: RecoveryResult,
|
||||
},
|
||||
RecoverySucceeded,
|
||||
RecoveryFailed,
|
||||
Escalated,
|
||||
}
|
||||
|
||||
/// Minimal context for tracking recovery state and emitting events.
|
||||
///
|
||||
/// Holds per-scenario attempt counts, a structured event log, and an
|
||||
/// optional simulation knob for controlling step outcomes during tests.
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub struct RecoveryContext {
|
||||
attempts: HashMap<FailureScenario, u32>,
|
||||
events: Vec<RecoveryEvent>,
|
||||
/// Optional step index at which simulated execution fails.
|
||||
/// `None` means all steps succeed.
|
||||
fail_at_step: Option<usize>,
|
||||
}
|
||||
|
||||
impl RecoveryContext {
|
||||
#[must_use]
|
||||
pub fn new() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
|
||||
/// Configure a step index at which simulated execution will fail.
|
||||
#[must_use]
|
||||
pub fn with_fail_at_step(mut self, index: usize) -> Self {
|
||||
self.fail_at_step = Some(index);
|
||||
self
|
||||
}
|
||||
|
||||
/// Returns the structured event log populated during recovery.
|
||||
#[must_use]
|
||||
pub fn events(&self) -> &[RecoveryEvent] {
|
||||
&self.events
|
||||
}
|
||||
|
||||
/// Returns the number of recovery attempts made for a scenario.
|
||||
#[must_use]
|
||||
pub fn attempt_count(&self, scenario: &FailureScenario) -> u32 {
|
||||
self.attempts.get(scenario).copied().unwrap_or(0)
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the known recovery recipe for the given failure scenario.
|
||||
#[must_use]
|
||||
pub fn recipe_for(scenario: &FailureScenario) -> RecoveryRecipe {
|
||||
match scenario {
|
||||
FailureScenario::TrustPromptUnresolved => RecoveryRecipe {
|
||||
scenario: *scenario,
|
||||
steps: vec![RecoveryStep::AcceptTrustPrompt],
|
||||
max_attempts: 1,
|
||||
escalation_policy: EscalationPolicy::AlertHuman,
|
||||
},
|
||||
FailureScenario::PromptMisdelivery => RecoveryRecipe {
|
||||
scenario: *scenario,
|
||||
steps: vec![RecoveryStep::RedirectPromptToAgent],
|
||||
max_attempts: 1,
|
||||
escalation_policy: EscalationPolicy::AlertHuman,
|
||||
},
|
||||
FailureScenario::StaleBranch => RecoveryRecipe {
|
||||
scenario: *scenario,
|
||||
steps: vec![RecoveryStep::RebaseBranch, RecoveryStep::CleanBuild],
|
||||
max_attempts: 1,
|
||||
escalation_policy: EscalationPolicy::AlertHuman,
|
||||
},
|
||||
FailureScenario::CompileRedCrossCrate => RecoveryRecipe {
|
||||
scenario: *scenario,
|
||||
steps: vec![RecoveryStep::CleanBuild],
|
||||
max_attempts: 1,
|
||||
escalation_policy: EscalationPolicy::AlertHuman,
|
||||
},
|
||||
FailureScenario::McpHandshakeFailure => RecoveryRecipe {
|
||||
scenario: *scenario,
|
||||
steps: vec![RecoveryStep::RetryMcpHandshake { timeout: 5000 }],
|
||||
max_attempts: 1,
|
||||
escalation_policy: EscalationPolicy::Abort,
|
||||
},
|
||||
FailureScenario::PartialPluginStartup => RecoveryRecipe {
|
||||
scenario: *scenario,
|
||||
steps: vec![
|
||||
RecoveryStep::RestartPlugin {
|
||||
name: "stalled".to_string(),
|
||||
},
|
||||
RecoveryStep::RetryMcpHandshake { timeout: 3000 },
|
||||
],
|
||||
max_attempts: 1,
|
||||
escalation_policy: EscalationPolicy::LogAndContinue,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Attempts automatic recovery for the given failure scenario.
|
||||
///
|
||||
/// Looks up the recipe, enforces the one-attempt-before-escalation
|
||||
/// policy, simulates step execution (controlled by the context), and
|
||||
/// emits structured [`RecoveryEvent`]s for every attempt.
|
||||
pub fn attempt_recovery(scenario: &FailureScenario, ctx: &mut RecoveryContext) -> RecoveryResult {
|
||||
let recipe = recipe_for(scenario);
|
||||
let attempt_count = ctx.attempts.entry(*scenario).or_insert(0);
|
||||
|
||||
// Enforce one automatic recovery attempt before escalation.
|
||||
if *attempt_count >= recipe.max_attempts {
|
||||
let result = RecoveryResult::EscalationRequired {
|
||||
reason: format!(
|
||||
"max recovery attempts ({}) exceeded for {}",
|
||||
recipe.max_attempts, scenario
|
||||
),
|
||||
};
|
||||
ctx.events.push(RecoveryEvent::RecoveryAttempted {
|
||||
scenario: *scenario,
|
||||
recipe,
|
||||
result: result.clone(),
|
||||
});
|
||||
ctx.events.push(RecoveryEvent::Escalated);
|
||||
return result;
|
||||
}
|
||||
|
||||
*attempt_count += 1;
|
||||
|
||||
// Execute steps, honoring the optional fail_at_step simulation.
|
||||
let fail_index = ctx.fail_at_step;
|
||||
let mut executed = Vec::new();
|
||||
let mut failed = false;
|
||||
|
||||
for (i, step) in recipe.steps.iter().enumerate() {
|
||||
if fail_index == Some(i) {
|
||||
failed = true;
|
||||
break;
|
||||
}
|
||||
executed.push(step.clone());
|
||||
}
|
||||
|
||||
let result = if failed {
|
||||
let remaining: Vec<RecoveryStep> = recipe.steps[executed.len()..].to_vec();
|
||||
if executed.is_empty() {
|
||||
RecoveryResult::EscalationRequired {
|
||||
reason: format!("recovery failed at first step for {}", scenario),
|
||||
}
|
||||
} else {
|
||||
RecoveryResult::PartialRecovery {
|
||||
recovered: executed,
|
||||
remaining,
|
||||
}
|
||||
}
|
||||
} else {
|
||||
RecoveryResult::Recovered {
|
||||
steps_taken: recipe.steps.len() as u32,
|
||||
}
|
||||
};
|
||||
|
||||
// Emit the attempt as structured event data.
|
||||
ctx.events.push(RecoveryEvent::RecoveryAttempted {
|
||||
scenario: *scenario,
|
||||
recipe,
|
||||
result: result.clone(),
|
||||
});
|
||||
|
||||
match &result {
|
||||
RecoveryResult::Recovered { .. } => {
|
||||
ctx.events.push(RecoveryEvent::RecoverySucceeded);
|
||||
}
|
||||
RecoveryResult::PartialRecovery { .. } => {
|
||||
ctx.events.push(RecoveryEvent::RecoveryFailed);
|
||||
}
|
||||
RecoveryResult::EscalationRequired { .. } => {
|
||||
ctx.events.push(RecoveryEvent::Escalated);
|
||||
}
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn each_scenario_has_a_matching_recipe() {
|
||||
// given
|
||||
let scenarios = FailureScenario::all();
|
||||
|
||||
// when / then
|
||||
for scenario in scenarios {
|
||||
let recipe = recipe_for(scenario);
|
||||
assert_eq!(
|
||||
recipe.scenario, *scenario,
|
||||
"recipe scenario should match requested scenario"
|
||||
);
|
||||
assert!(
|
||||
!recipe.steps.is_empty(),
|
||||
"recipe for {} should have at least one step",
|
||||
scenario
|
||||
);
|
||||
assert!(
|
||||
recipe.max_attempts >= 1,
|
||||
"recipe for {} should allow at least one attempt",
|
||||
scenario
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn successful_recovery_returns_recovered_and_emits_events() {
|
||||
// given
|
||||
let mut ctx = RecoveryContext::new();
|
||||
let scenario = FailureScenario::TrustPromptUnresolved;
|
||||
|
||||
// when
|
||||
let result = attempt_recovery(&scenario, &mut ctx);
|
||||
|
||||
// then
|
||||
assert_eq!(result, RecoveryResult::Recovered { steps_taken: 1 });
|
||||
assert_eq!(ctx.events().len(), 2);
|
||||
assert!(matches!(
|
||||
&ctx.events()[0],
|
||||
RecoveryEvent::RecoveryAttempted {
|
||||
scenario: s,
|
||||
result: r,
|
||||
..
|
||||
} if *s == FailureScenario::TrustPromptUnresolved
|
||||
&& matches!(r, RecoveryResult::Recovered { steps_taken: 1 })
|
||||
));
|
||||
assert_eq!(ctx.events()[1], RecoveryEvent::RecoverySucceeded);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn escalation_after_max_attempts_exceeded() {
|
||||
// given
|
||||
let mut ctx = RecoveryContext::new();
|
||||
let scenario = FailureScenario::PromptMisdelivery;
|
||||
|
||||
// when — first attempt succeeds
|
||||
let first = attempt_recovery(&scenario, &mut ctx);
|
||||
assert!(matches!(first, RecoveryResult::Recovered { .. }));
|
||||
|
||||
// when — second attempt should escalate
|
||||
let second = attempt_recovery(&scenario, &mut ctx);
|
||||
|
||||
// then
|
||||
assert!(
|
||||
matches!(
|
||||
&second,
|
||||
RecoveryResult::EscalationRequired { reason }
|
||||
if reason.contains("max recovery attempts")
|
||||
),
|
||||
"second attempt should require escalation, got: {second:?}"
|
||||
);
|
||||
assert_eq!(ctx.attempt_count(&scenario), 1);
|
||||
assert!(ctx
|
||||
.events()
|
||||
.iter()
|
||||
.any(|e| matches!(e, RecoveryEvent::Escalated)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn partial_recovery_when_step_fails_midway() {
|
||||
// given — PartialPluginStartup has two steps; fail at step index 1
|
||||
let mut ctx = RecoveryContext::new().with_fail_at_step(1);
|
||||
let scenario = FailureScenario::PartialPluginStartup;
|
||||
|
||||
// when
|
||||
let result = attempt_recovery(&scenario, &mut ctx);
|
||||
|
||||
// then
|
||||
match &result {
|
||||
RecoveryResult::PartialRecovery {
|
||||
recovered,
|
||||
remaining,
|
||||
} => {
|
||||
assert_eq!(recovered.len(), 1, "one step should have succeeded");
|
||||
assert_eq!(remaining.len(), 1, "one step should remain");
|
||||
assert!(matches!(recovered[0], RecoveryStep::RestartPlugin { .. }));
|
||||
assert!(matches!(
|
||||
remaining[0],
|
||||
RecoveryStep::RetryMcpHandshake { .. }
|
||||
));
|
||||
}
|
||||
other => panic!("expected PartialRecovery, got {other:?}"),
|
||||
}
|
||||
assert!(ctx
|
||||
.events()
|
||||
.iter()
|
||||
.any(|e| matches!(e, RecoveryEvent::RecoveryFailed)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn first_step_failure_escalates_immediately() {
|
||||
// given — fail at step index 0
|
||||
let mut ctx = RecoveryContext::new().with_fail_at_step(0);
|
||||
let scenario = FailureScenario::CompileRedCrossCrate;
|
||||
|
||||
// when
|
||||
let result = attempt_recovery(&scenario, &mut ctx);
|
||||
|
||||
// then
|
||||
assert!(
|
||||
matches!(
|
||||
&result,
|
||||
RecoveryResult::EscalationRequired { reason }
|
||||
if reason.contains("failed at first step")
|
||||
),
|
||||
"zero-step failure should escalate, got: {result:?}"
|
||||
);
|
||||
assert!(ctx
|
||||
.events()
|
||||
.iter()
|
||||
.any(|e| matches!(e, RecoveryEvent::Escalated)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn emitted_events_include_structured_attempt_data() {
|
||||
// given
|
||||
let mut ctx = RecoveryContext::new();
|
||||
let scenario = FailureScenario::McpHandshakeFailure;
|
||||
|
||||
// when
|
||||
let _ = attempt_recovery(&scenario, &mut ctx);
|
||||
|
||||
// then — verify the RecoveryAttempted event carries full context
|
||||
let attempted = ctx
|
||||
.events()
|
||||
.iter()
|
||||
.find(|e| matches!(e, RecoveryEvent::RecoveryAttempted { .. }))
|
||||
.expect("should have emitted RecoveryAttempted event");
|
||||
|
||||
match attempted {
|
||||
RecoveryEvent::RecoveryAttempted {
|
||||
scenario: s,
|
||||
recipe,
|
||||
result,
|
||||
} => {
|
||||
assert_eq!(*s, scenario);
|
||||
assert_eq!(recipe.scenario, scenario);
|
||||
assert!(!recipe.steps.is_empty());
|
||||
assert!(matches!(result, RecoveryResult::Recovered { .. }));
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
|
||||
// Verify the event is serializable as structured JSON
|
||||
let json = serde_json::to_string(&ctx.events()[0])
|
||||
.expect("recovery event should be serializable to JSON");
|
||||
assert!(
|
||||
json.contains("mcp_handshake_failure"),
|
||||
"serialized event should contain scenario name"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn recovery_context_tracks_attempts_per_scenario() {
|
||||
// given
|
||||
let mut ctx = RecoveryContext::new();
|
||||
|
||||
// when
|
||||
assert_eq!(ctx.attempt_count(&FailureScenario::StaleBranch), 0);
|
||||
attempt_recovery(&FailureScenario::StaleBranch, &mut ctx);
|
||||
|
||||
// then
|
||||
assert_eq!(ctx.attempt_count(&FailureScenario::StaleBranch), 1);
|
||||
assert_eq!(ctx.attempt_count(&FailureScenario::PromptMisdelivery), 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn stale_branch_recipe_has_rebase_then_clean_build() {
|
||||
// given
|
||||
let recipe = recipe_for(&FailureScenario::StaleBranch);
|
||||
|
||||
// then
|
||||
assert_eq!(recipe.steps.len(), 2);
|
||||
assert_eq!(recipe.steps[0], RecoveryStep::RebaseBranch);
|
||||
assert_eq!(recipe.steps[1], RecoveryStep::CleanBuild);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn partial_plugin_startup_recipe_has_restart_then_handshake() {
|
||||
// given
|
||||
let recipe = recipe_for(&FailureScenario::PartialPluginStartup);
|
||||
|
||||
// then
|
||||
assert_eq!(recipe.steps.len(), 2);
|
||||
assert!(matches!(
|
||||
recipe.steps[0],
|
||||
RecoveryStep::RestartPlugin { .. }
|
||||
));
|
||||
assert!(matches!(
|
||||
recipe.steps[1],
|
||||
RecoveryStep::RetryMcpHandshake { timeout: 3000 }
|
||||
));
|
||||
assert_eq!(recipe.escalation_policy, EscalationPolicy::LogAndContinue);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn failure_scenario_display_all_variants() {
|
||||
// given
|
||||
let cases = [
|
||||
(
|
||||
FailureScenario::TrustPromptUnresolved,
|
||||
"trust_prompt_unresolved",
|
||||
),
|
||||
(FailureScenario::PromptMisdelivery, "prompt_misdelivery"),
|
||||
(FailureScenario::StaleBranch, "stale_branch"),
|
||||
(
|
||||
FailureScenario::CompileRedCrossCrate,
|
||||
"compile_red_cross_crate",
|
||||
),
|
||||
(
|
||||
FailureScenario::McpHandshakeFailure,
|
||||
"mcp_handshake_failure",
|
||||
),
|
||||
(
|
||||
FailureScenario::PartialPluginStartup,
|
||||
"partial_plugin_startup",
|
||||
),
|
||||
];
|
||||
|
||||
// when / then
|
||||
for (scenario, expected) in &cases {
|
||||
assert_eq!(scenario.to_string(), *expected);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn multi_step_success_reports_correct_steps_taken() {
|
||||
// given — StaleBranch has 2 steps, no simulated failure
|
||||
let mut ctx = RecoveryContext::new();
|
||||
let scenario = FailureScenario::StaleBranch;
|
||||
|
||||
// when
|
||||
let result = attempt_recovery(&scenario, &mut ctx);
|
||||
|
||||
// then
|
||||
assert_eq!(result, RecoveryResult::Recovered { steps_taken: 2 });
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn mcp_handshake_recipe_uses_abort_escalation_policy() {
|
||||
// given
|
||||
let recipe = recipe_for(&FailureScenario::McpHandshakeFailure);
|
||||
|
||||
// then
|
||||
assert_eq!(recipe.escalation_policy, EscalationPolicy::Abort);
|
||||
assert_eq!(recipe.max_attempts, 1);
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user