From 9de97c95cc3ded0700bdd649db7cab3b3568179d Mon Sep 17 00:00:00 2001
From: Jobdori <jobdori@openclaw.ai>
Date: Sat, 4 Apr 2026 20:07:36 +0900
Subject: [PATCH] feat(recovery): bridge WorkerFailureKind to FailureScenario
 (P2.8/P2.13)

Connect worker_boot failure classification to recovery_recipes policy:

- Add FailureScenario::ProviderFailure variant
- Add FailureScenario::from_worker_failure_kind() bridge function
  mapping every WorkerFailureKind to a concrete FailureScenario
- Add RecoveryStep::RestartWorker for provider failure recovery
- Add recipe for ProviderFailure: RestartWorker -> AlertHuman escalation
- 3 new tests: bridge mapping, recipe structure, recovery attempt cycle

Previously, a claw that detected WorkerFailureKind::Provider had no
machine-readable answer to 'what should I do about this?'. Now it can
call from_worker_failure_kind() -> recipe_for() -> attempt_recovery()
as a single structured chain.

Closes the silo between worker_boot and recovery_recipes.
---
 rust/crates/runtime/src/recovery_recipes.rs | 76 +++++++++++++++++++++
 1 file changed, 76 insertions(+)

diff --git a/rust/crates/runtime/src/recovery_recipes.rs b/rust/crates/runtime/src/recovery_recipes.rs
index 0afee62..63fe92c 100644
--- a/rust/crates/runtime/src/recovery_recipes.rs
+++ b/rust/crates/runtime/src/recovery_recipes.rs
@@ -9,6 +9,8 @@ use std::collections::HashMap;
 
 use serde::{Deserialize, Serialize};
 
+use crate::worker_boot::WorkerFailureKind;
+
-/// The six failure scenarios that have known recovery recipes.
+/// The failure scenarios that have known recovery recipes.
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
 #[serde(rename_all = "snake_case")]
@@ -19,6 +21,7 @@ pub enum FailureScenario {
     CompileRedCrossCrate,
     McpHandshakeFailure,
     PartialPluginStartup,
+    ProviderFailure,
 }
 
 impl FailureScenario {
@@ -32,8 +35,21 @@ impl FailureScenario {
             Self::CompileRedCrossCrate,
             Self::McpHandshakeFailure,
             Self::PartialPluginStartup,
+            Self::ProviderFailure,
         ]
     }
+
+    /// Bridge worker boot events into recovery policy: map a `WorkerFailureKind` to a
+    /// `FailureScenario`. NOTE(review): `Protocol` reuses `McpHandshakeFailure` — confirm intent.
+    #[must_use]
+    pub fn from_worker_failure_kind(kind: WorkerFailureKind) -> Self {
+        match kind {
+            WorkerFailureKind::TrustGate => Self::TrustPromptUnresolved,
+            WorkerFailureKind::PromptDelivery => Self::PromptMisdelivery,
+            WorkerFailureKind::Protocol => Self::McpHandshakeFailure,
+            WorkerFailureKind::Provider => Self::ProviderFailure,
+        }
+    }
 }
 
 impl std::fmt::Display for FailureScenario {
@@ -45,6 +61,7 @@ impl std::fmt::Display for FailureScenario {
             Self::CompileRedCrossCrate => write!(f, "compile_red_cross_crate"),
             Self::McpHandshakeFailure => write!(f, "mcp_handshake_failure"),
             Self::PartialPluginStartup => write!(f, "partial_plugin_startup"),
+            Self::ProviderFailure => write!(f, "provider_failure"),
         }
     }
 }
@@ -59,6 +76,7 @@ pub enum RecoveryStep {
     CleanBuild,
     RetryMcpHandshake { timeout: u64 },
     RestartPlugin { name: String },
+    RestartWorker,
     EscalateToHuman { reason: String },
 }
 
@@ -196,6 +214,12 @@ pub fn recipe_for(scenario: &FailureScenario) -> RecoveryRecipe {
             max_attempts: 1,
             escalation_policy: EscalationPolicy::LogAndContinue,
         },
+        FailureScenario::ProviderFailure => RecoveryRecipe {
+            scenario: *scenario,
+            steps: vec![RecoveryStep::RestartWorker],
+            max_attempts: 1,
+            escalation_policy: EscalationPolicy::AlertHuman,
+        },
     }
 }
 
@@ -551,4 +575,56 @@ mod tests {
         assert_eq!(recipe.escalation_policy, EscalationPolicy::Abort);
         assert_eq!(recipe.max_attempts, 1);
     }
+
+    #[test]
+    fn worker_failure_kind_maps_to_failure_scenario() {
+        // one assertion per `WorkerFailureKind` variant handled by the bridge's `match`
+        assert_eq!(
+            FailureScenario::from_worker_failure_kind(WorkerFailureKind::TrustGate),
+            FailureScenario::TrustPromptUnresolved,
+        );
+        assert_eq!(
+            FailureScenario::from_worker_failure_kind(WorkerFailureKind::PromptDelivery),
+            FailureScenario::PromptMisdelivery,
+        );
+        assert_eq!(
+            FailureScenario::from_worker_failure_kind(WorkerFailureKind::Protocol),
+            FailureScenario::McpHandshakeFailure,
+        );
+        assert_eq!(
+            FailureScenario::from_worker_failure_kind(WorkerFailureKind::Provider),
+            FailureScenario::ProviderFailure,
+        );
+    }
+
+    #[test]
+    fn provider_failure_recipe_uses_restart_worker_step() {
+        // given — the recipe looked up for the `ProviderFailure` scenario
+        let recipe = recipe_for(&FailureScenario::ProviderFailure);
+
+        // then — restart step present, single attempt, `AlertHuman` escalation policy
+        assert_eq!(recipe.scenario, FailureScenario::ProviderFailure);
+        assert!(recipe.steps.contains(&RecoveryStep::RestartWorker));
+        assert_eq!(recipe.escalation_policy, EscalationPolicy::AlertHuman);
+        assert_eq!(recipe.max_attempts, 1);
+    }
+
+    #[test]
+    fn provider_failure_recovery_attempt_succeeds_then_escalates() {
+        // given — a freshly constructed context, reused across both attempts
+        let mut ctx = RecoveryContext::new();
+        let scenario = FailureScenario::ProviderFailure;
+
+        // when — first attempt is expected to report `Recovered`
+        let first = attempt_recovery(&scenario, &mut ctx);
+        assert!(matches!(first, RecoveryResult::Recovered { .. }));
+
+        // when — second attempt exceeds max_attempts=1: expect escalation and an `Escalated` event
+        let second = attempt_recovery(&scenario, &mut ctx);
+        assert!(matches!(second, RecoveryResult::EscalationRequired { .. }));
+        assert!(ctx
+            .events()
+            .iter()
+            .any(|e| matches!(e, RecoveryEvent::Escalated)));
+    }
 }