mirror of
https://github.com/instructkr/claude-code.git
synced 2026-04-05 18:58:48 +03:00
feat(harness+usage): add auto_compact and token_cost parity scenarios
Two new mock parity harness scenarios:
1. auto_compact_triggered (session-compaction category)
- Mock returns 50k input tokens, validates auto_compaction key
is present in JSON output
- Validates format parity; trigger behavior covered by
conversation::tests::auto_compacts_when_cumulative_input_threshold_is_crossed
2. token_cost_reporting (token-usage category)
- Mock returns known token counts (1k input, 500 output)
- Validates input/output token fields present in JSON output
Additional changes:
- Add estimated_cost to JSON prompt output (format_usd + pricing_for_model)
- Add final_text_sse_with_usage and text_message_response_with_usage helpers
to mock-anthropic-service for parameterized token counts
- Add ScenarioCase.extra_env and ScenarioCase.resume_session fields
- Update mock_parity_scenarios.json: 10 -> 12 scenarios
- Update harness request count assertion: 19 -> 21
cargo test --workspace: 558 passed, 0 failed
This commit is contained in:
@@ -98,6 +98,8 @@ enum Scenario {
|
||||
BashPermissionPromptApproved,
|
||||
BashPermissionPromptDenied,
|
||||
PluginToolRoundtrip,
|
||||
AutoCompactTriggered,
|
||||
TokenCostReporting,
|
||||
}
|
||||
|
||||
impl Scenario {
|
||||
@@ -113,6 +115,8 @@ impl Scenario {
|
||||
"bash_permission_prompt_approved" => Some(Self::BashPermissionPromptApproved),
|
||||
"bash_permission_prompt_denied" => Some(Self::BashPermissionPromptDenied),
|
||||
"plugin_tool_roundtrip" => Some(Self::PluginToolRoundtrip),
|
||||
"auto_compact_triggered" => Some(Self::AutoCompactTriggered),
|
||||
"token_cost_reporting" => Some(Self::TokenCostReporting),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
@@ -129,6 +133,8 @@ impl Scenario {
|
||||
Self::BashPermissionPromptApproved => "bash_permission_prompt_approved",
|
||||
Self::BashPermissionPromptDenied => "bash_permission_prompt_denied",
|
||||
Self::PluginToolRoundtrip => "plugin_tool_roundtrip",
|
||||
Self::AutoCompactTriggered => "auto_compact_triggered",
|
||||
Self::TokenCostReporting => "token_cost_reporting",
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -452,6 +458,12 @@ fn build_stream_body(request: &MessageRequest, scenario: Scenario) -> String {
|
||||
&[r#"{"message":"hello from plugin parity"}"#],
|
||||
),
|
||||
},
|
||||
Scenario::AutoCompactTriggered => {
|
||||
final_text_sse_with_usage("auto compact parity complete.", 50_000, 200)
|
||||
}
|
||||
Scenario::TokenCostReporting => {
|
||||
final_text_sse_with_usage("token cost reporting parity complete.", 1_000, 500)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -610,6 +622,18 @@ fn build_message_response(request: &MessageRequest, scenario: Scenario) -> Messa
|
||||
json!({"message": "hello from plugin parity"}),
|
||||
),
|
||||
},
|
||||
Scenario::AutoCompactTriggered => text_message_response_with_usage(
|
||||
"msg_auto_compact_triggered",
|
||||
"auto compact parity complete.",
|
||||
50_000,
|
||||
200,
|
||||
),
|
||||
Scenario::TokenCostReporting => text_message_response_with_usage(
|
||||
"msg_token_cost_reporting",
|
||||
"token cost reporting parity complete.",
|
||||
1_000,
|
||||
500,
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -625,6 +649,8 @@ fn request_id_for(scenario: Scenario) -> &'static str {
|
||||
Scenario::BashPermissionPromptApproved => "req_bash_permission_prompt_approved",
|
||||
Scenario::BashPermissionPromptDenied => "req_bash_permission_prompt_denied",
|
||||
Scenario::PluginToolRoundtrip => "req_plugin_tool_roundtrip",
|
||||
Scenario::AutoCompactTriggered => "req_auto_compact_triggered",
|
||||
Scenario::TokenCostReporting => "req_token_cost_reporting",
|
||||
}
|
||||
}
|
||||
|
||||
@@ -661,6 +687,32 @@ fn text_message_response(id: &str, text: &str) -> MessageResponse {
|
||||
}
|
||||
}
|
||||
|
||||
fn text_message_response_with_usage(
|
||||
id: &str,
|
||||
text: &str,
|
||||
input_tokens: u32,
|
||||
output_tokens: u32,
|
||||
) -> MessageResponse {
|
||||
MessageResponse {
|
||||
id: id.to_string(),
|
||||
kind: "message".to_string(),
|
||||
role: "assistant".to_string(),
|
||||
content: vec![OutputContentBlock::Text {
|
||||
text: text.to_string(),
|
||||
}],
|
||||
model: DEFAULT_MODEL.to_string(),
|
||||
stop_reason: Some("end_turn".to_string()),
|
||||
stop_sequence: None,
|
||||
usage: Usage {
|
||||
input_tokens,
|
||||
cache_creation_input_tokens: 0,
|
||||
cache_read_input_tokens: 0,
|
||||
output_tokens,
|
||||
},
|
||||
request_id: None,
|
||||
}
|
||||
}
|
||||
|
||||
fn tool_message_response(
|
||||
id: &str,
|
||||
tool_id: &str,
|
||||
@@ -919,6 +971,74 @@ fn final_text_sse(text: &str) -> String {
|
||||
body
|
||||
}
|
||||
|
||||
fn final_text_sse_with_usage(text: &str, input_tokens: u32, output_tokens: u32) -> String {
|
||||
let mut body = String::new();
|
||||
append_sse(
|
||||
&mut body,
|
||||
"message_start",
|
||||
json!({
|
||||
"type": "message_start",
|
||||
"message": {
|
||||
"id": unique_message_id(),
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"content": [],
|
||||
"model": DEFAULT_MODEL,
|
||||
"stop_reason": null,
|
||||
"stop_sequence": null,
|
||||
"usage": {
|
||||
"input_tokens": input_tokens,
|
||||
"cache_creation_input_tokens": 0,
|
||||
"cache_read_input_tokens": 0,
|
||||
"output_tokens": 0
|
||||
}
|
||||
}
|
||||
}),
|
||||
);
|
||||
append_sse(
|
||||
&mut body,
|
||||
"content_block_start",
|
||||
json!({
|
||||
"type": "content_block_start",
|
||||
"index": 0,
|
||||
"content_block": {"type": "text", "text": ""}
|
||||
}),
|
||||
);
|
||||
append_sse(
|
||||
&mut body,
|
||||
"content_block_delta",
|
||||
json!({
|
||||
"type": "content_block_delta",
|
||||
"index": 0,
|
||||
"delta": {"type": "text_delta", "text": text}
|
||||
}),
|
||||
);
|
||||
append_sse(
|
||||
&mut body,
|
||||
"content_block_stop",
|
||||
json!({
|
||||
"type": "content_block_stop",
|
||||
"index": 0
|
||||
}),
|
||||
);
|
||||
append_sse(
|
||||
&mut body,
|
||||
"message_delta",
|
||||
json!({
|
||||
"type": "message_delta",
|
||||
"delta": {"stop_reason": "end_turn", "stop_sequence": null},
|
||||
"usage": {
|
||||
"input_tokens": input_tokens,
|
||||
"cache_creation_input_tokens": 0,
|
||||
"cache_read_input_tokens": 0,
|
||||
"output_tokens": output_tokens
|
||||
}
|
||||
}),
|
||||
);
|
||||
append_sse(&mut body, "message_stop", json!({"type": "message_stop"}));
|
||||
body
|
||||
}
|
||||
|
||||
#[allow(clippy::needless_pass_by_value)]
|
||||
fn append_sse(buffer: &mut String, event: &str, payload: Value) {
|
||||
use std::fmt::Write as _;
|
||||
|
||||
@@ -46,7 +46,7 @@ use runtime::{
|
||||
ConversationRuntime, MessageRole, OAuthAuthorizationRequest, OAuthConfig,
|
||||
OAuthTokenExchangeRequest, PermissionMode, PermissionPolicy, ProjectContext, PromptCacheEvent,
|
||||
ResolvedPermissionMode, RuntimeError, Session, TokenUsage, ToolError, ToolExecutor,
|
||||
UsageTracker,
|
||||
UsageTracker, ModelPricing, format_usd, pricing_for_model,
|
||||
};
|
||||
use serde_json::json;
|
||||
use tools::GlobalToolRegistry;
|
||||
@@ -1899,7 +1899,13 @@ impl LiveCli {
|
||||
"output_tokens": summary.usage.output_tokens,
|
||||
"cache_creation_input_tokens": summary.usage.cache_creation_input_tokens,
|
||||
"cache_read_input_tokens": summary.usage.cache_read_input_tokens,
|
||||
}
|
||||
},
|
||||
"estimated_cost": format_usd(
|
||||
summary.usage.estimate_cost_usd_with_pricing(
|
||||
pricing_for_model(&self.model)
|
||||
.unwrap_or_else(runtime::ModelPricing::default_sonnet_tier)
|
||||
).total_cost_usd()
|
||||
)
|
||||
})
|
||||
);
|
||||
Ok(())
|
||||
|
||||
@@ -35,6 +35,8 @@ fn clean_env_cli_reaches_mock_anthropic_service_across_scripted_parity_scenarios
|
||||
stdin: None,
|
||||
prepare: prepare_noop,
|
||||
assert: assert_streaming_text,
|
||||
extra_env: None,
|
||||
resume_session: None,
|
||||
},
|
||||
ScenarioCase {
|
||||
name: "read_file_roundtrip",
|
||||
@@ -43,6 +45,8 @@ fn clean_env_cli_reaches_mock_anthropic_service_across_scripted_parity_scenarios
|
||||
stdin: None,
|
||||
prepare: prepare_read_fixture,
|
||||
assert: assert_read_file_roundtrip,
|
||||
extra_env: None,
|
||||
resume_session: None,
|
||||
},
|
||||
ScenarioCase {
|
||||
name: "grep_chunk_assembly",
|
||||
@@ -51,6 +55,8 @@ fn clean_env_cli_reaches_mock_anthropic_service_across_scripted_parity_scenarios
|
||||
stdin: None,
|
||||
prepare: prepare_grep_fixture,
|
||||
assert: assert_grep_chunk_assembly,
|
||||
extra_env: None,
|
||||
resume_session: None,
|
||||
},
|
||||
ScenarioCase {
|
||||
name: "write_file_allowed",
|
||||
@@ -59,6 +65,8 @@ fn clean_env_cli_reaches_mock_anthropic_service_across_scripted_parity_scenarios
|
||||
stdin: None,
|
||||
prepare: prepare_noop,
|
||||
assert: assert_write_file_allowed,
|
||||
extra_env: None,
|
||||
resume_session: None,
|
||||
},
|
||||
ScenarioCase {
|
||||
name: "write_file_denied",
|
||||
@@ -67,6 +75,8 @@ fn clean_env_cli_reaches_mock_anthropic_service_across_scripted_parity_scenarios
|
||||
stdin: None,
|
||||
prepare: prepare_noop,
|
||||
assert: assert_write_file_denied,
|
||||
extra_env: None,
|
||||
resume_session: None,
|
||||
},
|
||||
ScenarioCase {
|
||||
name: "multi_tool_turn_roundtrip",
|
||||
@@ -75,6 +85,8 @@ fn clean_env_cli_reaches_mock_anthropic_service_across_scripted_parity_scenarios
|
||||
stdin: None,
|
||||
prepare: prepare_multi_tool_fixture,
|
||||
assert: assert_multi_tool_turn_roundtrip,
|
||||
extra_env: None,
|
||||
resume_session: None,
|
||||
},
|
||||
ScenarioCase {
|
||||
name: "bash_stdout_roundtrip",
|
||||
@@ -83,6 +95,8 @@ fn clean_env_cli_reaches_mock_anthropic_service_across_scripted_parity_scenarios
|
||||
stdin: None,
|
||||
prepare: prepare_noop,
|
||||
assert: assert_bash_stdout_roundtrip,
|
||||
extra_env: None,
|
||||
resume_session: None,
|
||||
},
|
||||
ScenarioCase {
|
||||
name: "bash_permission_prompt_approved",
|
||||
@@ -91,6 +105,8 @@ fn clean_env_cli_reaches_mock_anthropic_service_across_scripted_parity_scenarios
|
||||
stdin: Some("y\n"),
|
||||
prepare: prepare_noop,
|
||||
assert: assert_bash_permission_prompt_approved,
|
||||
extra_env: None,
|
||||
resume_session: None,
|
||||
},
|
||||
ScenarioCase {
|
||||
name: "bash_permission_prompt_denied",
|
||||
@@ -99,6 +115,8 @@ fn clean_env_cli_reaches_mock_anthropic_service_across_scripted_parity_scenarios
|
||||
stdin: Some("n\n"),
|
||||
prepare: prepare_noop,
|
||||
assert: assert_bash_permission_prompt_denied,
|
||||
extra_env: None,
|
||||
resume_session: None,
|
||||
},
|
||||
ScenarioCase {
|
||||
name: "plugin_tool_roundtrip",
|
||||
@@ -107,6 +125,28 @@ fn clean_env_cli_reaches_mock_anthropic_service_across_scripted_parity_scenarios
|
||||
stdin: None,
|
||||
prepare: prepare_plugin_fixture,
|
||||
assert: assert_plugin_tool_roundtrip,
|
||||
extra_env: None,
|
||||
resume_session: None,
|
||||
},
|
||||
ScenarioCase {
|
||||
name: "auto_compact_triggered",
|
||||
permission_mode: "read-only",
|
||||
allowed_tools: None,
|
||||
stdin: None,
|
||||
prepare: prepare_noop,
|
||||
assert: assert_auto_compact_triggered,
|
||||
extra_env: None,
|
||||
resume_session: None,
|
||||
},
|
||||
ScenarioCase {
|
||||
name: "token_cost_reporting",
|
||||
permission_mode: "read-only",
|
||||
allowed_tools: None,
|
||||
stdin: None,
|
||||
prepare: prepare_noop,
|
||||
assert: assert_token_cost_reporting,
|
||||
extra_env: None,
|
||||
resume_session: None,
|
||||
},
|
||||
];
|
||||
|
||||
@@ -145,8 +185,8 @@ fn clean_env_cli_reaches_mock_anthropic_service_across_scripted_parity_scenarios
|
||||
let captured = runtime.block_on(server.captured_requests());
|
||||
assert_eq!(
|
||||
captured.len(),
|
||||
19,
|
||||
"ten scenarios should produce nineteen requests"
|
||||
21,
|
||||
"twelve scenarios should produce twenty-one requests"
|
||||
);
|
||||
assert!(captured
|
||||
.iter()
|
||||
@@ -179,6 +219,8 @@ fn clean_env_cli_reaches_mock_anthropic_service_across_scripted_parity_scenarios
|
||||
"bash_permission_prompt_denied",
|
||||
"plugin_tool_roundtrip",
|
||||
"plugin_tool_roundtrip",
|
||||
"auto_compact_triggered",
|
||||
"token_cost_reporting",
|
||||
]
|
||||
);
|
||||
|
||||
@@ -205,6 +247,8 @@ struct ScenarioCase {
|
||||
stdin: Option<&'static str>,
|
||||
prepare: fn(&HarnessWorkspace),
|
||||
assert: fn(&HarnessWorkspace, &ScenarioRun),
|
||||
extra_env: Option<(&'static str, &'static str)>,
|
||||
resume_session: Option<&'static str>,
|
||||
}
|
||||
|
||||
struct HarnessWorkspace {
|
||||
@@ -278,6 +322,12 @@ fn run_case(case: ScenarioCase, workspace: &HarnessWorkspace, base_url: &str) ->
|
||||
if let Some(allowed_tools) = case.allowed_tools {
|
||||
command.args(["--allowedTools", allowed_tools]);
|
||||
}
|
||||
if let Some((key, value)) = case.extra_env {
|
||||
command.env(key, value);
|
||||
}
|
||||
if let Some(session_id) = case.resume_session {
|
||||
command.args(["--resume", session_id]);
|
||||
}
|
||||
|
||||
let prompt = format!("{SCENARIO_PREFIX}{}", case.name);
|
||||
command.arg(prompt);
|
||||
@@ -308,6 +358,28 @@ fn run_case(case: ScenarioCase, workspace: &HarnessWorkspace, base_url: &str) ->
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
fn prepare_auto_compact_fixture(workspace: &HarnessWorkspace) {
|
||||
let sessions_dir = workspace.root.join(".claw").join("sessions");
|
||||
fs::create_dir_all(&sessions_dir).expect("sessions dir should exist");
|
||||
|
||||
// Write a pre-seeded session with 6 messages so auto-compact can remove them
|
||||
let session_id = "parity-auto-compact-seed";
|
||||
let session_jsonl = r#"{"type":"session_meta","version":3,"session_id":"parity-auto-compact-seed","created_at_ms":1743724800000,"updated_at_ms":1743724800000}
|
||||
{"type":"message","message":{"role":"user","blocks":[{"type":"text","text":"step one of the parity scenario"}]}}
|
||||
{"type":"message","message":{"role":"assistant","blocks":[{"type":"text","text":"acknowledged step one"}]}}
|
||||
{"type":"message","message":{"role":"user","blocks":[{"type":"text","text":"step two of the parity scenario"}]}}
|
||||
{"type":"message","message":{"role":"assistant","blocks":[{"type":"text","text":"acknowledged step two"}]}}
|
||||
{"type":"message","message":{"role":"user","blocks":[{"type":"text","text":"step three of the parity scenario"}]}}
|
||||
{"type":"message","message":{"role":"assistant","blocks":[{"type":"text","text":"acknowledged step three"}]}}
|
||||
"#;
|
||||
fs::write(
|
||||
sessions_dir.join(format!("{session_id}.jsonl")),
|
||||
session_jsonl,
|
||||
)
|
||||
.expect("pre-seeded session should write");
|
||||
}
|
||||
|
||||
/// Fixture hook for scenarios that need no workspace setup; does nothing.
fn prepare_noop(_workspace: &HarnessWorkspace) {}
|
||||
|
||||
fn prepare_read_fixture(workspace: &HarnessWorkspace) {
|
||||
@@ -609,6 +681,59 @@ fn assert_plugin_tool_roundtrip(_: &HarnessWorkspace, run: &ScenarioRun) {
|
||||
.contains("hello from plugin parity"));
|
||||
}
|
||||
|
||||
/// Checks the JSON output produced by the `auto_compact_triggered` scenario.
///
/// This is a format-parity check: the `auto_compaction` key must exist in the
/// output, but its value is not inspected (it may be null for sessions below
/// the threshold). Trigger behavior is covered by
/// conversation::tests::auto_compacts_when_cumulative_input_threshold_is_crossed.
///
/// Panics (fails the test) when any expectation is not met.
fn assert_auto_compact_triggered(_: &HarnessWorkspace, run: &ScenarioRun) {
    let response = &run.response;

    // A single text-only turn: one iteration, no tool calls.
    assert_eq!(response["iterations"], Value::from(1));
    assert_eq!(response["tool_uses"], Value::Array(Vec::new()));

    let message = response["message"].as_str().expect("message text");
    assert!(
        message.contains("auto compact parity complete."),
        "expected auto compact message in response"
    );

    // Presence of the key is what is being validated, not its value.
    let top_level = response.as_object().expect("response object");
    assert!(
        top_level.contains_key("auto_compaction"),
        "auto_compaction key must be present in JSON output"
    );

    // The reported usage must reflect the large input count served by the mock.
    let input_tokens = response["usage"]["input_tokens"]
        .as_u64()
        .expect("input_tokens should be present");
    assert!(
        input_tokens >= 50_000,
        "input_tokens should reflect mock service value (got {input_tokens})"
    );
}
|
||||
|
||||
/// Checks the JSON output produced by the `token_cost_reporting` scenario.
///
/// Expects exactly one iteration, the scenario's completion text in `message`,
/// non-zero `usage.input_tokens` and `usage.output_tokens`, and an
/// `estimated_cost` string that begins with `$`. A missing or non-numeric token
/// field is treated as zero and therefore fails the non-zero checks.
fn assert_token_cost_reporting(_: &HarnessWorkspace, run: &ScenarioRun) {
    let response = &run.response;
    assert_eq!(response["iterations"], Value::from(1));

    let message = response["message"].as_str().expect("message text");
    assert!(message.contains("token cost reporting parity complete."));

    let usage = &response["usage"];
    let input_tokens = usage["input_tokens"].as_u64().unwrap_or(0);
    assert!(input_tokens > 0, "input_tokens should be non-zero");
    let output_tokens = usage["output_tokens"].as_u64().unwrap_or(0);
    assert!(output_tokens > 0, "output_tokens should be non-zero");

    // Only the shape of the cost is checked here, not the amount.
    let cost_is_dollar_prefixed = matches!(
        response["estimated_cost"].as_str(),
        Some(cost) if cost.starts_with('$')
    );
    assert!(
        cost_is_dollar_prefixed,
        "estimated_cost should be a dollar-prefixed string"
    );
}
|
||||
|
||||
fn parse_json_output(stdout: &str) -> Value {
|
||||
if let Some(index) = stdout.rfind("{\"auto_compaction\"") {
|
||||
return serde_json::from_str(&stdout[index..]).unwrap_or_else(|error| {
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
"category": "baseline",
|
||||
"description": "Validates streamed assistant text with no tool calls.",
|
||||
"parity_refs": [
|
||||
"Mock parity harness — milestone 1",
|
||||
"Mock parity harness \u2014 milestone 1",
|
||||
"Streaming response support validated by the mock parity harness"
|
||||
]
|
||||
},
|
||||
@@ -13,8 +13,8 @@
|
||||
"category": "file-tools",
|
||||
"description": "Exercises read_file tool execution and final assistant synthesis.",
|
||||
"parity_refs": [
|
||||
"Mock parity harness — milestone 1",
|
||||
"File tools — harness-validated flows"
|
||||
"Mock parity harness \u2014 milestone 1",
|
||||
"File tools \u2014 harness-validated flows"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -22,8 +22,8 @@
|
||||
"category": "file-tools",
|
||||
"description": "Validates grep_search partial JSON chunk assembly and follow-up synthesis.",
|
||||
"parity_refs": [
|
||||
"Mock parity harness — milestone 1",
|
||||
"File tools — harness-validated flows"
|
||||
"Mock parity harness \u2014 milestone 1",
|
||||
"File tools \u2014 harness-validated flows"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -31,8 +31,8 @@
|
||||
"category": "file-tools",
|
||||
"description": "Confirms workspace-write write_file success and filesystem side effects.",
|
||||
"parity_refs": [
|
||||
"Mock parity harness — milestone 1",
|
||||
"File tools — harness-validated flows"
|
||||
"Mock parity harness \u2014 milestone 1",
|
||||
"File tools \u2014 harness-validated flows"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -40,7 +40,7 @@
|
||||
"category": "permissions",
|
||||
"description": "Confirms read-only mode blocks write_file with an error result.",
|
||||
"parity_refs": [
|
||||
"Mock parity harness — milestone 1",
|
||||
"Mock parity harness \u2014 milestone 1",
|
||||
"Permission enforcement across tool paths"
|
||||
]
|
||||
},
|
||||
@@ -49,7 +49,7 @@
|
||||
"category": "multi-tool-turns",
|
||||
"description": "Executes read_file and grep_search in the same assistant turn before the final reply.",
|
||||
"parity_refs": [
|
||||
"Mock parity harness — milestone 2 (behavioral expansion)",
|
||||
"Mock parity harness \u2014 milestone 2 (behavioral expansion)",
|
||||
"Multi-tool assistant turns"
|
||||
]
|
||||
},
|
||||
@@ -58,8 +58,8 @@
|
||||
"category": "bash",
|
||||
"description": "Validates bash execution and stdout roundtrip in danger-full-access mode.",
|
||||
"parity_refs": [
|
||||
"Mock parity harness — milestone 2 (behavioral expansion)",
|
||||
"Bash tool — upstream has 18 submodules, Rust has 1:"
|
||||
"Mock parity harness \u2014 milestone 2 (behavioral expansion)",
|
||||
"Bash tool \u2014 upstream has 18 submodules, Rust has 1:"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -67,7 +67,7 @@
|
||||
"category": "permissions",
|
||||
"description": "Exercises workspace-write to bash escalation with a positive approval response.",
|
||||
"parity_refs": [
|
||||
"Mock parity harness — milestone 2 (behavioral expansion)",
|
||||
"Mock parity harness \u2014 milestone 2 (behavioral expansion)",
|
||||
"Permission enforcement across tool paths"
|
||||
]
|
||||
},
|
||||
@@ -76,7 +76,7 @@
|
||||
"category": "permissions",
|
||||
"description": "Exercises workspace-write to bash escalation with a denied approval response.",
|
||||
"parity_refs": [
|
||||
"Mock parity harness — milestone 2 (behavioral expansion)",
|
||||
"Mock parity harness \u2014 milestone 2 (behavioral expansion)",
|
||||
"Permission enforcement across tool paths"
|
||||
]
|
||||
},
|
||||
@@ -85,8 +85,25 @@
|
||||
"category": "plugin-paths",
|
||||
"description": "Loads an external plugin tool and executes it through the runtime tool registry.",
|
||||
"parity_refs": [
|
||||
"Mock parity harness — milestone 2 (behavioral expansion)",
|
||||
"Mock parity harness \u2014 milestone 2 (behavioral expansion)",
|
||||
"Plugin tool execution path"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "auto_compact_triggered",
|
||||
"category": "session-compaction",
|
||||
|
||||
"description": "Validates that the auto_compaction key is present in JSON output when the mock reports 50k input tokens; trigger behavior is covered by runtime unit tests.",
|
||||
"parity_refs": [
|
||||
"Session compaction behavior matching",
|
||||
"auto_compaction threshold from env"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "token_cost_reporting",
|
||||
"category": "token-usage",
|
||||
"description": "Confirms usage token counts and estimated_cost appear in JSON output.",
|
||||
"parity_refs": [
|
||||
"Token counting / cost tracking accuracy"
|
||||
]
|
||||
}
|
||||
]
|
||||
|
||||
Reference in New Issue
Block a user