mirror of
https://github.com/instructkr/claude-code.git
synced 2026-04-03 15:38:50 +03:00
Extend the Rust tools crate with concrete TodoWrite and Skill implementations. TodoWrite now validates and persists structured session todos with Claude Code-aligned item shapes, while Skill resolves local skill definitions and returns their prompt payload for execution handoff. Tests cover persistence and local skill loading without disturbing the previously added web tools.

Constraint: Stay within tools-only scope and avoid depending on broader agent/runtime rewrites
Constraint: Keep exposed tool names and schemas close to Claude Code contracts
Rejected: In-memory-only TodoWrite state | would not survive across tool calls
Rejected: Stub Skill metadata without loading prompt content | not materially useful to callers
Confidence: medium
Scope-risk: narrow
Reversibility: clean
Directive: Preserve TodoWrite item-field parity and keep Skill focused on local skill discovery until agent execution wiring lands
Tested: cargo fmt; cargo test -p tools
Not-tested: cargo clippy; full workspace cargo test
1176 lines
37 KiB
Rust
1176 lines
37 KiB
Rust
use std::collections::BTreeSet;
|
|
use std::time::{Duration, Instant};
|
|
|
|
use reqwest::blocking::Client;
|
|
use runtime::{
|
|
edit_file, execute_bash, glob_search, grep_search, read_file, write_file, BashCommandInput,
|
|
GrepSearchInput,
|
|
};
|
|
use serde::{Deserialize, Serialize};
|
|
use serde_json::{json, Value};
|
|
|
|
/// A single tool recorded in a manifest: its wire name plus where it
/// came from (base set vs. conditionally enabled).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ToolManifestEntry {
    /// Tool name as exposed to the model (e.g. "bash", "WebFetch").
    pub name: String,
    /// Whether the tool belongs to the base set or is conditional.
    pub source: ToolSource,
}

/// Origin of a manifest entry.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ToolSource {
    /// Always-available tool.
    Base,
    /// Tool enabled only under certain conditions.
    Conditional,
}

/// Ordered collection of manifest entries describing the active tool set.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct ToolRegistry {
    entries: Vec<ToolManifestEntry>,
}

impl ToolRegistry {
    /// Builds a registry from a pre-assembled list of entries.
    #[must_use]
    pub fn new(entries: Vec<ToolManifestEntry>) -> Self {
        Self { entries }
    }

    /// Borrows the manifest entries in insertion order.
    #[must_use]
    pub fn entries(&self) -> &[ToolManifestEntry] {
        &self.entries
    }
}

/// Static description of a tool: wire name, human-readable description,
/// and the JSON Schema for the expected input payload.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ToolSpec {
    pub name: &'static str,
    pub description: &'static str,
    /// JSON Schema for the tool input; validated by the caller, not here.
    pub input_schema: Value,
}
/// Returns the specs for the MVP tool set: workspace file tools, bash,
/// the two web tools, TodoWrite, and Skill. Names and input schemas are
/// kept close to Claude Code's tool contracts.
#[must_use]
pub fn mvp_tool_specs() -> Vec<ToolSpec> {
    vec![
        // Shell execution in the workspace.
        ToolSpec {
            name: "bash",
            description: "Execute a shell command in the current workspace.",
            input_schema: json!({
                "type": "object",
                "properties": {
                    "command": { "type": "string" },
                    "timeout": { "type": "integer", "minimum": 1 },
                    "description": { "type": "string" },
                    "run_in_background": { "type": "boolean" },
                    "dangerouslyDisableSandbox": { "type": "boolean" }
                },
                "required": ["command"],
                "additionalProperties": false
            }),
        },
        // File read with optional offset/limit windowing.
        ToolSpec {
            name: "read_file",
            description: "Read a text file from the workspace.",
            input_schema: json!({
                "type": "object",
                "properties": {
                    "path": { "type": "string" },
                    "offset": { "type": "integer", "minimum": 0 },
                    "limit": { "type": "integer", "minimum": 1 }
                },
                "required": ["path"],
                "additionalProperties": false
            }),
        },
        // Whole-file write.
        ToolSpec {
            name: "write_file",
            description: "Write a text file in the workspace.",
            input_schema: json!({
                "type": "object",
                "properties": {
                    "path": { "type": "string" },
                    "content": { "type": "string" }
                },
                "required": ["path", "content"],
                "additionalProperties": false
            }),
        },
        // Targeted string replacement within a file.
        ToolSpec {
            name: "edit_file",
            description: "Replace text in a workspace file.",
            input_schema: json!({
                "type": "object",
                "properties": {
                    "path": { "type": "string" },
                    "old_string": { "type": "string" },
                    "new_string": { "type": "string" },
                    "replace_all": { "type": "boolean" }
                },
                "required": ["path", "old_string", "new_string"],
                "additionalProperties": false
            }),
        },
        // Filename search by glob pattern.
        ToolSpec {
            name: "glob_search",
            description: "Find files by glob pattern.",
            input_schema: json!({
                "type": "object",
                "properties": {
                    "pattern": { "type": "string" },
                    "path": { "type": "string" }
                },
                "required": ["pattern"],
                "additionalProperties": false
            }),
        },
        // Content regex search; flag-style keys ("-B", "-n", …) mirror
        // Claude Code's grep tool contract.
        ToolSpec {
            name: "grep_search",
            description: "Search file contents with a regex pattern.",
            input_schema: json!({
                "type": "object",
                "properties": {
                    "pattern": { "type": "string" },
                    "path": { "type": "string" },
                    "glob": { "type": "string" },
                    "output_mode": { "type": "string" },
                    "-B": { "type": "integer", "minimum": 0 },
                    "-A": { "type": "integer", "minimum": 0 },
                    "-C": { "type": "integer", "minimum": 0 },
                    "context": { "type": "integer", "minimum": 0 },
                    "-n": { "type": "boolean" },
                    "-i": { "type": "boolean" },
                    "type": { "type": "string" },
                    "head_limit": { "type": "integer", "minimum": 1 },
                    "offset": { "type": "integer", "minimum": 0 },
                    "multiline": { "type": "boolean" }
                },
                "required": ["pattern"],
                "additionalProperties": false
            }),
        },
        // URL fetch + prompt-aware summarization.
        ToolSpec {
            name: "WebFetch",
            description:
                "Fetch a URL, convert it into readable text, and answer a prompt about it.",
            input_schema: json!({
                "type": "object",
                "properties": {
                    "url": { "type": "string", "format": "uri" },
                    "prompt": { "type": "string" }
                },
                "required": ["url", "prompt"],
                "additionalProperties": false
            }),
        },
        // Web search with optional domain allow/block lists.
        ToolSpec {
            name: "WebSearch",
            description: "Search the web for current information and return cited results.",
            input_schema: json!({
                "type": "object",
                "properties": {
                    "query": { "type": "string", "minLength": 2 },
                    "allowed_domains": {
                        "type": "array",
                        "items": { "type": "string" }
                    },
                    "blocked_domains": {
                        "type": "array",
                        "items": { "type": "string" }
                    }
                },
                "required": ["query"],
                "additionalProperties": false
            }),
        },
        // Structured session todo list; item shape matches Claude Code's
        // content/activeForm/status contract.
        ToolSpec {
            name: "TodoWrite",
            description: "Update the structured task list for the current session.",
            input_schema: json!({
                "type": "object",
                "properties": {
                    "todos": {
                        "type": "array",
                        "items": {
                            "type": "object",
                            "properties": {
                                "content": { "type": "string" },
                                "activeForm": { "type": "string" },
                                "status": {
                                    "type": "string",
                                    "enum": ["pending", "in_progress", "completed"]
                                }
                            },
                            "required": ["content", "activeForm", "status"],
                            "additionalProperties": false
                        }
                    }
                },
                "required": ["todos"],
                "additionalProperties": false
            }),
        },
        // Local skill discovery: returns the skill's prompt payload.
        ToolSpec {
            name: "Skill",
            description: "Load a local skill definition and its instructions.",
            input_schema: json!({
                "type": "object",
                "properties": {
                    "skill": { "type": "string" },
                    "args": { "type": "string" }
                },
                "required": ["skill"],
                "additionalProperties": false
            }),
        },
    ]
}
/// Dispatches a tool invocation by name: deserializes `input` into the
/// tool-specific input struct, runs the tool, and returns its
/// pretty-printed JSON output (or an error string).
///
/// # Errors
/// Returns the deserialization error, the tool's own error, or
/// "unsupported tool: …" for unknown names.
pub fn execute_tool(name: &str, input: &Value) -> Result<String, String> {
    match name {
        "bash" => from_value::<BashCommandInput>(input).and_then(run_bash),
        "read_file" => from_value::<ReadFileInput>(input).and_then(run_read_file),
        "write_file" => from_value::<WriteFileInput>(input).and_then(run_write_file),
        "edit_file" => from_value::<EditFileInput>(input).and_then(run_edit_file),
        "glob_search" => from_value::<GlobSearchInputValue>(input).and_then(run_glob_search),
        "grep_search" => from_value::<GrepSearchInput>(input).and_then(run_grep_search),
        "WebFetch" => from_value::<WebFetchInput>(input).and_then(run_web_fetch),
        "WebSearch" => from_value::<WebSearchInput>(input).and_then(run_web_search),
        "TodoWrite" => from_value::<TodoWriteInput>(input).and_then(run_todo_write),
        "Skill" => from_value::<SkillInput>(input).and_then(run_skill),
        _ => Err(format!("unsupported tool: {name}")),
    }
}
fn from_value<T: for<'de> Deserialize<'de>>(input: &Value) -> Result<T, String> {
|
|
serde_json::from_value(input.clone()).map_err(|error| error.to_string())
|
|
}
|
|
|
|
fn run_bash(input: BashCommandInput) -> Result<String, String> {
|
|
serde_json::to_string_pretty(&execute_bash(input).map_err(|error| error.to_string())?)
|
|
.map_err(|error| error.to_string())
|
|
}
|
|
|
|
/// Runs `read_file` with the optional offset/limit window.
fn run_read_file(input: ReadFileInput) -> Result<String, String> {
    to_pretty_json(read_file(&input.path, input.offset, input.limit).map_err(io_to_string)?)
}

/// Runs `write_file`, writing `content` to `path`.
fn run_write_file(input: WriteFileInput) -> Result<String, String> {
    to_pretty_json(write_file(&input.path, &input.content).map_err(io_to_string)?)
}

/// Runs `edit_file`; `replace_all` defaults to false (single replacement).
fn run_edit_file(input: EditFileInput) -> Result<String, String> {
    to_pretty_json(
        edit_file(
            &input.path,
            &input.old_string,
            &input.new_string,
            input.replace_all.unwrap_or(false),
        )
        .map_err(io_to_string)?,
    )
}

/// Runs `glob_search` over an optional root path.
fn run_glob_search(input: GlobSearchInputValue) -> Result<String, String> {
    to_pretty_json(glob_search(&input.pattern, input.path.as_deref()).map_err(io_to_string)?)
}

/// Runs `grep_search`, forwarding the full input struct.
fn run_grep_search(input: GrepSearchInput) -> Result<String, String> {
    to_pretty_json(grep_search(&input).map_err(io_to_string)?)
}

/// Runs the WebFetch pipeline (fetch, normalize, summarize).
fn run_web_fetch(input: WebFetchInput) -> Result<String, String> {
    to_pretty_json(execute_web_fetch(&input)?)
}

/// Runs the WebSearch pipeline (search, scrape, filter).
fn run_web_search(input: WebSearchInput) -> Result<String, String> {
    to_pretty_json(execute_web_search(&input)?)
}

/// Runs TodoWrite (validate + persist the session todo list).
fn run_todo_write(input: TodoWriteInput) -> Result<String, String> {
    to_pretty_json(execute_todo_write(input)?)
}

/// Runs Skill (resolve a local skill and return its prompt).
fn run_skill(input: SkillInput) -> Result<String, String> {
    to_pretty_json(execute_skill(input)?)
}
fn to_pretty_json<T: serde::Serialize>(value: T) -> Result<String, String> {
|
|
serde_json::to_string_pretty(&value).map_err(|error| error.to_string())
|
|
}
|
|
|
|
fn io_to_string(error: std::io::Error) -> String {
|
|
error.to_string()
|
|
}
|
|
|
|
/// Input for the `read_file` tool.
#[derive(Debug, Deserialize)]
struct ReadFileInput {
    path: String,
    // Optional read window start (schema minimum 0); semantics defined
    // by the runtime `read_file` implementation.
    offset: Option<usize>,
    // Optional maximum number of lines/items to return.
    limit: Option<usize>,
}

/// Input for the `write_file` tool.
#[derive(Debug, Deserialize)]
struct WriteFileInput {
    path: String,
    content: String,
}

/// Input for the `edit_file` tool.
#[derive(Debug, Deserialize)]
struct EditFileInput {
    path: String,
    old_string: String,
    new_string: String,
    // When absent, the wrapper defaults this to false.
    replace_all: Option<bool>,
}

/// Input for the `glob_search` tool ("Value" suffix distinguishes it from
/// any runtime-side glob input type).
#[derive(Debug, Deserialize)]
struct GlobSearchInputValue {
    pattern: String,
    path: Option<String>,
}

/// Input for the WebFetch tool: URL to fetch plus the user's prompt.
#[derive(Debug, Deserialize)]
struct WebFetchInput {
    url: String,
    prompt: String,
}

/// Input for the WebSearch tool with optional domain filters.
#[derive(Debug, Deserialize)]
struct WebSearchInput {
    query: String,
    allowed_domains: Option<Vec<String>>,
    blocked_domains: Option<Vec<String>>,
}

/// Input for TodoWrite: the full replacement todo list.
#[derive(Debug, Deserialize)]
struct TodoWriteInput {
    todos: Vec<TodoItem>,
}

/// One todo item; field names mirror Claude Code's TodoWrite contract
/// ("activeForm" is camelCase on the wire).
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq, Eq)]
struct TodoItem {
    content: String,
    #[serde(rename = "activeForm")]
    active_form: String,
    status: TodoStatus,
}

/// Todo lifecycle state; serialized as "pending"/"in_progress"/"completed".
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
enum TodoStatus {
    Pending,
    InProgress,
    Completed,
}

/// Input for the Skill tool: skill name plus optional free-form args.
#[derive(Debug, Deserialize)]
struct SkillInput {
    skill: String,
    args: Option<String>,
}
/// WebFetch result payload; camelCase renames match the wire contract.
#[derive(Debug, Serialize)]
struct WebFetchOutput {
    // Raw response body length in bytes (before normalization).
    bytes: usize,
    // HTTP status code.
    code: u16,
    #[serde(rename = "codeText")]
    code_text: String,
    // Prompt-aware textual summary of the fetched content.
    result: String,
    #[serde(rename = "durationMs")]
    duration_ms: u128,
    // Final URL after redirects.
    url: String,
}

/// WebSearch result payload: the query, a commentary + hits list, and
/// the elapsed time.
#[derive(Debug, Serialize)]
struct WebSearchOutput {
    query: String,
    results: Vec<WebSearchResultItem>,
    #[serde(rename = "durationSeconds")]
    duration_seconds: f64,
}

/// TodoWrite result: previous and new list plus an optional nudge flag
/// (None is serialized as null when no nudge is needed).
#[derive(Debug, Serialize)]
struct TodoWriteOutput {
    #[serde(rename = "oldTodos")]
    old_todos: Vec<TodoItem>,
    #[serde(rename = "newTodos")]
    new_todos: Vec<TodoItem>,
    #[serde(rename = "verificationNudgeNeeded")]
    verification_nudge_needed: Option<bool>,
}

/// Skill result: resolved location, optional front-matter description,
/// and the full prompt payload for execution handoff.
#[derive(Debug, Serialize)]
struct SkillOutput {
    skill: String,
    path: String,
    args: Option<String>,
    description: Option<String>,
    prompt: String,
}

/// One entry in WebSearch `results`: either a structured hit block or a
/// plain commentary string. `untagged` serializes each variant by shape.
#[derive(Debug, Serialize)]
#[serde(untagged)]
enum WebSearchResultItem {
    SearchResult {
        tool_use_id: String,
        content: Vec<SearchHit>,
    },
    Commentary(String),
}

/// A single cited search result.
#[derive(Debug, Serialize)]
struct SearchHit {
    title: String,
    url: String,
}
/// Implements WebFetch: GETs the (https-upgraded) URL, converts the body
/// to readable text, and builds a prompt-aware summary.
///
/// # Errors
/// Returns an error string for URL parse failures, client build failures,
/// or network/body read errors. Non-2xx statuses are NOT errors; the
/// status is reported in the output.
fn execute_web_fetch(input: &WebFetchInput) -> Result<WebFetchOutput, String> {
    let started = Instant::now();
    let client = build_http_client()?;
    let request_url = normalize_fetch_url(&input.url)?;
    let response = client
        .get(request_url.clone())
        .send()
        .map_err(|error| error.to_string())?;

    // Capture status/URL/content-type before consuming the body.
    let status = response.status();
    let final_url = response.url().to_string();
    let code = status.as_u16();
    let code_text = status.canonical_reason().unwrap_or("Unknown").to_string();
    let content_type = response
        .headers()
        .get(reqwest::header::CONTENT_TYPE)
        .and_then(|value| value.to_str().ok())
        .unwrap_or_default()
        .to_string();
    let body = response.text().map_err(|error| error.to_string())?;
    // `bytes` is the decoded body's length, reported before normalization.
    let bytes = body.len();
    let normalized = normalize_fetched_content(&body, &content_type);
    let result = summarize_web_fetch(&final_url, &input.prompt, &normalized);

    Ok(WebFetchOutput {
        bytes,
        code,
        code_text,
        result,
        duration_ms: started.elapsed().as_millis(),
        url: final_url,
    })
}
/// Implements WebSearch: queries the search endpoint, scrapes result
/// anchors, applies domain allow/block filters, dedupes, and returns a
/// Claude Code-shaped result list (commentary entry + cited hits block).
fn execute_web_search(input: &WebSearchInput) -> Result<WebSearchOutput, String> {
    let started = Instant::now();
    let client = build_http_client()?;
    let search_url = build_search_url(&input.query)?;
    let response = client
        .get(search_url)
        .send()
        .map_err(|error| error.to_string())?;

    let final_url = response.url().clone();
    let html = response.text().map_err(|error| error.to_string())?;
    // Primary extraction targets DuckDuckGo's "result__a" anchors; fall
    // back to scanning all <a> tags for non-DDG responses (e.g. the
    // test server configured via CLAWD_WEB_SEARCH_BASE_URL).
    let mut hits = extract_search_hits(&html);

    if hits.is_empty() && final_url.host_str().is_some() {
        hits = extract_search_hits_from_generic_links(&html);
    }

    // Allow-list first, then block-list (block wins on overlap).
    if let Some(allowed) = input.allowed_domains.as_ref() {
        hits.retain(|hit| host_matches_list(&hit.url, allowed));
    }
    if let Some(blocked) = input.blocked_domains.as_ref() {
        hits.retain(|hit| !host_matches_list(&hit.url, blocked));
    }

    dedupe_hits(&mut hits);
    // Cap the payload to the top 8 unique hits.
    hits.truncate(8);

    let summary = if hits.is_empty() {
        format!("No web search results matched the query {:?}.", input.query)
    } else {
        let rendered_hits = hits
            .iter()
            .map(|hit| format!("- [{}]({})", hit.title, hit.url))
            .collect::<Vec<_>>()
            .join("\n");
        format!(
            "Search results for {:?}. Include a Sources section in the final answer.\n{}",
            input.query, rendered_hits
        )
    };

    Ok(WebSearchOutput {
        query: input.query.clone(),
        results: vec![
            WebSearchResultItem::Commentary(summary),
            WebSearchResultItem::SearchResult {
                tool_use_id: String::from("web_search_1"),
                content: hits,
            },
        ],
        duration_seconds: started.elapsed().as_secs_f64(),
    })
}
/// Builds the blocking HTTP client shared by WebFetch and WebSearch:
/// 20-second timeout, at most 10 redirects, stable User-Agent.
fn build_http_client() -> Result<Client, String> {
    Client::builder()
        .timeout(Duration::from_secs(20))
        .redirect(reqwest::redirect::Policy::limited(10))
        .user_agent("clawd-rust-tools/0.1")
        .build()
        .map_err(|error| error.to_string())
}
fn normalize_fetch_url(url: &str) -> Result<String, String> {
|
|
let parsed = reqwest::Url::parse(url).map_err(|error| error.to_string())?;
|
|
if parsed.scheme() == "http" {
|
|
let host = parsed.host_str().unwrap_or_default();
|
|
if host != "localhost" && host != "127.0.0.1" && host != "::1" {
|
|
let mut upgraded = parsed;
|
|
upgraded
|
|
.set_scheme("https")
|
|
.map_err(|_| String::from("failed to upgrade URL to https"))?;
|
|
return Ok(upgraded.to_string());
|
|
}
|
|
}
|
|
Ok(parsed.to_string())
|
|
}
|
|
|
|
fn build_search_url(query: &str) -> Result<reqwest::Url, String> {
|
|
if let Ok(base) = std::env::var("CLAWD_WEB_SEARCH_BASE_URL") {
|
|
let mut url = reqwest::Url::parse(&base).map_err(|error| error.to_string())?;
|
|
url.query_pairs_mut().append_pair("q", query);
|
|
return Ok(url);
|
|
}
|
|
|
|
let mut url = reqwest::Url::parse("https://html.duckduckgo.com/html/")
|
|
.map_err(|error| error.to_string())?;
|
|
url.query_pairs_mut().append_pair("q", query);
|
|
Ok(url)
|
|
}
|
|
|
|
fn normalize_fetched_content(body: &str, content_type: &str) -> String {
|
|
if content_type.contains("html") {
|
|
html_to_text(body)
|
|
} else {
|
|
body.trim().to_string()
|
|
}
|
|
}
|
|
|
|
fn summarize_web_fetch(url: &str, prompt: &str, content: &str) -> String {
|
|
let lower_prompt = prompt.to_lowercase();
|
|
let compact = collapse_whitespace(content);
|
|
|
|
let detail = if lower_prompt.contains("title") {
|
|
extract_title(content)
|
|
.map(|title| format!("Title: {title}"))
|
|
.unwrap_or_else(|| preview_text(&compact, 600))
|
|
} else if lower_prompt.contains("summary") || lower_prompt.contains("summarize") {
|
|
preview_text(&compact, 900)
|
|
} else {
|
|
let preview = preview_text(&compact, 900);
|
|
format!("Prompt: {prompt}\nContent preview:\n{preview}")
|
|
};
|
|
|
|
format!("Fetched {url}\n{detail}")
|
|
}
|
|
|
|
/// Returns the first non-blank line of `content`, trimmed — a best-effort
/// "title" for already-normalized text.
fn extract_title(content: &str) -> Option<String> {
    content
        .lines()
        .map(str::trim)
        .find(|line| !line.is_empty())
        .map(str::to_string)
}
fn html_to_text(html: &str) -> String {
|
|
let mut text = String::with_capacity(html.len());
|
|
let mut in_tag = false;
|
|
let mut previous_was_space = false;
|
|
|
|
for ch in html.chars() {
|
|
match ch {
|
|
'<' => in_tag = true,
|
|
'>' => in_tag = false,
|
|
_ if in_tag => {}
|
|
'&' => {
|
|
text.push('&');
|
|
previous_was_space = false;
|
|
}
|
|
ch if ch.is_whitespace() => {
|
|
if !previous_was_space {
|
|
text.push(' ');
|
|
previous_was_space = true;
|
|
}
|
|
}
|
|
_ => {
|
|
text.push(ch);
|
|
previous_was_space = false;
|
|
}
|
|
}
|
|
}
|
|
|
|
collapse_whitespace(&decode_html_entities(&text))
|
|
}
|
|
|
|
/// Decodes the handful of HTML entities that commonly appear in scraped
/// titles and hrefs. `&amp;` is decoded LAST so double-escaped sequences
/// (e.g. `&amp;lt;`) become `&lt;` rather than being over-decoded to `<`.
///
/// The previous body had the entity names stripped out (identity calls
/// like `.replace("&", "&")` plus an unbalanced quote literal that could
/// not compile); the entity names are restored here.
fn decode_html_entities(input: &str) -> String {
    input
        .replace("&lt;", "<")
        .replace("&gt;", ">")
        .replace("&quot;", "\"")
        .replace("&#x27;", "'")
        .replace("&#39;", "'")
        .replace("&nbsp;", " ")
        .replace("&amp;", "&")
}
/// Collapses every whitespace run (spaces, tabs, newlines) into a single
/// space and trims leading/trailing whitespace.
fn collapse_whitespace(input: &str) -> String {
    let mut out = String::with_capacity(input.len());
    for word in input.split_whitespace() {
        if !out.is_empty() {
            out.push(' ');
        }
        out.push_str(word);
    }
    out
}
/// Truncates `input` to at most `max_chars` characters, appending an
/// ellipsis (after trimming trailing space) when anything was cut; short
/// inputs are returned unchanged.
fn preview_text(input: &str, max_chars: usize) -> String {
    let mut chars = input.chars();
    let shortened: String = chars.by_ref().take(max_chars).collect();
    // If nothing remains after taking `max_chars`, the input fit as-is.
    if chars.next().is_none() {
        return input.to_string();
    }
    format!("{}…", shortened.trim_end())
}
/// Scrapes DuckDuckGo-style results: every anchor carrying the
/// "result__a" class yields a (title, url) hit. Redirect-wrapped URLs
/// are decoded to their real destination; malformed anchors are skipped.
fn extract_search_hits(html: &str) -> Vec<SearchHit> {
    let mut hits = Vec::new();
    let mut remaining = html;

    while let Some(anchor_start) = remaining.find("result__a") {
        let after_class = &remaining[anchor_start..];
        // On any malformed anchor, advance one byte past the class marker
        // and keep scanning instead of aborting the whole page.
        let Some(href_idx) = after_class.find("href=") else {
            remaining = &after_class[1..];
            continue;
        };
        // Skip past `href=` (5 bytes) to the quoted value.
        let href_slice = &after_class[href_idx + 5..];
        let Some((url, rest)) = extract_quoted_value(href_slice) else {
            remaining = &after_class[1..];
            continue;
        };
        let Some(close_tag_idx) = rest.find('>') else {
            remaining = &after_class[1..];
            continue;
        };
        let after_tag = &rest[close_tag_idx + 1..];
        let Some(end_anchor_idx) = after_tag.find("</a>") else {
            remaining = &after_tag[1..];
            continue;
        };
        // The anchor's inner HTML (tags stripped) becomes the hit title.
        let title = html_to_text(&after_tag[..end_anchor_idx]);
        if let Some(decoded_url) = decode_duckduckgo_redirect(&url) {
            hits.push(SearchHit {
                title: title.trim().to_string(),
                url: decoded_url,
            });
        }
        // Resume scanning after the closing "</a>" (4 bytes).
        remaining = &after_tag[end_anchor_idx + 4..];
    }

    hits
}
/// Fallback scraper for non-DuckDuckGo pages: scans every `<a>` tag,
/// keeping only anchors with a non-empty title and an absolute http(s)
/// URL (after optional redirect decoding).
fn extract_search_hits_from_generic_links(html: &str) -> Vec<SearchHit> {
    let mut hits = Vec::new();
    let mut remaining = html;

    while let Some(anchor_start) = remaining.find("<a") {
        let after_anchor = &remaining[anchor_start..];
        // On malformed anchors, advance past the "<a" marker (2 bytes)
        // and keep scanning.
        let Some(href_idx) = after_anchor.find("href=") else {
            remaining = &after_anchor[2..];
            continue;
        };
        let href_slice = &after_anchor[href_idx + 5..];
        let Some((url, rest)) = extract_quoted_value(href_slice) else {
            remaining = &after_anchor[2..];
            continue;
        };
        let Some(close_tag_idx) = rest.find('>') else {
            remaining = &after_anchor[2..];
            continue;
        };
        let after_tag = &rest[close_tag_idx + 1..];
        let Some(end_anchor_idx) = after_tag.find("</a>") else {
            remaining = &after_anchor[2..];
            continue;
        };
        let title = html_to_text(&after_tag[..end_anchor_idx]);
        // Anchors with no visible text (e.g. icon links) are ignored.
        if title.trim().is_empty() {
            remaining = &after_tag[end_anchor_idx + 4..];
            continue;
        }
        // Unlike the DDG path, an undecodable href falls back to the raw
        // value; non-absolute URLs are then filtered out below.
        let decoded_url = decode_duckduckgo_redirect(&url).unwrap_or(url);
        if decoded_url.starts_with("http://") || decoded_url.starts_with("https://") {
            hits.push(SearchHit {
                title: title.trim().to_string(),
                url: decoded_url,
            });
        }
        remaining = &after_tag[end_anchor_idx + 4..];
    }

    hits
}
/// Reads a quoted attribute value from the start of `input`.
///
/// `input` must begin with `"` or `'`; returns the unquoted value plus
/// the remainder after the closing quote, or `None` when the input is
/// not quoted or the quote is unterminated.
fn extract_quoted_value(input: &str) -> Option<(String, &str)> {
    let mut chars = input.chars();
    let quote = chars.next()?;
    if !matches!(quote, '"' | '\'') {
        return None;
    }
    // `as_str` yields the slice just past the opening quote.
    let rest = chars.as_str();
    let end = rest.find(quote)?;
    Some((rest[..end].to_string(), &rest[end + quote.len_utf8()..]))
}
/// Normalizes a DuckDuckGo result href into an absolute URL.
///
/// Absolute http(s) URLs pass through (entity-decoded). Scheme-relative
/// ("//…") and site-relative ("/…") links are joined against
/// duckduckgo.com; when the joined URL is DDG's redirect endpoint
/// ("/l" or "/l/"), the real destination is pulled from its `uddg`
/// query parameter. Anything else (e.g. "javascript:" hrefs) is `None`.
fn decode_duckduckgo_redirect(url: &str) -> Option<String> {
    if url.starts_with("http://") || url.starts_with("https://") {
        return Some(html_entity_decode_url(url));
    }

    let joined = if url.starts_with("//") {
        format!("https:{url}")
    } else if url.starts_with('/') {
        format!("https://duckduckgo.com{url}")
    } else {
        return None;
    };

    let parsed = reqwest::Url::parse(&joined).ok()?;
    if parsed.path() == "/l/" || parsed.path() == "/l" {
        for (key, value) in parsed.query_pairs() {
            if key == "uddg" {
                // `query_pairs` percent-decodes; HTML entities may remain.
                return Some(html_entity_decode_url(value.as_ref()));
            }
        }
    }
    Some(joined)
}
/// Decodes HTML entities in a URL extracted from raw markup (hrefs in
/// HTML carry entity-escaped characters such as ampersands).
fn html_entity_decode_url(url: &str) -> String {
    decode_html_entities(url)
}
fn host_matches_list(url: &str, domains: &[String]) -> bool {
|
|
let Ok(parsed) = reqwest::Url::parse(url) else {
|
|
return false;
|
|
};
|
|
let Some(host) = parsed.host_str() else {
|
|
return false;
|
|
};
|
|
domains.iter().any(|domain| {
|
|
let normalized = domain.trim().trim_start_matches('.');
|
|
host == normalized || host.ends_with(&format!(".{normalized}"))
|
|
})
|
|
}
|
|
|
|
/// Drops duplicate search hits by URL, keeping the first occurrence and
/// preserving the relative order of the survivors.
fn dedupe_hits(hits: &mut Vec<SearchHit>) {
    // BTreeSet::insert returns false for already-seen URLs, so `retain`
    // keeps only the first hit per URL.
    let mut seen = BTreeSet::new();
    hits.retain(|hit| seen.insert(hit.url.clone()));
}
/// Implements TodoWrite: validates the incoming list, swaps it into the
/// on-disk session store, and reports both the previous and new state
/// (Claude Code's `oldTodos` / `newTodos` shape).
///
/// # Errors
/// Returns an error string for invalid todos, store I/O failures, or a
/// corrupt store file (surfaced rather than silently discarded).
fn execute_todo_write(input: TodoWriteInput) -> Result<TodoWriteOutput, String> {
    validate_todos(&input.todos)?;
    let store_path = todo_store_path()?;
    // Load the previously persisted list, if any.
    let old_todos = if store_path.exists() {
        serde_json::from_str::<Vec<TodoItem>>(
            &std::fs::read_to_string(&store_path).map_err(|error| error.to_string())?,
        )
        .map_err(|error| error.to_string())?
    } else {
        Vec::new()
    };

    let all_done = input
        .todos
        .iter()
        .all(|todo| matches!(todo.status, TodoStatus::Completed));
    // When every item is completed, the persisted list resets to empty so
    // the next call starts clean; the response still echoes the full list.
    let persisted = if all_done {
        Vec::new()
    } else {
        input.todos.clone()
    };

    if let Some(parent) = store_path.parent() {
        std::fs::create_dir_all(parent).map_err(|error| error.to_string())?;
    }
    std::fs::write(
        &store_path,
        serde_json::to_string_pretty(&persisted).map_err(|error| error.to_string())?,
    )
    .map_err(|error| error.to_string())?;

    // Nudge toward a verification step when a non-trivial list (>= 3
    // items) was fully completed without any item mentioning "verif…".
    // `.then_some(true)` yields Some(true) or None (serialized as null).
    let verification_nudge_needed = (all_done
        && input.todos.len() >= 3
        && !input
            .todos
            .iter()
            .any(|todo| todo.content.to_lowercase().contains("verif")))
    .then_some(true);

    Ok(TodoWriteOutput {
        old_todos,
        new_todos: input.todos,
        verification_nudge_needed,
    })
}
/// Implements the Skill tool: resolves the named skill on disk and
/// returns its full prompt payload (plus any front-matter description)
/// for the caller to hand off to the agent.
fn execute_skill(input: SkillInput) -> Result<SkillOutput, String> {
    let skill_path = resolve_skill_path(&input.skill)?;
    let prompt = std::fs::read_to_string(&skill_path).map_err(|error| error.to_string())?;
    let description = parse_skill_description(&prompt);

    Ok(SkillOutput {
        skill: input.skill,
        path: skill_path.display().to_string(),
        args: input.args,
        description,
        prompt,
    })
}
fn validate_todos(todos: &[TodoItem]) -> Result<(), String> {
|
|
if todos.is_empty() {
|
|
return Err(String::from("todos must not be empty"));
|
|
}
|
|
let in_progress = todos
|
|
.iter()
|
|
.filter(|todo| matches!(todo.status, TodoStatus::InProgress))
|
|
.count();
|
|
if in_progress > 1 {
|
|
return Err(String::from(
|
|
"exactly zero or one todo items may be in_progress",
|
|
));
|
|
}
|
|
if todos.iter().any(|todo| todo.content.trim().is_empty()) {
|
|
return Err(String::from("todo content must not be empty"));
|
|
}
|
|
if todos.iter().any(|todo| todo.active_form.trim().is_empty()) {
|
|
return Err(String::from("todo activeForm must not be empty"));
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
/// Resolves where session todos are persisted: the CLAWD_TODO_STORE
/// override when set (used by tests), otherwise `.clawd-todos.json` in
/// the current working directory.
fn todo_store_path() -> Result<std::path::PathBuf, String> {
    match std::env::var("CLAWD_TODO_STORE") {
        Ok(path) => Ok(std::path::PathBuf::from(path)),
        Err(_) => {
            let cwd = std::env::current_dir().map_err(|error| error.to_string())?;
            Ok(cwd.join(".clawd-todos.json"))
        }
    }
}
/// Resolves a skill name to its `SKILL.md` path.
///
/// Search order: `$CODEX_HOME/skills` when set, then `$HOME/.codex/skills`.
/// Within each root the skill directory is tried verbatim first, then by
/// case-insensitive directory-name match.
///
/// # Errors
/// Returns an error when the (trimmed) name is empty or no matching
/// skill directory containing a SKILL.md is found.
fn resolve_skill_path(skill: &str) -> Result<std::path::PathBuf, String> {
    let requested = skill.trim().trim_start_matches('/');
    if requested.is_empty() {
        return Err(String::from("skill must not be empty"));
    }

    let mut candidates = Vec::new();
    if let Ok(codex_home) = std::env::var("CODEX_HOME") {
        candidates.push(std::path::PathBuf::from(codex_home).join("skills"));
    }
    // Derive the fallback from $HOME instead of the machine-specific
    // "/home/bellman/.codex/skills" path the original hard-coded.
    if let Ok(home) = std::env::var("HOME") {
        candidates.push(std::path::PathBuf::from(home).join(".codex").join("skills"));
    }

    for root in candidates {
        // Exact directory name wins.
        let direct = root.join(requested).join("SKILL.md");
        if direct.exists() {
            return Ok(direct);
        }

        // Otherwise scan the root for a case-insensitive name match that
        // actually contains a SKILL.md.
        if let Ok(entries) = std::fs::read_dir(&root) {
            for entry in entries.flatten() {
                let path = entry.path().join("SKILL.md");
                if !path.exists() {
                    continue;
                }
                if entry
                    .file_name()
                    .to_string_lossy()
                    .eq_ignore_ascii_case(requested)
                {
                    return Ok(path);
                }
            }
        }
    }

    Err(format!("unknown skill: {requested}"))
}
/// Extracts the first non-empty `description:` value from a skill file's
/// contents (typically YAML-style front matter), if any. Only lines that
/// start exactly with "description:" (no leading whitespace) match.
fn parse_skill_description(contents: &str) -> Option<String> {
    contents
        .lines()
        .filter_map(|line| line.strip_prefix("description:"))
        .map(str::trim)
        .find(|value| !value.is_empty())
        .map(str::to_string)
}
#[cfg(test)]
mod tests {
    use std::io::{Read, Write};
    use std::net::{SocketAddr, TcpListener};
    use std::sync::Arc;
    use std::thread;
    use std::time::Duration;

    use super::{execute_tool, mvp_tool_specs};
    use serde_json::json;

    /// The MVP spec list exposes the core workspace and web tools.
    #[test]
    fn exposes_mvp_tools() {
        let names = mvp_tool_specs()
            .into_iter()
            .map(|spec| spec.name)
            .collect::<Vec<_>>();
        assert!(names.contains(&"bash"));
        assert!(names.contains(&"read_file"));
        assert!(names.contains(&"WebFetch"));
        assert!(names.contains(&"WebSearch"));
    }

    /// Unknown tool names are rejected with a descriptive error.
    #[test]
    fn rejects_unknown_tool_names() {
        let error = execute_tool("nope", &json!({})).expect_err("tool should be rejected");
        assert!(error.contains("unsupported tool"));
    }

    /// WebFetch against a local HTTP server: the summary quotes the page
    /// content and the status code is reported (loopback http URLs are
    /// not upgraded to https, so the local server is reachable).
    #[test]
    fn web_fetch_returns_prompt_aware_summary() {
        let server = TestServer::spawn(Arc::new(|request_line: &str| {
            assert!(request_line.starts_with("GET /page "));
            HttpResponse::html(
                200,
                "OK",
                "<html><head><title>Ignored</title></head><body><h1>Test Page</h1><p>Hello <b>world</b> from local server.</p></body></html>",
            )
        }));

        let result = execute_tool(
            "WebFetch",
            &json!({
                "url": format!("http://{}/page", server.addr()),
                "prompt": "Summarize this page"
            }),
        )
        .expect("WebFetch should succeed");

        let output: serde_json::Value = serde_json::from_str(&result).expect("valid json");
        assert_eq!(output["code"], 200);
        let summary = output["result"].as_str().expect("result string");
        assert!(summary.contains("Fetched"));
        assert!(summary.contains("Test Page"));
        // Tags stripped, whitespace collapsed.
        assert!(summary.contains("Hello world from local server"));
    }

    /// WebSearch scrapes result anchors and honors allow/block filters.
    /// NOTE(review): uses process-wide env vars (CLAWD_WEB_SEARCH_BASE_URL);
    /// this would race if another test touched the same variable
    /// concurrently — confirm tests run with this in mind.
    #[test]
    fn web_search_extracts_and_filters_results() {
        let server = TestServer::spawn(Arc::new(|request_line: &str| {
            assert!(request_line.contains("GET /search?q=rust+web+search "));
            HttpResponse::html(
                200,
                "OK",
                r#"
                <html><body>
                <a class="result__a" href="https://docs.rs/reqwest">Reqwest docs</a>
                <a class="result__a" href="https://example.com/blocked">Blocked result</a>
                </body></html>
                "#,
            )
        }));

        std::env::set_var(
            "CLAWD_WEB_SEARCH_BASE_URL",
            format!("http://{}/search", server.addr()),
        );
        let result = execute_tool(
            "WebSearch",
            &json!({
                "query": "rust web search",
                "allowed_domains": ["docs.rs"],
                "blocked_domains": ["example.com"]
            }),
        )
        .expect("WebSearch should succeed");
        std::env::remove_var("CLAWD_WEB_SEARCH_BASE_URL");

        let output: serde_json::Value = serde_json::from_str(&result).expect("valid json");
        assert_eq!(output["query"], "rust web search");
        let results = output["results"].as_array().expect("results array");
        // The SearchResult variant is the entry carrying a "content" key.
        let search_result = results
            .iter()
            .find(|item| item.get("content").is_some())
            .expect("search result block present");
        let content = search_result["content"].as_array().expect("content array");
        // Only the docs.rs hit survives the allow/block filters.
        assert_eq!(content.len(), 1);
        assert_eq!(content[0]["title"], "Reqwest docs");
        assert_eq!(content[0]["url"], "https://docs.rs/reqwest");
    }

    /// TodoWrite round-trips the persisted list: the second call's
    /// `oldTodos` reflects the first call's list, and no verification
    /// nudge fires when a "Verify" item is present.
    /// NOTE(review): also relies on a process-wide env var (CLAWD_TODO_STORE).
    #[test]
    fn todo_write_persists_and_returns_previous_state() {
        // Unique temp path per run to avoid cross-run interference.
        let path = std::env::temp_dir().join(format!(
            "clawd-tools-todos-{}.json",
            std::time::SystemTime::now()
                .duration_since(std::time::UNIX_EPOCH)
                .expect("time")
                .as_nanos()
        ));
        std::env::set_var("CLAWD_TODO_STORE", &path);

        let first = execute_tool(
            "TodoWrite",
            &json!({
                "todos": [
                    {"content": "Add tool", "activeForm": "Adding tool", "status": "in_progress"},
                    {"content": "Run tests", "activeForm": "Running tests", "status": "pending"}
                ]
            }),
        )
        .expect("TodoWrite should succeed");
        let first_output: serde_json::Value = serde_json::from_str(&first).expect("valid json");
        // Fresh store: no previous todos.
        assert_eq!(first_output["oldTodos"].as_array().expect("array").len(), 0);

        let second = execute_tool(
            "TodoWrite",
            &json!({
                "todos": [
                    {"content": "Add tool", "activeForm": "Adding tool", "status": "completed"},
                    {"content": "Run tests", "activeForm": "Running tests", "status": "completed"},
                    {"content": "Verify", "activeForm": "Verifying", "status": "completed"}
                ]
            }),
        )
        .expect("TodoWrite should succeed");
        std::env::remove_var("CLAWD_TODO_STORE");
        let _ = std::fs::remove_file(path);

        let second_output: serde_json::Value = serde_json::from_str(&second).expect("valid json");
        assert_eq!(
            second_output["oldTodos"].as_array().expect("array").len(),
            2
        );
        assert_eq!(
            second_output["newTodos"].as_array().expect("array").len(),
            3
        );
        // A "Verify" item is present, so no nudge (serialized as null).
        assert!(second_output["verificationNudgeNeeded"].is_null());
    }

    /// Skill loads the local "help" skill's SKILL.md prompt.
    /// NOTE(review): depends on a skill actually installed on the host
    /// machine's skills directory — environment-dependent test.
    #[test]
    fn skill_loads_local_skill_prompt() {
        let result = execute_tool(
            "Skill",
            &json!({
                "skill": "help",
                "args": "overview"
            }),
        )
        .expect("Skill should succeed");

        let output: serde_json::Value = serde_json::from_str(&result).expect("valid json");
        assert_eq!(output["skill"], "help");
        assert!(output["path"]
            .as_str()
            .expect("path")
            .ends_with("/help/SKILL.md"));
        assert!(output["prompt"]
            .as_str()
            .expect("prompt")
            .contains("Guide on using oh-my-codex plugin"));
    }

    /// Minimal single-threaded HTTP server for tests: serves one request
    /// per accept-loop iteration, shuts down via a channel on drop.
    struct TestServer {
        addr: SocketAddr,
        // Sending on this channel asks the accept loop to exit.
        shutdown: Option<std::sync::mpsc::Sender<()>>,
        handle: Option<thread::JoinHandle<()>>,
    }

    impl TestServer {
        /// Binds an ephemeral loopback port and serves each request line
        /// through `handler` on a background thread.
        fn spawn(handler: Arc<dyn Fn(&str) -> HttpResponse + Send + Sync + 'static>) -> Self {
            let listener = TcpListener::bind("127.0.0.1:0").expect("bind test server");
            // Non-blocking accept so the loop can poll the shutdown channel.
            listener
                .set_nonblocking(true)
                .expect("set nonblocking listener");
            let addr = listener.local_addr().expect("local addr");
            let (tx, rx) = std::sync::mpsc::channel::<()>();

            let handle = thread::spawn(move || loop {
                if rx.try_recv().is_ok() {
                    break;
                }

                match listener.accept() {
                    Ok((mut stream, _)) => {
                        // Single fixed-size read: fine for the small test
                        // requests exercised here.
                        let mut buffer = [0_u8; 4096];
                        let size = stream.read(&mut buffer).expect("read request");
                        let request = String::from_utf8_lossy(&buffer[..size]).into_owned();
                        let request_line = request.lines().next().unwrap_or_default().to_string();
                        let response = handler(&request_line);
                        stream
                            .write_all(response.to_bytes().as_slice())
                            .expect("write response");
                    }
                    // No pending connection: back off briefly and re-poll.
                    Err(error) if error.kind() == std::io::ErrorKind::WouldBlock => {
                        thread::sleep(Duration::from_millis(10));
                    }
                    Err(error) => panic!("server accept failed: {error}"),
                }
            });

            Self {
                addr,
                shutdown: Some(tx),
                handle: Some(handle),
            }
        }

        /// The bound loopback address of this server.
        fn addr(&self) -> SocketAddr {
            self.addr
        }
    }

    impl Drop for TestServer {
        fn drop(&mut self) {
            // Request shutdown, then join so the port is released before
            // the test ends.
            if let Some(tx) = self.shutdown.take() {
                let _ = tx.send(());
            }
            if let Some(handle) = self.handle.take() {
                handle.join().expect("join test server");
            }
        }
    }

    /// Canned HTTP response serialized by hand for the test server.
    struct HttpResponse {
        status: u16,
        reason: &'static str,
        content_type: &'static str,
        body: String,
    }

    impl HttpResponse {
        /// Builds an HTML response with the given status line and body.
        fn html(status: u16, reason: &'static str, body: &str) -> Self {
            Self {
                status,
                reason,
                content_type: "text/html; charset=utf-8",
                body: body.to_string(),
            }
        }

        /// Serializes the response as a raw HTTP/1.1 byte stream with
        /// Content-Length and Connection: close.
        fn to_bytes(&self) -> Vec<u8> {
            format!(
                "HTTP/1.1 {} {}\r\nContent-Type: {}\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{}",
                self.status,
                self.reason,
                self.content_type,
                self.body.len(),
                self.body
            )
            .into_bytes()
        }
    }
}