Fix UTF-8 truncation panic in tool results

- Add safe_truncate_index helper to find valid char boundaries
- Fix truncation in executor.rs, browser.rs, web.rs, git.rs, memory.rs
- Fix truncation in context.rs, retriever.rs, control.rs, routes.rs
- Prevents panic when truncating strings with multi-byte chars (e.g. Chinese)
This commit is contained in:
Thomas Marchand
2025-12-20 21:39:38 +00:00
parent c6fd67664b
commit f85ea14b3b
19 changed files with 65 additions and 22 deletions

1
cregis-audit/anychain Submodule

Submodule cregis-audit/anychain added at 9bad0bc186

Submodule cregis-audit/anychain-aptos added at 184cd217f8

Submodule cregis-audit/anychain-ffi-rust added at f18ecd0878

Submodule cregis-audit/anychain-solana added at a82094425e

Submodule cregis-audit/anychain-ton added at adfd99bbac

Submodule cregis-audit/cregis-java-sdk added at e75cbffe34

Submodule cregis-audit/cregis-sdk-php added at 5e9da58186

1
cregis-audit/easynode Submodule

Submodule cregis-audit/easynode added at 086dd7d199

1
cregis-audit/kms Submodule

Submodule cregis-audit/kms added at 0e1e32290f

View File

@@ -1038,10 +1038,18 @@ Use `search_memory` when you encounter a problem you might have solved before or
}
// Truncate tool result if too large to prevent context overflow
// Use char_indices to find a safe UTF-8 boundary for truncation
let truncated_content = if tool_message_content.len() > max_tool_result_chars {
// Find the last valid char boundary before or at max_tool_result_chars
let safe_end = tool_message_content
.char_indices()
.take_while(|(i, _)| *i < max_tool_result_chars)
.last()
.map(|(i, c)| i + c.len_utf8())
.unwrap_or(0);
format!(
"{}... [truncated, {} chars total. For large data, consider writing to a file and reading specific sections]",
&tool_message_content[..max_tool_result_chars],
&tool_message_content[..safe_end],
tool_message_content.len()
)
} else {

View File

@@ -1137,7 +1137,8 @@ async fn control_actor_loop(
if let Some((role, content)) = history.first() {
if role == "user" {
let title = if content.len() > 100 {
format!("{}...", &content[..100])
let safe_end = crate::memory::safe_truncate_index(content, 100);
format!("{}...", &content[..safe_end])
} else {
content.clone()
};

View File

@@ -434,7 +434,8 @@ async fn run_agent_task(
let response_event = crate::memory::RecordedEvent::new("TaskExecutor", crate::memory::EventKind::LlmResponse)
.with_preview(&if result.output.len() > 1000 {
result.output[..1000].to_string()
let safe_end = crate::memory::safe_truncate_index(&result.output, 1000);
result.output[..safe_end].to_string()
} else {
result.output.clone()
})
@@ -461,7 +462,8 @@ async fn run_agent_task(
"Task: {}\nResult: {}\nSuccess: {}",
task_description,
if result.output.len() > 500 {
&result.output[..500]
let safe_end = crate::memory::safe_truncate_index(&result.output, 500);
&result.output[..safe_end]
} else {
&result.output
},

View File

@@ -267,14 +267,15 @@ impl<'a> ContextBuilder<'a> {
context
}
/// Truncate tool result content if too large.
/// Truncate tool result content if too large, safe for UTF-8.
pub fn truncate_tool_result(&self, content: &str) -> String {
if content.len() <= self.config.max_tool_result_chars {
content.to_string()
} else {
let safe_end = safe_truncate_index(content, self.config.max_tool_result_chars);
format!(
"{}... [truncated, {} chars total. For large data, consider writing to a file and reading specific sections]",
&content[..self.config.max_tool_result_chars],
&content[..safe_end],
content.len()
)
}
@@ -306,23 +307,39 @@ impl<'a> ContextBuilder<'a> {
}
}
/// Truncate a string with ellipsis.
/// Find a safe UTF-8 boundary for truncating `s` to at most `max_bytes` bytes.
///
/// Returns the largest byte index `i` such that `i <= max_bytes`,
/// `i <= s.len()`, and `i` lies on a char boundary of `s`. Slicing
/// `&s[..i]` therefore never panics and never yields more than
/// `max_bytes` bytes.
///
/// If `s` already fits within `max_bytes`, `s.len()` is returned. If a
/// multi-byte character straddles `max_bytes`, that character is excluded
/// entirely (the index backs up to where it starts).
pub fn safe_truncate_index(s: &str, max_bytes: usize) -> usize {
    if s.len() <= max_bytes {
        return s.len();
    }
    // A UTF-8 scalar is at most 4 bytes, so walking backwards from
    // `max_bytes` reaches a boundary within 3 steps. Index 0 is always a
    // boundary, so the search cannot come up empty; `unwrap_or(0)` is only
    // there to avoid a panic path.
    (0..=max_bytes)
        .rev()
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(0)
}
/// Truncate a string with ellipsis, safe for UTF-8.
fn truncate(s: &str, max: usize) -> String {
if s.len() <= max {
s.to_string()
} else {
format!("{}...", &s[..max])
let safe_end = safe_truncate_index(s, max);
format!("{}...", &s[..safe_end])
}
}
/// Truncate a message with size info.
/// Truncate a message with size info, safe for UTF-8.
fn truncate_message(content: &str, max_chars: usize) -> String {
if content.len() <= max_chars {
content.to_string()
} else {
let safe_end = safe_truncate_index(content, max_chars);
format!(
"{}... [truncated, {} chars total]",
&content[..max_chars],
&content[..safe_end],
content.len()
)
}

View File

@@ -38,7 +38,7 @@ pub use supabase::SupabaseClient;
pub use embed::EmbeddingClient;
pub use writer::{MemoryWriter, EventRecorder, RecordedEvent};
pub use retriever::MemoryRetriever;
pub use context::{ContextBuilder, SessionContext, MemoryContext};
pub use context::{ContextBuilder, SessionContext, MemoryContext, safe_truncate_index};
use crate::config::MemoryConfig;
use std::sync::Arc;

View File

@@ -308,12 +308,13 @@ Only return the JSON array, nothing else."#,
}
}
/// Truncate a string to max length.
/// Truncate a string to max length, safe for UTF-8.
fn truncate(s: &str, max: usize) -> &str {
if s.len() <= max {
s
} else {
&s[..max]
let safe_end = super::context::safe_truncate_index(s, max);
&s[..safe_end]
}
}

View File

@@ -395,12 +395,13 @@ impl Tool for BrowserGetContent {
}
};
// Truncate if too long
// Truncate if too long (safe for UTF-8)
let max_len = 50000;
if content.len() > max_len {
let safe_end = crate::memory::safe_truncate_index(&content, max_len);
Ok(format!(
"{}\n\n... [truncated, {} total characters]",
&content[..max_len],
&content[..safe_end],
content.len()
))
} else {

View File

@@ -106,9 +106,11 @@ impl Tool for GitDiff {
if result.is_empty() {
Ok("No changes".to_string())
} else if result.len() > 10000 {
let safe_end = crate::memory::safe_truncate_index(&result, 10000);
Ok(format!(
"{}... [diff truncated, showing first 10000 chars]",
&result[..10000]
"{}... [diff truncated, showing first {} chars]",
&result[..safe_end],
safe_end
))
} else {
Ok(result)

View File

@@ -244,6 +244,7 @@ fn truncate(s: &str, max: usize) -> String {
if s.len() <= max {
s.to_string()
} else {
format!("{}...", &s[..max])
let safe_end = crate::memory::safe_truncate_index(s, max);
format!("{}...", &s[..safe_end])
}
}

View File

@@ -326,17 +326,18 @@ impl Tool for FetchUrl {
// Save full content to file
std::fs::write(&file_path, &body)?;
// Return path with preview
// Return path with preview (safe for UTF-8)
let preview_len = std::cmp::min(2000, display_content.len());
let preview = &display_content[..preview_len];
let safe_end = crate::memory::safe_truncate_index(&display_content, preview_len);
let preview = &display_content[..safe_end];
Ok(format!(
"Response too large ({} bytes). Full content saved to: {}\n\nPreview (first {} chars):\n{}{}",
body.len(),
file_path.display(),
preview_len,
safe_end,
preview,
if display_content.len() > preview_len { "\n..." } else { "" }
if display_content.len() > safe_end { "\n..." } else { "" }
))
} else {
Ok(display_content)