fix: use safe_truncate to truncate characters (#3263) (#3264)

Signed-off-by: toyamagu2021@gmail.com <toyamagu2021@gmail.com>
This commit is contained in:
toyamagu-2021
2025-07-15 19:34:40 +09:00
committed by GitHub
parent c6225e38d0
commit d3beeace95
13 changed files with 46 additions and 52 deletions

1
Cargo.lock generated
View File

@@ -3598,6 +3598,7 @@ dependencies = [
"criterion",
"ctor",
"dotenv",
"goose",
"include_dir",
"indoc 1.0.9",
"lazy_static",

View File

@@ -4,7 +4,7 @@ use cliclack::{self, intro, outro};
use std::path::Path;
use crate::project_tracker::ProjectTracker;
use crate::utils::safe_truncate;
use goose::utils::safe_truncate;
/// Format a DateTime for display
fn format_date(date: DateTime<chrono::Utc>) -> String {

View File

@@ -1,9 +1,9 @@
use crate::session::message_to_markdown;
use crate::utils::safe_truncate;
use anyhow::{Context, Result};
use cliclack::{confirm, multiselect, select};
use goose::session::info::{get_valid_sorted_sessions, SessionInfo, SortOrder};
use goose::session::{self, Identifier};
use goose::utils::safe_truncate;
use regex::Regex;
use std::fs;
use std::path::{Path, PathBuf};

View File

@@ -7,7 +7,6 @@ pub mod project_tracker;
pub mod recipes;
pub mod session;
pub mod signal;
pub mod utils;
// Re-export commonly used types
pub use session::Session;

View File

@@ -1,4 +1,5 @@
use goose::message::{Message, MessageContent, ToolRequest, ToolResponse};
use goose::utils::safe_truncate;
use mcp_core::content::Content as McpContent;
use mcp_core::resource::ResourceContents;
use mcp_core::role::Role;
@@ -10,9 +11,9 @@ const REDACTED_PREFIX_LENGTH: usize = 100; // Show first 100 chars before trimmi
fn value_to_simple_markdown_string(value: &Value, export_full_strings: bool) -> String {
match value {
Value::String(s) => {
if !export_full_strings && s.len() > MAX_STRING_LENGTH_MD_EXPORT {
let prefix = &s[..REDACTED_PREFIX_LENGTH.min(s.len())];
let trimmed_chars = s.len() - prefix.len();
if !export_full_strings && s.chars().count() > MAX_STRING_LENGTH_MD_EXPORT {
let prefix = safe_truncate(s, REDACTED_PREFIX_LENGTH);
let trimmed_chars = s.chars().count() - prefix.chars().count();
format!("`{}[ ... trimmed : {} chars ... ]`", prefix, trimmed_chars)
} else {
// Escape backticks and newlines for inline code.
@@ -40,7 +41,7 @@ fn value_to_markdown(value: &Value, depth: usize, export_full_strings: bool) ->
md_string.push_str(&format!("{}* **{}**: ", base_indent_str, key));
match val {
Value::String(s) => {
if s.contains('\n') || s.len() > 80 {
if s.contains('\n') || s.chars().count() > 80 {
// Heuristic for block
md_string.push_str(&format!(
"\n{} ```\n{}{}\n{} ```\n",
@@ -74,7 +75,7 @@ fn value_to_markdown(value: &Value, depth: usize, export_full_strings: bool) ->
md_string.push_str(&format!("{}* - ", base_indent_str));
match item {
Value::String(s) => {
if s.contains('\n') || s.len() > 80 {
if s.contains('\n') || s.chars().count() > 80 {
// Heuristic for block
md_string.push_str(&format!(
"\n{} ```\n{}{}\n{} ```\n",
@@ -397,7 +398,7 @@ mod tests {
assert!(result.starts_with("`"));
assert!(result.contains("[ ... trimmed : "));
assert!(result.contains("4900 chars ... ]`"));
assert!(result.contains(&"a".repeat(100))); // Should contain the prefix
assert!(result.contains(&"a".repeat(97))); // Should contain the prefix (100 - 3 for "...")
}
#[test]

View File

@@ -16,6 +16,7 @@ use goose::permission::Permission;
use goose::permission::PermissionConfirmation;
use goose::providers::base::Provider;
pub use goose::session::Identifier;
use goose::utils::safe_truncate;
use anyhow::{Context, Result};
use completion::GooseCompleter;
@@ -1037,11 +1038,7 @@ impl Session {
if min_priority > 0.1 && !self.debug {
// High/Medium verbosity: show truncated response
if let Some(response_content) = msg.strip_prefix("Responded: ") {
if response_content.len() > 100 {
format!("🤖 Responded: {}...", &response_content[..100])
} else {
format!("🤖 {}", msg)
}
format!("🤖 Responded: {}", safe_truncate(response_content, 100))
} else {
format!("🤖 {}", msg)
}

View File

@@ -15,6 +15,7 @@ crate-type = ["lib", "cdylib"]
name = "goose_llm"
[dependencies]
goose = { path = "../goose" }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
anyhow = "1.0"

View File

@@ -3,6 +3,7 @@ use crate::providers::errors::ProviderError;
use crate::types::core::Role;
use crate::{message::Message, types::json_value_ffi::JsonValueFfi};
use anyhow::Result;
use goose::utils::safe_truncate;
use indoc::indoc;
use serde_json::{json, Value};
@@ -60,11 +61,7 @@ pub async fn generate_session_name(
.take(3)
.map(|m| {
let text = m.content.concat_text_str();
if text.len() > 300 {
text.chars().take(300).collect()
} else {
text
}
safe_truncate(&text, 300)
})
.collect();

View File

@@ -17,14 +17,14 @@ pub fn process_tool_response(
match content {
Content::Text(text_content) => {
// Check if text exceeds threshold
if text_content.text.len() > LARGE_TEXT_THRESHOLD {
if text_content.text.chars().count() > LARGE_TEXT_THRESHOLD {
// Write to temp file
match write_large_text_to_file(&text_content.text) {
Ok(file_path) => {
// Create a new text content with reference to the file
let message = format!(
"The response returned from the tool call was larger ({} characters) and is stored in the file which you can use other tools to examine or search in: {}",
text_content.text.len(),
text_content.text.chars().count(),
file_path
);
processed_contents.push(Content::text(message));

View File

@@ -1,4 +1,5 @@
use crate::message::{Message, MessageContent};
use crate::utils::safe_truncate;
use anyhow::{anyhow, Result};
use mcp_core::{Content, ResourceContents, Role};
use std::collections::HashSet;
@@ -75,11 +76,11 @@ fn truncate_message_content(message: &Message, max_content_size: usize) -> Resul
for content in &mut new_message.content {
match content {
MessageContent::Text(text_content) => {
if text_content.text.len() > max_content_size {
if text_content.text.chars().count() > max_content_size {
let truncated = format!(
"{}\n\n[... content truncated from {} to {} characters ...]",
&text_content.text[..max_content_size.min(text_content.text.len())],
text_content.text.len(),
safe_truncate(&text_content.text, max_content_size),
text_content.text.chars().count(),
max_content_size
);
text_content.text = truncated;
@@ -89,11 +90,11 @@ fn truncate_message_content(message: &Message, max_content_size: usize) -> Resul
if let Ok(ref mut result) = tool_response.tool_result {
for content_item in result {
if let Content::Text(ref mut text_content) = content_item {
if text_content.text.len() > max_content_size {
if text_content.text.chars().count() > max_content_size {
let truncated = format!(
"{}\n\n[... tool response truncated from {} to {} characters ...]",
&text_content.text[..max_content_size.min(text_content.text.len())],
text_content.text.len(),
safe_truncate(&text_content.text, max_content_size),
text_content.text.chars().count(),
max_content_size
);
text_content.text = truncated;
@@ -104,11 +105,11 @@ fn truncate_message_content(message: &Message, max_content_size: usize) -> Resul
if let ResourceContents::TextResourceContents { text, .. } =
&mut resource_content.resource
{
if text.len() > max_content_size {
if text.chars().count() > max_content_size {
let truncated = format!(
"{}\n\n[... resource content truncated from {} to {} characters ...]",
&text[..max_content_size.min(text.len())],
text.len(),
safe_truncate(text, max_content_size),
text.chars().count(),
max_content_size
);
*text = truncated;

View File

@@ -15,6 +15,7 @@ pub mod temporal_scheduler;
pub mod token_counter;
pub mod tool_monitor;
pub mod tracing;
pub mod utils;
#[cfg(test)]
mod cron_test;

View File

@@ -7,6 +7,7 @@
use crate::message::Message;
use crate::providers::base::Provider;
use crate::utils::safe_truncate;
use anyhow::Result;
use chrono::Local;
use etcetera::{choose_app_strategy, AppStrategy, AppStrategyArgs};
@@ -605,7 +606,7 @@ pub fn read_messages_with_truncation(
// Log details about corrupted lines (with limited detail for security)
for (num, line) in &corrupted_lines {
let preview = if line.len() > 50 {
format!("{}... (truncated)", &line[..50])
format!("{}... (truncated)", safe_truncate(line, 50))
} else {
line.clone()
};
@@ -678,11 +679,11 @@ fn truncate_message_content_in_place(message: &mut Message, max_content_size: us
for content in &mut message.content {
match content {
MessageContent::Text(text_content) => {
if text_content.text.len() > max_content_size {
if text_content.text.chars().count() > max_content_size {
let truncated = format!(
"{}\n\n[... content truncated during session loading from {} to {} characters ...]",
&text_content.text[..max_content_size.min(text_content.text.len())],
text_content.text.len(),
safe_truncate(&text_content.text, max_content_size),
text_content.text.chars().count(),
max_content_size
);
text_content.text = truncated;
@@ -693,11 +694,11 @@ fn truncate_message_content_in_place(message: &mut Message, max_content_size: us
for content_item in result {
match content_item {
Content::Text(ref mut text_content) => {
if text_content.text.len() > max_content_size {
if text_content.text.chars().count() > max_content_size {
let truncated = format!(
"{}\n\n[... tool response truncated during session loading from {} to {} characters ...]",
&text_content.text[..max_content_size.min(text_content.text.len())],
text_content.text.len(),
safe_truncate(&text_content.text, max_content_size),
text_content.text.chars().count(),
max_content_size
);
text_content.text = truncated;
@@ -707,11 +708,11 @@ fn truncate_message_content_in_place(message: &mut Message, max_content_size: us
if let ResourceContents::TextResourceContents { text, .. } =
&mut resource_content.resource
{
if text.len() > max_content_size {
if text.chars().count() > max_content_size {
let truncated = format!(
"{}\n\n[... resource content truncated during session loading from {} to {} characters ...]",
&text[..max_content_size.min(text.len())],
text.len(),
safe_truncate(text, max_content_size),
text.chars().count(),
max_content_size
);
*text = truncated;
@@ -751,7 +752,7 @@ fn attempt_corruption_recovery(json_str: &str, max_content_size: Option<usize>)
// Strategy 4: Create a placeholder message with the raw content
println!("[SESSION] All recovery strategies failed, creating placeholder message");
let preview = if json_str.len() > 200 {
format!("{}...", &json_str[..200])
format!("{}...", safe_truncate(json_str, 200))
} else {
json_str.to_string()
};
@@ -968,7 +969,7 @@ fn truncate_json_string(json_str: &str, max_content_size: usize) -> String {
if text_content.len() > max_content_size {
let truncated_text = format!(
"{}\n\n[... content truncated during JSON parsing from {} to {} characters ...]",
&text_content[..max_content_size.min(text_content.len())],
safe_truncate(text_content, max_content_size),
text_content.len(),
max_content_size
);
@@ -1269,11 +1270,7 @@ pub async fn generate_description_with_schedule_id(
.take(3) // Use up to first 3 user messages for context
.map(|m| {
let text = m.as_concat_text();
if text.len() > 300 {
format!("{}...", &text[..300])
} else {
text
}
safe_truncate(&text, 300)
})
.collect();
@@ -1302,9 +1299,9 @@ pub async fn generate_description_with_schedule_id(
let description = result.0.as_concat_text();
// Validate description length for security
let sanitized_description = if description.len() > 100 {
let sanitized_description = if description.chars().count() > 100 {
tracing::warn!("Generated description too long, truncating");
format!("{}...", &description[..97])
safe_truncate(&description, 100)
} else {
description
};
@@ -1379,9 +1376,9 @@ mod tests {
println!(
"[TEST] Input: {}",
if corrupt_json.len() > 100 {
&corrupt_json[..100]
safe_truncate(corrupt_json, 100)
} else {
corrupt_json
corrupt_json.to_string()
}
);

View File

@@ -1,4 +1,3 @@
/// Utility functions for safe string handling and other common operations
/// Safely truncate a string at character boundaries, not byte boundaries
///
/// This function ensures that multi-byte UTF-8 characters (like Japanese, emoji, etc.)