fix: use safe_truncate to truncate at character boundaries (#3263) (#3264)

Signed-off-by: toyamagu2021@gmail.com <toyamagu2021@gmail.com>
2026-02-09 08:34:22 +01:00 · 2025-07-15 19:34:40 +09:00
parent c6225e38d0
commit d3beeace95
13 changed files with 46 additions and 52 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3598,6 +3598,7 @@ dependencies = [
 "criterion",
 "ctor",
 "dotenv",
+ "goose",
 "include_dir",
 "indoc 1.0.9",
 "lazy_static",
--- a/crates/goose-cli/src/commands/project.rs
+++ b/crates/goose-cli/src/commands/project.rs
@@ -4,7 +4,7 @@ use cliclack::{self, intro, outro};
 use std::path::Path;

 use crate::project_tracker::ProjectTracker;
-use crate::utils::safe_truncate;
+use goose::utils::safe_truncate;

 /// Format a DateTime for display
 fn format_date(date: DateTime<chrono::Utc>) -> String {
--- a/crates/goose-cli/src/commands/session.rs
+++ b/crates/goose-cli/src/commands/session.rs
@@ -1,9 +1,9 @@
 use crate::session::message_to_markdown;
-use crate::utils::safe_truncate;
 use anyhow::{Context, Result};
 use cliclack::{confirm, multiselect, select};
 use goose::session::info::{get_valid_sorted_sessions, SessionInfo, SortOrder};
 use goose::session::{self, Identifier};
+use goose::utils::safe_truncate;
 use regex::Regex;
 use std::fs;
 use std::path::{Path, PathBuf};
--- a/crates/goose-cli/src/lib.rs
+++ b/crates/goose-cli/src/lib.rs
@@ -7,7 +7,6 @@ pub mod project_tracker;
 pub mod recipes;
 pub mod session;
 pub mod signal;
-pub mod utils;
 // Re-export commonly used types
 pub use session::Session;

--- a/crates/goose-cli/src/session/export.rs
+++ b/crates/goose-cli/src/session/export.rs
@@ -1,4 +1,5 @@
 use goose::message::{Message, MessageContent, ToolRequest, ToolResponse};
+use goose::utils::safe_truncate;
 use mcp_core::content::Content as McpContent;
 use mcp_core::resource::ResourceContents;
 use mcp_core::role::Role;
@@ -10,9 +11,9 @@ const REDACTED_PREFIX_LENGTH: usize = 100; // Show first 100 chars before trimmi
 fn value_to_simple_markdown_string(value: &Value, export_full_strings: bool) -> String {
    match value {
        Value::String(s) => {
-            if !export_full_strings && s.len() > MAX_STRING_LENGTH_MD_EXPORT {
-                let prefix = &s[..REDACTED_PREFIX_LENGTH.min(s.len())];
-                let trimmed_chars = s.len() - prefix.len();
+            if !export_full_strings && s.chars().count() > MAX_STRING_LENGTH_MD_EXPORT {
+                let prefix = safe_truncate(s, REDACTED_PREFIX_LENGTH);
+                let trimmed_chars = s.chars().count() - prefix.chars().count();
                format!("`{}[ ... trimmed : {} chars ... ]`", prefix, trimmed_chars)
            } else {
                // Escape backticks and newlines for inline code.
@@ -40,7 +41,7 @@ fn value_to_markdown(value: &Value, depth: usize, export_full_strings: bool) ->
                    md_string.push_str(&format!("{}*   **{}**: ", base_indent_str, key));
                    match val {
                        Value::String(s) => {
-                            if s.contains('\n') || s.len() > 80 {
+                            if s.contains('\n') || s.chars().count() > 80 {
                                // Heuristic for block
                                md_string.push_str(&format!(
                                    "\n{}    ```\n{}{}\n{}    ```\n",
@@ -74,7 +75,7 @@ fn value_to_markdown(value: &Value, depth: usize, export_full_strings: bool) ->
                    md_string.push_str(&format!("{}*   - ", base_indent_str));
                    match item {
                        Value::String(s) => {
-                            if s.contains('\n') || s.len() > 80 {
+                            if s.contains('\n') || s.chars().count() > 80 {
                                // Heuristic for block
                                md_string.push_str(&format!(
                                    "\n{}      ```\n{}{}\n{}      ```\n",
@@ -397,7 +398,7 @@ mod tests {
        assert!(result.starts_with("`"));
        assert!(result.contains("[ ... trimmed : "));
        assert!(result.contains("4900 chars ... ]`"));
-        assert!(result.contains(&"a".repeat(100))); // Should contain the prefix
+        assert!(result.contains(&"a".repeat(97))); // Should contain the prefix (100 - 3 for "...")
    }

    #[test]
--- a/crates/goose-cli/src/session/mod.rs
+++ b/crates/goose-cli/src/session/mod.rs
@@ -16,6 +16,7 @@ use goose::permission::Permission;
 use goose::permission::PermissionConfirmation;
 use goose::providers::base::Provider;
 pub use goose::session::Identifier;
+use goose::utils::safe_truncate;

 use anyhow::{Context, Result};
 use completion::GooseCompleter;
@@ -1037,11 +1038,7 @@ impl Session {
                                                            if min_priority > 0.1 && !self.debug {
                                                                // High/Medium verbosity: show truncated response
                                                                if let Some(response_content) = msg.strip_prefix("Responded: ") {
-                                                                    if response_content.len() > 100 {
-                                                                        format!("🤖 Responded: {}...", &response_content[..100])
-                                                                    } else {
-                                                                        format!("🤖 {}", msg)
-                                                                    }
+                                                                    format!("🤖 Responded: {}", safe_truncate(response_content, 100))
                                                                } else {
                                                                    format!("🤖 {}", msg)
                                                                }
--- a/crates/goose-llm/Cargo.toml
+++ b/crates/goose-llm/Cargo.toml
@@ -15,6 +15,7 @@ crate-type = ["lib", "cdylib"]
 name = "goose_llm"

 [dependencies]
+goose = { path = "../goose" }
 serde = { version = "1.0", features = ["derive"] }
 serde_json = "1.0"
 anyhow = "1.0"
--- a/crates/goose-llm/src/extractors/session_name.rs
+++ b/crates/goose-llm/src/extractors/session_name.rs
@@ -3,6 +3,7 @@ use crate::providers::errors::ProviderError;
 use crate::types::core::Role;
 use crate::{message::Message, types::json_value_ffi::JsonValueFfi};
 use anyhow::Result;
+use goose::utils::safe_truncate;
 use indoc::indoc;
 use serde_json::{json, Value};

@@ -60,11 +61,7 @@ pub async fn generate_session_name(
        .take(3)
        .map(|m| {
            let text = m.content.concat_text_str();
-            if text.len() > 300 {
-                text.chars().take(300).collect()
-            } else {
-                text
-            }
+            safe_truncate(&text, 300)
        })
        .collect();

--- a/crates/goose/src/agents/large_response_handler.rs
+++ b/crates/goose/src/agents/large_response_handler.rs
@@ -17,14 +17,14 @@ pub fn process_tool_response(
                match content {
                    Content::Text(text_content) => {
                        // Check if text exceeds threshold
-                        if text_content.text.len() > LARGE_TEXT_THRESHOLD {
+                        if text_content.text.chars().count() > LARGE_TEXT_THRESHOLD {
                            // Write to temp file
                            match write_large_text_to_file(&text_content.text) {
                                Ok(file_path) => {
                                    // Create a new text content with reference to the file
                                    let message = format!(
                                        "The response returned from the tool call was larger ({} characters) and is stored in the file which you can use other tools to examine or search in: {}",
-                                        text_content.text.len(),
+                                        text_content.text.chars().count(),
                                        file_path
                                    );
                                    processed_contents.push(Content::text(message));
--- a/crates/goose/src/context_mgmt/truncate.rs
+++ b/crates/goose/src/context_mgmt/truncate.rs
@@ -1,4 +1,5 @@
 use crate::message::{Message, MessageContent};
+use crate::utils::safe_truncate;
 use anyhow::{anyhow, Result};
 use mcp_core::{Content, ResourceContents, Role};
 use std::collections::HashSet;
@@ -75,11 +76,11 @@ fn truncate_message_content(message: &Message, max_content_size: usize) -> Resul
    for content in &mut new_message.content {
        match content {
            MessageContent::Text(text_content) => {
-                if text_content.text.len() > max_content_size {
+                if text_content.text.chars().count() > max_content_size {
                    let truncated = format!(
                        "{}\n\n[... content truncated from {} to {} characters ...]",
-                        &text_content.text[..max_content_size.min(text_content.text.len())],
-                        text_content.text.len(),
+                        safe_truncate(&text_content.text, max_content_size),
+                        text_content.text.chars().count(),
                        max_content_size
                    );
                    text_content.text = truncated;
@@ -89,11 +90,11 @@ fn truncate_message_content(message: &Message, max_content_size: usize) -> Resul
                if let Ok(ref mut result) = tool_response.tool_result {
                    for content_item in result {
                        if let Content::Text(ref mut text_content) = content_item {
-                            if text_content.text.len() > max_content_size {
+                            if text_content.text.chars().count() > max_content_size {
                                let truncated = format!(
                                    "{}\n\n[... tool response truncated from {} to {} characters ...]",
-                                    &text_content.text[..max_content_size.min(text_content.text.len())],
-                                    text_content.text.len(),
+                                    safe_truncate(&text_content.text, max_content_size),
+                                    text_content.text.chars().count(),
                                    max_content_size
                                );
                                text_content.text = truncated;
@@ -104,11 +105,11 @@ fn truncate_message_content(message: &Message, max_content_size: usize) -> Resul
                            if let ResourceContents::TextResourceContents { text, .. } =
                                &mut resource_content.resource
                            {
-                                if text.len() > max_content_size {
+                                if text.chars().count() > max_content_size {
                                    let truncated = format!(
                                        "{}\n\n[... resource content truncated from {} to {} characters ...]",
-                                        &text[..max_content_size.min(text.len())],
-                                        text.len(),
+                                        safe_truncate(text, max_content_size),
+                                        text.chars().count(),
                                        max_content_size
                                    );
                                    *text = truncated;
--- a/crates/goose/src/lib.rs
+++ b/crates/goose/src/lib.rs
@@ -15,6 +15,7 @@ pub mod temporal_scheduler;
 pub mod token_counter;
 pub mod tool_monitor;
 pub mod tracing;
+pub mod utils;

 #[cfg(test)]
 mod cron_test;
--- a/crates/goose/src/session/storage.rs
+++ b/crates/goose/src/session/storage.rs
@@ -7,6 +7,7 @@

 use crate::message::Message;
 use crate::providers::base::Provider;
+use crate::utils::safe_truncate;
 use anyhow::Result;
 use chrono::Local;
 use etcetera::{choose_app_strategy, AppStrategy, AppStrategyArgs};
@@ -605,7 +606,7 @@ pub fn read_messages_with_truncation(
        // Log details about corrupted lines (with limited detail for security)
        for (num, line) in &corrupted_lines {
            let preview = if line.len() > 50 {
-                format!("{}... (truncated)", &line[..50])
+                format!("{}... (truncated)", safe_truncate(line, 50))
            } else {
                line.clone()
            };
@@ -678,11 +679,11 @@ fn truncate_message_content_in_place(message: &mut Message, max_content_size: us
    for content in &mut message.content {
        match content {
            MessageContent::Text(text_content) => {
-                if text_content.text.len() > max_content_size {
+                if text_content.text.chars().count() > max_content_size {
                    let truncated = format!(
                        "{}\n\n[... content truncated during session loading from {} to {} characters ...]",
-                        &text_content.text[..max_content_size.min(text_content.text.len())],
-                        text_content.text.len(),
+                        safe_truncate(&text_content.text, max_content_size),
+                        text_content.text.chars().count(),
                        max_content_size
                    );
                    text_content.text = truncated;
@@ -693,11 +694,11 @@ fn truncate_message_content_in_place(message: &mut Message, max_content_size: us
                    for content_item in result {
                        match content_item {
                            Content::Text(ref mut text_content) => {
-                                if text_content.text.len() > max_content_size {
+                                if text_content.text.chars().count() > max_content_size {
                                    let truncated = format!(
                                        "{}\n\n[... tool response truncated during session loading from {} to {} characters ...]",
-                                        &text_content.text[..max_content_size.min(text_content.text.len())],
-                                        text_content.text.len(),
+                                        safe_truncate(&text_content.text, max_content_size),
+                                        text_content.text.chars().count(),
                                        max_content_size
                                    );
                                    text_content.text = truncated;
@@ -707,11 +708,11 @@ fn truncate_message_content_in_place(message: &mut Message, max_content_size: us
                                if let ResourceContents::TextResourceContents { text, .. } =
                                    &mut resource_content.resource
                                {
-                                    if text.len() > max_content_size {
+                                    if text.chars().count() > max_content_size {
                                        let truncated = format!(
                                            "{}\n\n[... resource content truncated during session loading from {} to {} characters ...]",
-                                            &text[..max_content_size.min(text.len())],
-                                            text.len(),
+                                            safe_truncate(text, max_content_size),
+                                            text.chars().count(),
                                            max_content_size
                                        );
                                        *text = truncated;
@@ -751,7 +752,7 @@ fn attempt_corruption_recovery(json_str: &str, max_content_size: Option<usize>)
    // Strategy 4: Create a placeholder message with the raw content
    println!("[SESSION] All recovery strategies failed, creating placeholder message");
    let preview = if json_str.len() > 200 {
-        format!("{}...", &json_str[..200])
+        format!("{}...", safe_truncate(json_str, 200))
    } else {
        json_str.to_string()
    };
@@ -968,7 +969,7 @@ fn truncate_json_string(json_str: &str, max_content_size: usize) -> String {
            if text_content.len() > max_content_size {
                let truncated_text = format!(
                    "{}\n\n[... content truncated during JSON parsing from {} to {} characters ...]",
-                    &text_content[..max_content_size.min(text_content.len())],
+                    safe_truncate(text_content, max_content_size),
                    text_content.len(),
                    max_content_size
                );
@@ -1269,11 +1270,7 @@ pub async fn generate_description_with_schedule_id(
        .take(3) // Use up to first 3 user messages for context
        .map(|m| {
            let text = m.as_concat_text();
-            if text.len() > 300 {
-                format!("{}...", &text[..300])
-            } else {
-                text
-            }
+            safe_truncate(&text, 300)
        })
        .collect();

@@ -1302,9 +1299,9 @@ pub async fn generate_description_with_schedule_id(
    let description = result.0.as_concat_text();

    // Validate description length for security
-    let sanitized_description = if description.len() > 100 {
+    let sanitized_description = if description.chars().count() > 100 {
        tracing::warn!("Generated description too long, truncating");
-        format!("{}...", &description[..97])
+        safe_truncate(&description, 100)
    } else {
        description
    };
@@ -1379,9 +1376,9 @@ mod tests {
            println!(
                "[TEST] Input: {}",
                if corrupt_json.len() > 100 {
-                    &corrupt_json[..100]
+                    safe_truncate(corrupt_json, 100)
                } else {
-                    corrupt_json
+                    corrupt_json.to_string()
                }
            );

--- a/crates/goose-cli/src/utils.rs
+++ b/crates/goose-cli/src/utils.rs
@@ -1,4 +1,3 @@
-/// Utility functions for safe string handling and other common operations
 /// Safely truncate a string at character boundaries, not byte boundaries
 ///
 /// This function ensures that multi-byte UTF-8 characters (like Japanese, emoji, etc.)