mirror of
https://github.com/aljazceru/opencode.git
synced 2025-12-21 09:44:21 +01:00
small fixes
This commit is contained in:
223
internal/llm/tools/fetch.go
Normal file
223
internal/llm/tools/fetch.go
Normal file
@@ -0,0 +1,223 @@
|
||||
package tools
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
md "github.com/JohannesKaufmann/html-to-markdown"
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
"github.com/kujtimiihoxha/termai/internal/config"
|
||||
"github.com/kujtimiihoxha/termai/internal/permission"
|
||||
)
|
||||
|
||||
const (
	// FetchToolName is the name under which the fetch tool is registered; it
	// is reported by Info and used as the tool name in permission requests.
	FetchToolName = "fetch"

	// fetchToolDescription is the human/LLM-facing usage text returned by
	// Info. It is sent verbatim, so it must not contain formatting residue.
	fetchToolDescription = `Fetches content from a URL and returns it in the specified format.

WHEN TO USE THIS TOOL:
- Use when you need to download content from a URL
- Helpful for retrieving documentation, API responses, or web content
- Useful for getting external information to assist with tasks

HOW TO USE:
- Provide the URL to fetch content from
- Specify the desired output format (text, markdown, or html)
- Optionally set a timeout for the request

FEATURES:
- Supports three output formats: text, markdown, and html
- Automatically handles HTTP redirects
- Sets reasonable timeouts to prevent hanging
- Validates input parameters before making requests

LIMITATIONS:
- Maximum response size is 5MB
- Only supports HTTP and HTTPS protocols
- Cannot handle authentication or cookies
- Some websites may block automated requests

TIPS:
- Use text format for plain text content or simple API responses
- Use markdown format for content that should be rendered with formatting
- Use html format when you need the raw HTML structure
- Set appropriate timeouts for potentially slow websites`
)
|
||||
|
||||
// FetchParams is the JSON input of a fetch tool call, decoded by Run from the
// call's Input.
type FetchParams struct {
	// URL is the address to fetch. Run rejects an empty value and anything
	// that does not start with "http://" or "https://".
	URL string `json:"url"`
	// Format selects the output representation: "text", "markdown" or
	// "html". Run compares it case-insensitively.
	Format string `json:"format"`
	// Timeout is an optional request timeout in seconds. Values <= 0 keep
	// the tool's default client; Run caps larger values at 120.
	Timeout int `json:"timeout,omitempty"`
}
|
||||
|
||||
// FetchPermissionsParams carries the details of a fetch call in the Params
// field of the permission request that Run issues before contacting the
// network. Its fields mirror FetchParams and are copied from the call as
// received (before Run normalises the format or caps the timeout).
type FetchPermissionsParams struct {
	// URL is the address the tool is asking to fetch.
	URL string `json:"url"`
	// Format is the requested output format.
	Format string `json:"format"`
	// Timeout is the requested timeout in seconds, if any.
	Timeout int `json:"timeout,omitempty"`
}
|
||||
|
||||
// fetchTool implements the fetch tool. Instances are created by NewFetchTool.
type fetchTool struct {
	// client performs requests for calls that do not specify their own
	// timeout; Run builds a separate client when a timeout is supplied.
	client *http.Client
}
|
||||
|
||||
func NewFetchTool() BaseTool {
|
||||
return &fetchTool{
|
||||
client: &http.Client{
|
||||
Timeout: 30 * time.Second,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func (t *fetchTool) Info() ToolInfo {
|
||||
return ToolInfo{
|
||||
Name: FetchToolName,
|
||||
Description: fetchToolDescription,
|
||||
Parameters: map[string]any{
|
||||
"url": map[string]any{
|
||||
"type": "string",
|
||||
"description": "The URL to fetch content from",
|
||||
},
|
||||
"format": map[string]any{
|
||||
"type": "string",
|
||||
"description": "The format to return the content in (text, markdown, or html)",
|
||||
},
|
||||
"timeout": map[string]any{
|
||||
"type": "number",
|
||||
"description": "Optional timeout in seconds (max 120)",
|
||||
},
|
||||
},
|
||||
Required: []string{"url", "format"},
|
||||
}
|
||||
}
|
||||
|
||||
func (t *fetchTool) Run(ctx context.Context, call ToolCall) (ToolResponse, error) {
|
||||
var params FetchParams
|
||||
if err := json.Unmarshal([]byte(call.Input), ¶ms); err != nil {
|
||||
return NewTextErrorResponse("Failed to parse fetch parameters: " + err.Error()), nil
|
||||
}
|
||||
|
||||
if params.URL == "" {
|
||||
return NewTextErrorResponse("URL parameter is required"), nil
|
||||
}
|
||||
|
||||
format := strings.ToLower(params.Format)
|
||||
if format != "text" && format != "markdown" && format != "html" {
|
||||
return NewTextErrorResponse("Format must be one of: text, markdown, html"), nil
|
||||
}
|
||||
|
||||
if !strings.HasPrefix(params.URL, "http://") && !strings.HasPrefix(params.URL, "https://") {
|
||||
return NewTextErrorResponse("URL must start with http:// or https://"), nil
|
||||
}
|
||||
|
||||
p := permission.Default.Request(
|
||||
permission.CreatePermissionRequest{
|
||||
Path: config.WorkingDirectory(),
|
||||
ToolName: FetchToolName,
|
||||
Action: "fetch",
|
||||
Description: fmt.Sprintf("Fetch content from URL: %s", params.URL),
|
||||
Params: FetchPermissionsParams{
|
||||
URL: params.URL,
|
||||
Format: params.Format,
|
||||
Timeout: params.Timeout,
|
||||
},
|
||||
},
|
||||
)
|
||||
|
||||
if !p {
|
||||
return NewTextErrorResponse("Permission denied to fetch from URL: " + params.URL), nil
|
||||
}
|
||||
|
||||
client := t.client
|
||||
if params.Timeout > 0 {
|
||||
maxTimeout := 120 // 2 minutes
|
||||
if params.Timeout > maxTimeout {
|
||||
params.Timeout = maxTimeout
|
||||
}
|
||||
client = &http.Client{
|
||||
Timeout: time.Duration(params.Timeout) * time.Second,
|
||||
}
|
||||
}
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, "GET", params.URL, nil)
|
||||
if err != nil {
|
||||
return NewTextErrorResponse("Failed to create request: " + err.Error()), nil
|
||||
}
|
||||
|
||||
req.Header.Set("User-Agent", "termai/1.0")
|
||||
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return NewTextErrorResponse("Failed to execute request: " + err.Error()), nil
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return NewTextErrorResponse(fmt.Sprintf("Request failed with status code: %d", resp.StatusCode)), nil
|
||||
}
|
||||
|
||||
maxSize := int64(5 * 1024 * 1024) // 5MB
|
||||
body, err := io.ReadAll(io.LimitReader(resp.Body, maxSize))
|
||||
if err != nil {
|
||||
return NewTextErrorResponse("Failed to read response body: " + err.Error()), nil
|
||||
}
|
||||
|
||||
content := string(body)
|
||||
contentType := resp.Header.Get("Content-Type")
|
||||
|
||||
switch format {
|
||||
case "text":
|
||||
if strings.Contains(contentType, "text/html") {
|
||||
text, err := extractTextFromHTML(content)
|
||||
if err != nil {
|
||||
return NewTextErrorResponse("Failed to extract text from HTML: " + err.Error()), nil
|
||||
}
|
||||
return NewTextResponse(text), nil
|
||||
}
|
||||
return NewTextResponse(content), nil
|
||||
|
||||
case "markdown":
|
||||
if strings.Contains(contentType, "text/html") {
|
||||
markdown, err := convertHTMLToMarkdown(content)
|
||||
if err != nil {
|
||||
return NewTextErrorResponse("Failed to convert HTML to Markdown: " + err.Error()), nil
|
||||
}
|
||||
return NewTextResponse(markdown), nil
|
||||
}
|
||||
|
||||
return NewTextResponse("```\n" + content + "\n```"), nil
|
||||
|
||||
case "html":
|
||||
return NewTextResponse(content), nil
|
||||
|
||||
default:
|
||||
return NewTextResponse(content), nil
|
||||
}
|
||||
}
|
||||
|
||||
func extractTextFromHTML(html string) (string, error) {
|
||||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
text := doc.Text()
|
||||
text = strings.Join(strings.Fields(text), " ")
|
||||
|
||||
return text, nil
|
||||
}
|
||||
|
||||
func convertHTMLToMarkdown(html string) (string, error) {
|
||||
converter := md.NewConverter("", true, nil)
|
||||
|
||||
markdown, err := converter.ConvertString(html)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return markdown, nil
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user