mirror of
https://github.com/aljazceru/goose.git
synced 2025-12-18 14:44:21 +01:00
feat: add support for full url extraction for google_drive (#2121)
This commit is contained in:
@@ -4,6 +4,7 @@ pub mod storage;
|
|||||||
use anyhow::{Context, Error};
|
use anyhow::{Context, Error};
|
||||||
use base64::Engine;
|
use base64::Engine;
|
||||||
use indoc::indoc;
|
use indoc::indoc;
|
||||||
|
use lazy_static::lazy_static;
|
||||||
use mcp_core::tool::ToolAnnotations;
|
use mcp_core::tool::ToolAnnotations;
|
||||||
use oauth_pkce::PkceOAuth2Client;
|
use oauth_pkce::PkceOAuth2Client;
|
||||||
use regex::Regex;
|
use regex::Regex;
|
||||||
@@ -54,6 +55,18 @@ enum PaginationState {
|
|||||||
End,
|
End,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
lazy_static! {
    /// Pattern used to pull a file ID out of a full Google Drive / Docs URL.
    ///
    /// Read left to right, it requires:
    /// - the literal scheme `https://` at the very start of the input,
    /// - the host `google.com`, optionally preceded by one subdomain label
    ///   (e.g. `docs.` or `drive.`),
    /// - any number of path segments, followed by a `d/` segment,
    /// - the ID itself (ASCII letters, digits, `_` and `-`), captured as group 1.
    ///
    /// The pattern is anchored only at the start, so whatever follows the ID
    /// (such as `/edit`, a query string, or a fragment) is ignored.
    ///
    /// `/` has no special meaning in this regex syntax, so it is written
    /// unescaped.
    static ref GOOGLE_DRIVE_ID_REGEX: Regex =
        Regex::new(r"^https://(?:[\w-]+\.)?google\.com/(?:[^/]+/)*d/([a-zA-Z0-9_-]+)")
            .expect("GOOGLE_DRIVE_ID_REGEX is a constant pattern and must compile");
}
|
||||||
|
|
||||||
|
fn extract_google_drive_id(url: &str) -> Option<&str> {
|
||||||
|
GOOGLE_DRIVE_ID_REGEX
|
||||||
|
.captures(url)
|
||||||
|
.and_then(|caps| caps.get(1).map(|m| m.as_str()))
|
||||||
|
}
|
||||||
|
|
||||||
pub struct GoogleDriveRouter {
|
pub struct GoogleDriveRouter {
|
||||||
tools: Vec<Tool>,
|
tools: Vec<Tool>,
|
||||||
instructions: String,
|
instructions: String,
|
||||||
@@ -226,13 +239,15 @@ impl GoogleDriveRouter {
|
|||||||
let read_tool = Tool::new(
|
let read_tool = Tool::new(
|
||||||
"read".to_string(),
|
"read".to_string(),
|
||||||
indoc! {r#"
|
indoc! {r#"
|
||||||
Read a file from google drive using the file uri.
|
Read a file from google drive using the file URI or the full google drive URL.
|
||||||
|
Exactly one of URI or URL MUST be provided.
|
||||||
|
|
||||||
Optionally include base64 encoded images, false by default.
|
Optionally include base64 encoded images, false by default.
|
||||||
|
|
||||||
Example extracting URIs from URLs:
|
Example extracting URIs from URLs:
|
||||||
Given "https://docs.google.com/document/d/1QG8d8wtWe7ZfmG93sW-1h2WXDJDUkOi-9hDnvJLmWrc/edit?tab=t.0#heading=h.5v419d3h97tr"
|
Given "https://docs.google.com/document/d/1QG8d8wtWe7ZfmG93sW-1h2WXDJDUkOi-9hDnvJLmWrc/edit?tab=t.0#heading=h.5v419d3h97tr"
|
||||||
Pass in "gdrive:///1QG8d8wtWe7ZfmG93sW-1h2WXDJDUkOi-9hDnvJLmWrc"
|
Pass in "gdrive:///1QG8d8wtWe7ZfmG93sW-1h2WXDJDUkOi-9hDnvJLmWrc"
|
||||||
Do not include any other path parameters.
|
Do not include any other path parameters when using URI.
|
||||||
"#}
|
"#}
|
||||||
.to_string(),
|
.to_string(),
|
||||||
json!({
|
json!({
|
||||||
@@ -240,14 +255,17 @@ impl GoogleDriveRouter {
|
|||||||
"properties": {
|
"properties": {
|
||||||
"uri": {
|
"uri": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "google drive uri of the file to read",
|
"description": "google drive uri of the file to read, use this when you have the file URI",
|
||||||
|
},
|
||||||
|
"url": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "the full google drive URL to read the file from, use this when the user gives a full https url",
|
||||||
},
|
},
|
||||||
"includeImages": {
|
"includeImages": {
|
||||||
"type": "boolean",
|
"type": "boolean",
|
||||||
"description": "Whether or not to include images as base64 encoded strings, defaults to false",
|
"description": "Whether or not to include images as base64 encoded strings, defaults to false",
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"required": ["uri"],
|
|
||||||
}),
|
}),
|
||||||
Some(ToolAnnotations {
|
Some(ToolAnnotations {
|
||||||
title: Some("Read GDrive".to_string()),
|
title: Some("Read GDrive".to_string()),
|
||||||
@@ -1186,23 +1204,46 @@ impl GoogleDriveRouter {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async fn read(&self, params: Value) -> Result<Vec<Content>, ToolError> {
|
async fn read(&self, params: Value) -> Result<Vec<Content>, ToolError> {
|
||||||
let uri =
|
let (maybe_uri, maybe_url) = (
|
||||||
params
|
params.get("uri").and_then(|q| q.as_str()),
|
||||||
.get("uri")
|
params.get("url").and_then(|q| q.as_str()),
|
||||||
.and_then(|q| q.as_str())
|
);
|
||||||
.ok_or(ToolError::InvalidParameters(
|
|
||||||
"The uri of the file is required".to_string(),
|
|
||||||
))?;
|
|
||||||
|
|
||||||
let drive_uri = uri.replace("gdrive:///", "");
|
let drive_uri = match (maybe_uri, maybe_url) {
|
||||||
|
(Some(uri), None) => {
|
||||||
|
let drive_uri = uri.replace("gdrive:///", "");
|
||||||
|
|
||||||
// Validation: check for / path separators as invalid uris
|
// Validation: check for / path separators as invalid uris
|
||||||
if drive_uri.contains('/') {
|
if drive_uri.contains('/') {
|
||||||
return Err(ToolError::InvalidParameters(format!(
|
return Err(ToolError::InvalidParameters(format!(
|
||||||
"The uri '{}' conatins extra '/'. Only the base URI is allowed.",
|
"The uri '{}' conatins extra '/'. Only the base URI is allowed.",
|
||||||
uri
|
uri
|
||||||
)));
|
)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
drive_uri
|
||||||
|
}
|
||||||
|
(None, Some(url)) => {
|
||||||
|
if let Some(drive_uri) = extract_google_drive_id(url) {
|
||||||
|
drive_uri.to_string()
|
||||||
|
} else {
|
||||||
|
return Err(ToolError::InvalidParameters(format!(
|
||||||
|
"Failed to extract valid google drive URI from {}",
|
||||||
|
url
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
(Some(_), Some(_)) => {
|
||||||
|
return Err(ToolError::InvalidParameters(
|
||||||
|
"Only one of 'uri' or 'url' should be provided".to_string(),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
(None, None) => {
|
||||||
|
return Err(ToolError::InvalidParameters(
|
||||||
|
"Either 'uri' or 'url' must be provided".to_string(),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
let include_images = params
|
let include_images = params
|
||||||
.get("includeImages")
|
.get("includeImages")
|
||||||
@@ -1211,7 +1252,10 @@ impl GoogleDriveRouter {
|
|||||||
|
|
||||||
let metadata = self.fetch_file_metadata(&drive_uri).await?;
|
let metadata = self.fetch_file_metadata(&drive_uri).await?;
|
||||||
let mime_type = metadata.mime_type.ok_or_else(|| {
|
let mime_type = metadata.mime_type.ok_or_else(|| {
|
||||||
ToolError::ExecutionError(format!("Missing mime type in file metadata for {}.", uri))
|
ToolError::ExecutionError(format!(
|
||||||
|
"Missing mime type in file metadata for {}.",
|
||||||
|
drive_uri
|
||||||
|
))
|
||||||
})?;
|
})?;
|
||||||
|
|
||||||
// Handle Google Docs export
|
// Handle Google Docs export
|
||||||
@@ -2797,3 +2841,63 @@ impl Clone for GoogleDriveRouter {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `extract_google_drive_id(url)` yields exactly `expected`.
    ///
    /// `#[track_caller]` makes a failure point at the calling test rather
    /// than at this helper.
    #[track_caller]
    fn check_extraction(url: &str, expected: Option<&str>) {
        assert_eq!(extract_google_drive_id(url), expected);
    }

    // A Google Docs document URL with a query string and fragment after the ID.
    #[test]
    fn test_document_url() {
        check_extraction(
            "https://docs.google.com/document/d/1QG8d8wtWe7ZfmG93sW-1h2WXDJDUkOi-9hDnvJLmWrc/edit?tab=t.0#heading=h.5v419d3h97tr",
            Some("1QG8d8wtWe7ZfmG93sW-1h2WXDJDUkOi-9hDnvJLmWrc"),
        );
    }

    // A Google Sheets URL.
    #[test]
    fn test_spreadsheets_url() {
        check_extraction(
            "https://docs.google.com/spreadsheets/d/1J5KHqWsGFzweuiQboX7dlm8Ejv90Po16ocEBahzCt4W/edit?gid=1249300797#gid=1249300797",
            Some("1J5KHqWsGFzweuiQboX7dlm8Ejv90Po16ocEBahzCt4W"),
        );
    }

    // A Google Slides URL.
    #[test]
    fn test_slides_url() {
        check_extraction(
            "https://docs.google.com/presentation/d/1zXWqsGpHJEu40oqb1omh68sW9liu7EKFBCdnPaJVoQ5et/edit#slide=id.p1",
            Some("1zXWqsGpHJEu40oqb1omh68sW9liu7EKFBCdnPaJVoQ5et"),
        );
    }

    // Without the `https://` scheme, nothing is extracted.
    #[test]
    fn test_missing_scheme() {
        check_extraction("docs.google.com/document/d/abcdef12345/edit", None);
    }

    // A drive.google.com file URL with a trailing `/view` segment.
    #[test]
    fn test_extra_path_segments() {
        check_extraction(
            "https://drive.google.com/file/d/1abcdEFGH_ijklMNOpqrstUVwxyz-1234/view",
            Some("1abcdEFGH_ijklMNOpqrstUVwxyz-1234"),
        );
    }

    // A non-Google host is rejected even though the path contains `/d/`.
    #[test]
    fn test_invalid_google_url() {
        check_extraction("https://example.com/d/12345", None);
    }

    // A Google URL whose path has no `d/` segment is rejected.
    #[test]
    fn test_no_d_segment() {
        check_extraction(
            "https://docs.google.com/document/1QG8d8wtWe7ZfmG93sW-1h2WXDJDUkOi-9hDnvJLmWrc/edit",
            None,
        );
    }
}
|
||||||
|
|||||||
Reference in New Issue
Block a user