feat: add support for full url extraction for google_drive (#2121)

This commit is contained in:
Kalvin C
2025-04-09 18:11:31 -07:00
committed by GitHub
parent f1e29407cc
commit 64d17e5205

View File

@@ -4,6 +4,7 @@ pub mod storage;
use anyhow::{Context, Error}; use anyhow::{Context, Error};
use base64::Engine; use base64::Engine;
use indoc::indoc; use indoc::indoc;
use lazy_static::lazy_static;
use mcp_core::tool::ToolAnnotations; use mcp_core::tool::ToolAnnotations;
use oauth_pkce::PkceOAuth2Client; use oauth_pkce::PkceOAuth2Client;
use regex::Regex; use regex::Regex;
@@ -54,6 +55,18 @@ enum PaginationState {
End, End,
} }
// Pattern used to pull a file ID out of a full Google Drive / Docs URL.
//
// Reading the pattern left to right:
//   ^(?:https:\/\/)    - must start with the `https://` scheme
//   (?:[\w-]+\.)?      - optional single subdomain label (e.g. `docs.`, `drive.`)
//   google\.com\/      - the literal host `google.com` followed by `/`
//   (?:[^\/]+\/)*      - zero or more intermediate path segments
//   d\/                - the literal `d/` path segment that precedes the ID
//   ([a-zA-Z0-9_-]+)   - capture group 1: the ID itself
//
// There is no trailing anchor, so anything after the ID (e.g. `/edit`,
// query string, fragment) is ignored.
lazy_static! {
static ref GOOGLE_DRIVE_ID_REGEX: Regex =
Regex::new(r"^(?:https:\/\/)(?:[\w-]+\.)?google\.com\/(?:[^\/]+\/)*d\/([a-zA-Z0-9_-]+)")
// The pattern is a fixed literal; `unwrap` panics only if it fails to compile.
.unwrap();
}
/// Extracts the Google Drive file ID from a full `https://…google.com/…/d/<id>` URL.
///
/// Matches `url` against `GOOGLE_DRIVE_ID_REGEX` and returns the text of its
/// first capture group, borrowed from `url`. Returns `None` when the URL does
/// not match the pattern.
fn extract_google_drive_id(url: &str) -> Option<&str> {
    // Bail out early when the URL is not a recognised Google Drive link.
    let captures = GOOGLE_DRIVE_ID_REGEX.captures(url)?;
    let id_match = captures.get(1);
    id_match.map(|found| found.as_str())
}
pub struct GoogleDriveRouter { pub struct GoogleDriveRouter {
tools: Vec<Tool>, tools: Vec<Tool>,
instructions: String, instructions: String,
@@ -226,13 +239,15 @@ impl GoogleDriveRouter {
let read_tool = Tool::new( let read_tool = Tool::new(
"read".to_string(), "read".to_string(),
indoc! {r#" indoc! {r#"
Read a file from google drive using the file uri. Read a file from google drive using the file URI or the full google drive URL.
One of URI or URL MUST is required.
Optionally include base64 encoded images, false by default. Optionally include base64 encoded images, false by default.
Example extracting URIs from URLs: Example extracting URIs from URLs:
Given "https://docs.google.com/document/d/1QG8d8wtWe7ZfmG93sW-1h2WXDJDUkOi-9hDnvJLmWrc/edit?tab=t.0#heading=h.5v419d3h97tr" Given "https://docs.google.com/document/d/1QG8d8wtWe7ZfmG93sW-1h2WXDJDUkOi-9hDnvJLmWrc/edit?tab=t.0#heading=h.5v419d3h97tr"
Pass in "gdrive:///1QG8d8wtWe7ZfmG93sW-1h2WXDJDUkOi-9hDnvJLmWrc" Pass in "gdrive:///1QG8d8wtWe7ZfmG93sW-1h2WXDJDUkOi-9hDnvJLmWrc"
Do not include any other path parameters. Do not include any other path parameters when using URI.
"#} "#}
.to_string(), .to_string(),
json!({ json!({
@@ -240,14 +255,17 @@ impl GoogleDriveRouter {
"properties": { "properties": {
"uri": { "uri": {
"type": "string", "type": "string",
"description": "google drive uri of the file to read", "description": "google drive uri of the file to read, use this when you have the file URI",
},
"url": {
"type": "string",
"description": "the full google drive URL to read the file from, use this when the user gives a full https url",
}, },
"includeImages": { "includeImages": {
"type": "boolean", "type": "boolean",
"description": "Whether or not to include images as base64 encoded strings, defaults to false", "description": "Whether or not to include images as base64 encoded strings, defaults to false",
} }
}, },
"required": ["uri"],
}), }),
Some(ToolAnnotations { Some(ToolAnnotations {
title: Some("Read GDrive".to_string()), title: Some("Read GDrive".to_string()),
@@ -1186,23 +1204,46 @@ impl GoogleDriveRouter {
} }
async fn read(&self, params: Value) -> Result<Vec<Content>, ToolError> { async fn read(&self, params: Value) -> Result<Vec<Content>, ToolError> {
let uri = let (maybe_uri, maybe_url) = (
params params.get("uri").and_then(|q| q.as_str()),
.get("uri") params.get("url").and_then(|q| q.as_str()),
.and_then(|q| q.as_str()) );
.ok_or(ToolError::InvalidParameters(
"The uri of the file is required".to_string(),
))?;
let drive_uri = uri.replace("gdrive:///", ""); let drive_uri = match (maybe_uri, maybe_url) {
(Some(uri), None) => {
let drive_uri = uri.replace("gdrive:///", "");
// Validation: check for / path separators as invalid uris // Validation: check for / path separators as invalid uris
if drive_uri.contains('/') { if drive_uri.contains('/') {
return Err(ToolError::InvalidParameters(format!( return Err(ToolError::InvalidParameters(format!(
"The uri '{}' conatins extra '/'. Only the base URI is allowed.", "The uri '{}' conatins extra '/'. Only the base URI is allowed.",
uri uri
))); )));
} }
drive_uri
}
(None, Some(url)) => {
if let Some(drive_uri) = extract_google_drive_id(url) {
drive_uri.to_string()
} else {
return Err(ToolError::InvalidParameters(format!(
"Failed to extract valid google drive URI from {}",
url
)));
}
}
(Some(_), Some(_)) => {
return Err(ToolError::InvalidParameters(
"Only one of 'uri' or 'url' should be provided".to_string(),
));
}
(None, None) => {
return Err(ToolError::InvalidParameters(
"Either 'uri' or 'url' must be provided".to_string(),
));
}
};
let include_images = params let include_images = params
.get("includeImages") .get("includeImages")
@@ -1211,7 +1252,10 @@ impl GoogleDriveRouter {
let metadata = self.fetch_file_metadata(&drive_uri).await?; let metadata = self.fetch_file_metadata(&drive_uri).await?;
let mime_type = metadata.mime_type.ok_or_else(|| { let mime_type = metadata.mime_type.ok_or_else(|| {
ToolError::ExecutionError(format!("Missing mime type in file metadata for {}.", uri)) ToolError::ExecutionError(format!(
"Missing mime type in file metadata for {}.",
drive_uri
))
})?; })?;
// Handle Google Docs export // Handle Google Docs export
@@ -2797,3 +2841,63 @@ impl Clone for GoogleDriveRouter {
} }
} }
} }
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `extract_google_drive_id(url)` equals `expected`.
    ///
    /// `#[track_caller]` keeps the reported failure location on the calling test.
    #[track_caller]
    fn check(url: &str, expected: Option<&str>) {
        assert_eq!(extract_google_drive_id(url), expected);
    }

    /// A Google Docs document link with query string and fragment yields its ID.
    #[test]
    fn test_document_url() {
        check(
            "https://docs.google.com/document/d/1QG8d8wtWe7ZfmG93sW-1h2WXDJDUkOi-9hDnvJLmWrc/edit?tab=t.0#heading=h.5v419d3h97tr",
            Some("1QG8d8wtWe7ZfmG93sW-1h2WXDJDUkOi-9hDnvJLmWrc"),
        );
    }

    /// A Google Sheets link yields its ID.
    #[test]
    fn test_spreadsheets_url() {
        check(
            "https://docs.google.com/spreadsheets/d/1J5KHqWsGFzweuiQboX7dlm8Ejv90Po16ocEBahzCt4W/edit?gid=1249300797#gid=1249300797",
            Some("1J5KHqWsGFzweuiQboX7dlm8Ejv90Po16ocEBahzCt4W"),
        );
    }

    /// A Google Slides link yields its ID.
    #[test]
    fn test_slides_url() {
        check(
            "https://docs.google.com/presentation/d/1zXWqsGpHJEu40oqb1omh68sW9liu7EKFBCdnPaJVoQ5et/edit#slide=id.p1",
            Some("1zXWqsGpHJEu40oqb1omh68sW9liu7EKFBCdnPaJVoQ5et"),
        );
    }

    /// A URL without the `https://` scheme is rejected.
    #[test]
    fn test_missing_scheme() {
        check("docs.google.com/document/d/abcdef12345/edit", None);
    }

    /// A drive.google.com file link (`file/d/<id>/view`) yields its ID.
    #[test]
    fn test_extra_path_segments() {
        check(
            "https://drive.google.com/file/d/1abcdEFGH_ijklMNOpqrstUVwxyz-1234/view",
            Some("1abcdEFGH_ijklMNOpqrstUVwxyz-1234"),
        );
    }

    /// A non-Google host is rejected even when the path contains `/d/<id>`.
    #[test]
    fn test_invalid_google_url() {
        check("https://example.com/d/12345", None);
    }

    /// A Google URL lacking the `d/` segment before the ID is rejected.
    #[test]
    fn test_no_d_segment() {
        check(
            "https://docs.google.com/document/1QG8d8wtWe7ZfmG93sW-1h2WXDJDUkOi-9hDnvJLmWrc/edit",
            None,
        );
    }
}