feat: add support for full url extraction for google_drive (#2121)

This commit is contained in:
Kalvin C
2025-04-09 18:11:31 -07:00
committed by GitHub
parent f1e29407cc
commit 64d17e5205

View File

@@ -4,6 +4,7 @@ pub mod storage;
use anyhow::{Context, Error};
use base64::Engine;
use indoc::indoc;
use lazy_static::lazy_static;
use mcp_core::tool::ToolAnnotations;
use oauth_pkce::PkceOAuth2Client;
use regex::Regex;
@@ -54,6 +55,18 @@ enum PaginationState {
End,
}
lazy_static! {
static ref GOOGLE_DRIVE_ID_REGEX: Regex =
Regex::new(r"^(?:https:\/\/)(?:[\w-]+\.)?google\.com\/(?:[^\/]+\/)*d\/([a-zA-Z0-9_-]+)")
.unwrap();
}
fn extract_google_drive_id(url: &str) -> Option<&str> {
GOOGLE_DRIVE_ID_REGEX
.captures(url)
.and_then(|caps| caps.get(1).map(|m| m.as_str()))
}
pub struct GoogleDriveRouter {
tools: Vec<Tool>,
instructions: String,
@@ -226,13 +239,15 @@ impl GoogleDriveRouter {
let read_tool = Tool::new(
"read".to_string(),
indoc! {r#"
Read a file from google drive using the file uri.
Read a file from google drive using the file URI or the full google drive URL.
One of URI or URL MUST is required.
Optionally include base64 encoded images, false by default.
Example extracting URIs from URLs:
Given "https://docs.google.com/document/d/1QG8d8wtWe7ZfmG93sW-1h2WXDJDUkOi-9hDnvJLmWrc/edit?tab=t.0#heading=h.5v419d3h97tr"
Pass in "gdrive:///1QG8d8wtWe7ZfmG93sW-1h2WXDJDUkOi-9hDnvJLmWrc"
Do not include any other path parameters.
Do not include any other path parameters when using URI.
"#}
.to_string(),
json!({
@@ -240,14 +255,17 @@ impl GoogleDriveRouter {
"properties": {
"uri": {
"type": "string",
"description": "google drive uri of the file to read",
"description": "google drive uri of the file to read, use this when you have the file URI",
},
"url": {
"type": "string",
"description": "the full google drive URL to read the file from, use this when the user gives a full https url",
},
"includeImages": {
"type": "boolean",
"description": "Whether or not to include images as base64 encoded strings, defaults to false",
}
},
"required": ["uri"],
}),
Some(ToolAnnotations {
title: Some("Read GDrive".to_string()),
@@ -1186,23 +1204,46 @@ impl GoogleDriveRouter {
}
async fn read(&self, params: Value) -> Result<Vec<Content>, ToolError> {
let uri =
params
.get("uri")
.and_then(|q| q.as_str())
.ok_or(ToolError::InvalidParameters(
"The uri of the file is required".to_string(),
))?;
let (maybe_uri, maybe_url) = (
params.get("uri").and_then(|q| q.as_str()),
params.get("url").and_then(|q| q.as_str()),
);
let drive_uri = uri.replace("gdrive:///", "");
let drive_uri = match (maybe_uri, maybe_url) {
(Some(uri), None) => {
let drive_uri = uri.replace("gdrive:///", "");
// Validation: check for / path separators as invalid uris
if drive_uri.contains('/') {
return Err(ToolError::InvalidParameters(format!(
"The uri '{}' conatins extra '/'. Only the base URI is allowed.",
uri
)));
}
// Validation: check for / path separators as invalid uris
if drive_uri.contains('/') {
return Err(ToolError::InvalidParameters(format!(
"The uri '{}' conatins extra '/'. Only the base URI is allowed.",
uri
)));
}
drive_uri
}
(None, Some(url)) => {
if let Some(drive_uri) = extract_google_drive_id(url) {
drive_uri.to_string()
} else {
return Err(ToolError::InvalidParameters(format!(
"Failed to extract valid google drive URI from {}",
url
)));
}
}
(Some(_), Some(_)) => {
return Err(ToolError::InvalidParameters(
"Only one of 'uri' or 'url' should be provided".to_string(),
));
}
(None, None) => {
return Err(ToolError::InvalidParameters(
"Either 'uri' or 'url' must be provided".to_string(),
));
}
};
let include_images = params
.get("includeImages")
@@ -1211,7 +1252,10 @@ impl GoogleDriveRouter {
let metadata = self.fetch_file_metadata(&drive_uri).await?;
let mime_type = metadata.mime_type.ok_or_else(|| {
ToolError::ExecutionError(format!("Missing mime type in file metadata for {}.", uri))
ToolError::ExecutionError(format!(
"Missing mime type in file metadata for {}.",
drive_uri
))
})?;
// Handle Google Docs export
@@ -2797,3 +2841,63 @@ impl Clone for GoogleDriveRouter {
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_document_url() {
let url = "https://docs.google.com/document/d/1QG8d8wtWe7ZfmG93sW-1h2WXDJDUkOi-9hDnvJLmWrc/edit?tab=t.0#heading=h.5v419d3h97tr";
assert_eq!(
extract_google_drive_id(url),
Some("1QG8d8wtWe7ZfmG93sW-1h2WXDJDUkOi-9hDnvJLmWrc")
);
}
#[test]
fn test_spreadsheets_url() {
let url = "https://docs.google.com/spreadsheets/d/1J5KHqWsGFzweuiQboX7dlm8Ejv90Po16ocEBahzCt4W/edit?gid=1249300797#gid=1249300797";
assert_eq!(
extract_google_drive_id(url),
Some("1J5KHqWsGFzweuiQboX7dlm8Ejv90Po16ocEBahzCt4W")
);
}
#[test]
fn test_slides_url() {
let url = "https://docs.google.com/presentation/d/1zXWqsGpHJEu40oqb1omh68sW9liu7EKFBCdnPaJVoQ5et/edit#slide=id.p1";
assert_eq!(
extract_google_drive_id(url),
Some("1zXWqsGpHJEu40oqb1omh68sW9liu7EKFBCdnPaJVoQ5et")
);
}
#[test]
fn test_missing_scheme() {
let url = "docs.google.com/document/d/abcdef12345/edit";
assert_eq!(extract_google_drive_id(url), None);
}
#[test]
fn test_extra_path_segments() {
let url = "https://drive.google.com/file/d/1abcdEFGH_ijklMNOpqrstUVwxyz-1234/view";
assert_eq!(
extract_google_drive_id(url),
Some("1abcdEFGH_ijklMNOpqrstUVwxyz-1234")
);
}
#[test]
fn test_invalid_google_url() {
let url = "https://example.com/d/12345";
assert_eq!(extract_google_drive_id(url), None);
}
#[test]
fn test_no_d_segment() {
let url =
"https://docs.google.com/document/1QG8d8wtWe7ZfmG93sW-1h2WXDJDUkOi-9hDnvJLmWrc/edit";
assert_eq!(extract_google_drive_id(url), None);
}
}