mirror of https://github.com/aljazceru/goose.git
synced 2026-01-26 17:54:29 +01:00
64 lines · 2.0 KiB · Python
# server.py
|
|
|
|
import requests
|
|
from requests.exceptions import RequestException
|
|
from bs4 import BeautifulSoup
|
|
from html2text import html2text
|
|
|
|
from mcp.server.fastmcp import FastMCP
|
|
from mcp.shared.exceptions import McpError
|
|
from mcp.types import ErrorData, INTERNAL_ERROR, INVALID_PARAMS
|
|
|
|
# FastMCP server instance named "wiki"; tools below register via @mcp.tool().
mcp = FastMCP("wiki")
|
|
|
|
@mcp.tool()
def read_wikipedia_article(url: str) -> str:
    """
    Fetch a Wikipedia article at the provided URL, parse its main content,
    convert it to Markdown, and return the resulting text.

    Usage:
        read_wikipedia_article("https://en.wikipedia.org/wiki/Python_(programming_language)")

    Args:
        url: Full article URL; must use the http or https scheme.

    Returns:
        The article's main content as Markdown text.

    Raises:
        McpError: With INVALID_PARAMS for a malformed URL or a page lacking
            the expected content container; with INTERNAL_ERROR for HTTP
            failures, network errors, or any unexpected exception.
    """
    try:
        # Validate input. Require a real scheme prefix: the looser
        # startswith("http") would also accept e.g. "httpfoo://...",
        # contradicting the error message below.
        if not url.startswith(("http://", "https://")):
            raise ValueError("URL must start with http or https.")

        response = requests.get(url, timeout=10)
        if response.status_code != 200:
            raise McpError(
                ErrorData(
                    INTERNAL_ERROR,
                    f"Failed to retrieve the article. HTTP status code: {response.status_code}"
                )
            )

        soup = BeautifulSoup(response.text, "html.parser")

        # Wikipedia places the rendered article body in <div id="mw-content-text">.
        content_div = soup.find("div", {"id": "mw-content-text"})
        if not content_div:
            raise McpError(
                ErrorData(
                    INVALID_PARAMS,
                    "Could not find the main content on the provided Wikipedia URL."
                )
            )

        # Convert to Markdown
        markdown_text = html2text(str(content_div))
        return markdown_text

    except ValueError as e:
        # Raised when input parameters are invalid
        raise McpError(ErrorData(INVALID_PARAMS, str(e))) from e
    except RequestException as e:
        # Network or connection errors
        raise McpError(ErrorData(INTERNAL_ERROR, f"Request error: {str(e)}")) from e
    except McpError:
        # Propagate the McpErrors raised above unchanged; without this clause
        # the generic handler below would re-wrap them (McpError subclasses
        # Exception), turning e.g. the INVALID_PARAMS "content not found"
        # error into a misleading INTERNAL_ERROR "Unexpected error".
        raise
    except Exception as e:
        # Catch-all for any other unexpected errors
        raise McpError(ErrorData(INTERNAL_ERROR, f"Unexpected error: {str(e)}")) from e
|
|
|
|
|