Mirror of https://github.com/aljazceru/enclava.git (synced 2025-12-17 07:24:34 +01:00)
playground internal api migration
@@ -3,14 +3,17 @@ Internal LLM API endpoints - for frontend use with JWT authentication
 """
 
 import logging
-from typing import Dict, Any, List
+from typing import Dict, Any, List, Optional
 
 from fastapi import APIRouter, Depends, HTTPException, Request, status
+from pydantic import BaseModel, Field
 from sqlalchemy.ext.asyncio import AsyncSession
 
 from app.db.database import get_db
 from app.core.security import get_current_user
 from app.services.llm.service import llm_service
+from app.services.llm.models import ChatRequest, ChatMessage as LLMChatMessage
+from app.services.llm.exceptions import LLMError, ProviderError, SecurityError, ValidationError
 from app.api.v1.llm import get_cached_models  # Reuse the caching logic
 
 logger = logging.getLogger(__name__)
@@ -108,4 +111,96 @@ async def get_metrics(
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
             detail="Failed to retrieve metrics"
         )
+
+
+class ChatCompletionRequest(BaseModel):
+    """Request model for chat completions"""
+    model: str
+    messages: List[Dict[str, str]]
+    temperature: Optional[float] = Field(default=0.7, ge=0.0, le=2.0)
+    max_tokens: Optional[int] = Field(default=1000, ge=1)
+    top_p: Optional[float] = Field(default=1.0, ge=0.0, le=1.0)
+    stream: Optional[bool] = False
+
+
+@router.post("/chat/completions")
+async def create_chat_completion(
+    request: ChatCompletionRequest,
+    current_user: Dict[str, Any] = Depends(get_current_user),
+    db: AsyncSession = Depends(get_db)
+):
+    """
+    Create chat completion for authenticated frontend users.
+    This endpoint is for playground and internal use only, using JWT authentication.
+    """
+    try:
+        # Get user ID from JWT token context
+        user_id = str(current_user.get("id", current_user.get("sub", "0")))
+
+        # Convert request to LLM service format
+        # For internal use, we use a special api_key_id of 0 to indicate JWT auth
+        chat_request = ChatRequest(
+            model=request.model,
+            messages=[
+                LLMChatMessage(role=msg["role"], content=msg["content"])
+                for msg in request.messages
+            ],
+            temperature=request.temperature,
+            max_tokens=request.max_tokens,
+            top_p=request.top_p,
+            stream=request.stream,
+            user_id=user_id,
+            api_key_id=0  # Special value for JWT-authenticated requests
+        )
+
+        # Log the request for debugging
+        logger.info(f"Internal chat completion request from user {current_user.get('id')}: model={request.model}")
+
+        # Process the request through the LLM service
+        response = await llm_service.create_chat_completion(chat_request)
+
+        # Format the response to match OpenAI's structure
+        formatted_response = {
+            "id": response.id,
+            "object": "chat.completion",
+            "created": response.created,
+            "model": response.model,
+            "choices": [
+                {
+                    "index": choice.index,
+                    "message": {
+                        "role": choice.message.role,
+                        "content": choice.message.content
+                    },
+                    "finish_reason": choice.finish_reason
+                }
+                for choice in response.choices
+            ],
+            "usage": {
+                "prompt_tokens": response.usage.prompt_tokens if response.usage else 0,
+                "completion_tokens": response.usage.completion_tokens if response.usage else 0,
+                "total_tokens": response.usage.total_tokens if response.usage else 0
+            } if response.usage else None
+        }
+
+        return formatted_response
+
+    except ValidationError as e:
+        logger.error(f"Validation error in chat completion: {e}")
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail=f"Invalid request: {str(e)}"
+        )
+    except LLMError as e:
+        logger.error(f"LLM service error: {e}")
+        raise HTTPException(
+            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
+            detail=f"LLM service error: {str(e)}"
+        )
+    except Exception as e:
+        logger.error(f"Unexpected error in chat completion: {e}")
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail="Failed to process chat completion"
+        )
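
For reference, a minimal client-side sketch of calling the new endpoint. The base URL and router prefix are assumptions (the router's mount point is not shown in this diff), as are the model name and token value; the response shape follows the OpenAI-style formatted_response built above.

import httpx

# Assumed mount point for the internal router -- not shown in this diff.
BASE_URL = "http://localhost:8000/api/v1/internal"

def playground_chat(jwt_token: str, prompt: str) -> str:
    """Send one user message through the JWT-authenticated playground endpoint."""
    payload = {
        "model": "example-model",  # hypothetical model name
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.7,
        "max_tokens": 1000,
    }
    resp = httpx.post(
        f"{BASE_URL}/chat/completions",
        json=payload,
        headers={"Authorization": f"Bearer {jwt_token}"},  # JWT, not an API key
        timeout=60.0,
    )
    resp.raise_for_status()
    # The endpoint mirrors OpenAI's chat.completion structure
    return resp.json()["choices"][0]["message"]["content"]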
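
And a hedged test sketch showing how the JWT dependency could be stubbed when exercising the route. FastAPI's dependency_overrides mechanism is standard; the app import path, route prefix, and user payload are illustrative assumptions.

import pytest
from fastapi.testclient import TestClient

from app.core.security import get_current_user
from app.main import app  # assumed application entry point


def fake_current_user():
    # Minimal payload matching the current_user.get("id", ...) lookup above
    return {"id": 42, "sub": "42"}


@pytest.fixture
def client():
    app.dependency_overrides[get_current_user] = fake_current_user
    with TestClient(app) as c:
        yield c
    app.dependency_overrides.clear()


def test_missing_messages_is_rejected(client):
    # Omitting the required "messages" field should fail Pydantic validation
    resp = client.post(
        "/api/v1/internal/chat/completions",  # assumed prefix
        json={"model": "example-model"},
    )
    assert resp.status_code == 422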