Mirror of https://github.com/aljazceru/enclava.git, synced 2025-12-18 07:54:29 +01:00
Removing LiteLLM and going directly for privatemode
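This commit replaces the dict-based LiteLLM client calls with the typed llm_service interface across the chatbot, RAG, workflow, and Zammad modules. As a quick orientation before the diff, here is a minimal sketch of the call pattern the modules move to; the imports, field names, and response shape are taken from the diff below, while the function name and model string are illustrative only:

    # Sketch of the new llm_service call pattern (illustrative, not part of the commit).
    from app.services.llm.service import llm_service
    from app.services.llm.models import ChatRequest as LLMChatRequest, ChatMessage as LLMChatMessage

    async def complete(messages: list[dict]) -> str:
        # Old pattern (removed): await litellm_client.create_chat_completion(...) returned a dict,
        # read via response["choices"][0]["message"]["content"].
        request = LLMChatRequest(
            model="privatemode-model",  # hypothetical name; callers pass their configured model
            messages=[LLMChatMessage(role=m["role"], content=m["content"]) for m in messages],
            temperature=0.3,
            max_tokens=500,
            user_id="module_user",  # modules in the diff pass their own identifiers here
            api_key_id=0            # 0 marks internal module requests
        )
        if not llm_service._initialized:
            await llm_service.initialize()
        response = await llm_service.create_chat_completion(request)
        # Responses are typed objects rather than dicts.
        return response.choices[0].message.content if response.choices else ""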
@@ -23,7 +23,9 @@ from fastapi import APIRouter, HTTPException, Depends
 from sqlalchemy.orm import Session

 from app.core.logging import get_logger
-from app.services.litellm_client import LiteLLMClient
+from app.services.llm.service import llm_service
+from app.services.llm.models import ChatRequest as LLMChatRequest, ChatMessage as LLMChatMessage
+from app.services.llm.exceptions import LLMError, ProviderError, SecurityError
 from app.services.base_module import BaseModule, Permission
 from app.models.user import User
 from app.models.chatbot import ChatbotInstance as DBChatbotInstance, ChatbotConversation as DBConversation, ChatbotMessage as DBMessage, ChatbotAnalytics
@@ -32,7 +34,8 @@ from app.db.database import get_db
 from app.core.config import settings

 # Import protocols for type hints and dependency injection
-from ..protocols import RAGServiceProtocol, LiteLLMClientProtocol
+from ..protocols import RAGServiceProtocol
+# Note: LiteLLMClientProtocol replaced with direct LLM service usage

 logger = get_logger(__name__)

@@ -131,10 +134,8 @@ class ChatbotInstance(BaseModel):
 class ChatbotModule(BaseModule):
     """Main chatbot module implementation"""

-    def __init__(self, litellm_client: Optional[LiteLLMClientProtocol] = None,
-                 rag_service: Optional[RAGServiceProtocol] = None):
+    def __init__(self, rag_service: Optional[RAGServiceProtocol] = None):
         super().__init__("chatbot")
-        self.litellm_client = litellm_client
         self.rag_module = rag_service  # Keep same name for compatibility
         self.db_session = None

@@ -145,15 +146,10 @@ class ChatbotModule(BaseModule):
         """Initialize the chatbot module"""
         await super().initialize(**kwargs)

-        # Get dependencies from global services if not already injected
-        if not self.litellm_client:
-            try:
-                from app.services.litellm_client import litellm_client
-                self.litellm_client = litellm_client
-                logger.info("LiteLLM client injected from global service")
-            except Exception as e:
-                logger.warning(f"Could not inject LiteLLM client: {e}")
+        # Initialize the LLM service
+        await llm_service.initialize()

         # Get RAG module dependency if not already injected
         if not self.rag_module:
             try:
                 # Try to get RAG module from module manager
@@ -168,19 +164,16 @@ class ChatbotModule(BaseModule):
         await self._load_prompt_templates()

         logger.info("Chatbot module initialized")
-        logger.info(f"LiteLLM client available after init: {self.litellm_client is not None}")
+        logger.info(f"LLM service available: {llm_service._initialized}")
         logger.info(f"RAG module available after init: {self.rag_module is not None}")
         logger.info(f"Loaded {len(self.system_prompts)} prompt templates")

     async def _ensure_dependencies(self):
         """Lazy load dependencies if not available"""
-        if not self.litellm_client:
-            try:
-                from app.services.litellm_client import litellm_client
-                self.litellm_client = litellm_client
-                logger.info("LiteLLM client lazy loaded")
-            except Exception as e:
-                logger.warning(f"Could not lazy load LiteLLM client: {e}")
+        # Ensure LLM service is initialized
+        if not llm_service._initialized:
+            await llm_service.initialize()
+            logger.info("LLM service lazy loaded")

         if not self.rag_module:
             try:
@@ -468,45 +461,58 @@ class ChatbotModule(BaseModule):
             logger.info(msg['content'])
         logger.info("=== END COMPREHENSIVE LLM REQUEST ===")

-        if self.litellm_client:
-            try:
-                logger.info("Calling LiteLLM client create_chat_completion...")
-                response = await self.litellm_client.create_chat_completion(
-                    model=config.model,
-                    messages=messages,
-                    user_id="chatbot_user",
-                    api_key_id="chatbot_api_key",
-                    temperature=config.temperature,
-                    max_tokens=config.max_tokens
-                )
-                logger.info(f"LiteLLM response received, response keys: {list(response.keys())}")
+        try:
+            logger.info("Calling LLM service create_chat_completion...")
+
+            # Convert messages to LLM service format
+            llm_messages = [LLMChatMessage(role=msg["role"], content=msg["content"]) for msg in messages]
+
+            # Create LLM service request
+            llm_request = LLMChatRequest(
+                model=config.model,
+                messages=llm_messages,
+                temperature=config.temperature,
+                max_tokens=config.max_tokens,
+                user_id="chatbot_user",
+                api_key_id=0  # Chatbot module uses internal service
+            )
+
+            # Make request to LLM service
+            llm_response = await llm_service.create_chat_completion(llm_request)
+
+            # Extract response content
+            if llm_response.choices:
+                content = llm_response.choices[0].message.content
+                logger.info(f"Response content length: {len(content)}")

-                # Extract response content from the LiteLLM response format
-                if 'choices' in response and response['choices']:
-                    content = response['choices'][0]['message']['content']
-                    logger.info(f"Response content length: {len(content)}")
-
-                    # Always log response for debugging
-                    logger.info("=== COMPREHENSIVE LLM RESPONSE ===")
-                    logger.info(f"Response content ({len(content)} chars):")
-                    logger.info(content)
-                    if 'usage' in response:
-                        usage = response['usage']
-                        logger.info(f"Token usage - Prompt: {usage.get('prompt_tokens', 'N/A')}, Completion: {usage.get('completion_tokens', 'N/A')}, Total: {usage.get('total_tokens', 'N/A')}")
-                    if sources:
-                        logger.info(f"RAG sources included: {len(sources)} documents")
-                    logger.info("=== END COMPREHENSIVE LLM RESPONSE ===")
-
-                    return content, sources
-                else:
-                    logger.warning("No choices in LiteLLM response")
-                    return "I received an empty response from the AI model.", sources
-            except Exception as e:
-                logger.error(f"LiteLLM completion failed: {e}")
-                raise e
-        else:
-            logger.warning("No LiteLLM client available, using fallback")
-            # Fallback if no LLM client
+                # Always log response for debugging
+                logger.info("=== COMPREHENSIVE LLM RESPONSE ===")
+                logger.info(f"Response content ({len(content)} chars):")
+                logger.info(content)
+                if llm_response.usage:
+                    usage = llm_response.usage
+                    logger.info(f"Token usage - Prompt: {usage.prompt_tokens}, Completion: {usage.completion_tokens}, Total: {usage.total_tokens}")
+                if sources:
+                    logger.info(f"RAG sources included: {len(sources)} documents")
+                logger.info("=== END COMPREHENSIVE LLM RESPONSE ===")

+                return content, sources
+            else:
+                logger.warning("No choices in LLM response")
+                return "I received an empty response from the AI model.", sources
+
+        except SecurityError as e:
+            logger.error(f"Security error in LLM completion: {e}")
+            raise HTTPException(status_code=400, detail=f"Security validation failed: {e.message}")
+        except ProviderError as e:
+            logger.error(f"Provider error in LLM completion: {e}")
+            raise HTTPException(status_code=503, detail="LLM service temporarily unavailable")
+        except LLMError as e:
+            logger.error(f"LLM service error: {e}")
+            raise HTTPException(status_code=500, detail="LLM service error")
+        except Exception as e:
+            logger.error(f"LLM completion failed: {e}")
+            # Return fallback if available
+            return "I'm currently unable to process your request. Please try again later.", None

     def _build_conversation_messages(self, db_messages: List[DBMessage], config: ChatbotConfig,
@@ -685,7 +691,7 @@ class ChatbotModule(BaseModule):
         # Lazy load dependencies
         await self._ensure_dependencies()

-        logger.info(f"LiteLLM client available: {self.litellm_client is not None}")
+        logger.info(f"LLM service available: {llm_service._initialized}")
         logger.info(f"RAG module available: {self.rag_module is not None}")

         try:
@@ -884,10 +890,9 @@ class ChatbotModule(BaseModule):


 # Module factory function
-def create_module(litellm_client: Optional[LiteLLMClientProtocol] = None,
-                  rag_service: Optional[RAGServiceProtocol] = None) -> ChatbotModule:
+def create_module(rag_service: Optional[RAGServiceProtocol] = None) -> ChatbotModule:
     """Factory function to create chatbot module instance"""
-    return ChatbotModule(litellm_client=litellm_client, rag_service=rag_service)
+    return ChatbotModule(rag_service=rag_service)

 # Create module instance (dependencies will be injected via factory)
 chatbot_module = ChatbotModule()
@@ -401,7 +401,7 @@ class RAGModule(BaseModule):
         """Initialize embedding model"""
         from app.services.embedding_service import embedding_service

-        # Use privatemode-embeddings for LiteLLM integration
+        # Use privatemode-embeddings for LLM service integration
         model_name = self.config.get("embedding_model", "privatemode-embeddings")
         embedding_service.model_name = model_name


@@ -22,13 +22,16 @@ from fastapi import APIRouter, HTTPException, Depends
 from sqlalchemy.orm import Session
 from sqlalchemy import select
 from app.core.logging import get_logger
-from app.services.litellm_client import LiteLLMClient
+from app.services.llm.service import llm_service
+from app.services.llm.models import ChatRequest as LLMChatRequest, ChatMessage as LLMChatMessage
+from app.services.llm.exceptions import LLMError, ProviderError, SecurityError
 from app.services.base_module import Permission
 from app.db.database import SessionLocal
 from app.models.workflow import WorkflowDefinition as DBWorkflowDefinition, WorkflowExecution as DBWorkflowExecution

 # Import protocols for type hints and dependency injection
-from ..protocols import ChatbotServiceProtocol, LiteLLMClientProtocol
+from ..protocols import ChatbotServiceProtocol
+# Note: LiteLLMClientProtocol replaced with direct LLM service usage

 logger = get_logger(__name__)

@@ -234,8 +237,7 @@ class WorkflowExecution(BaseModel):
|
||||
class WorkflowEngine:
|
||||
"""Core workflow execution engine"""
|
||||
|
||||
def __init__(self, litellm_client: LiteLLMClient, chatbot_service: Optional[ChatbotServiceProtocol] = None):
|
||||
self.litellm_client = litellm_client
|
||||
def __init__(self, chatbot_service: Optional[ChatbotServiceProtocol] = None):
|
||||
self.chatbot_service = chatbot_service
|
||||
self.executions: Dict[str, WorkflowExecution] = {}
|
||||
self.workflows: Dict[str, WorkflowDefinition] = {}
|
||||
@@ -343,15 +345,23 @@ class WorkflowEngine:
         # Template message content with context variables
         messages = self._template_messages(llm_step.messages, context.variables)

-        # Make LLM call
-        response = await self.litellm_client.chat_completion(
+        # Convert messages to LLM service format
+        llm_messages = [LLMChatMessage(role=msg["role"], content=msg["content"]) for msg in messages]
+
+        # Create LLM service request
+        llm_request = LLMChatRequest(
             model=llm_step.model,
-            messages=messages,
-            **llm_step.parameters
+            messages=llm_messages,
+            user_id="workflow_user",
+            api_key_id=0,  # Workflow module uses internal service
+            **{k: v for k, v in llm_step.parameters.items() if k in ['temperature', 'max_tokens', 'top_p', 'frequency_penalty', 'presence_penalty', 'stop']}
         )

+        # Make LLM call
+        response = await llm_service.create_chat_completion(llm_request)
+
         # Store result
-        result = response.get("choices", [{}])[0].get("message", {}).get("content", "")
+        result = response.choices[0].message.content if response.choices else ""
         context.variables[llm_step.output_variable] = result
         context.results[step.id] = result

@@ -631,16 +641,21 @@ class WorkflowEngine:

         messages = [{"role": "user", "content": self._template_string(prompt, variables)}]

-        response = await self.litellm_client.create_chat_completion(
+        # Convert to LLM service format
+        llm_messages = [LLMChatMessage(role=msg["role"], content=msg["content"]) for msg in messages]
+
+        llm_request = LLMChatRequest(
             model=step.model,
-            messages=messages,
+            messages=llm_messages,
             user_id="workflow_system",
-            api_key_id="workflow",
+            api_key_id=0,
             temperature=step.temperature,
             max_tokens=step.max_tokens
         )

-        return response.get("choices", [{}])[0].get("message", {}).get("content", "")
+        response = await llm_service.create_chat_completion(llm_request)
+
+        return response.choices[0].message.content if response.choices else ""

     async def _generate_brand_names(self, variables: Dict[str, Any], step: AIGenerationStep) -> List[Dict[str, str]]:
         """Generate brand names for a specific category"""
@@ -687,16 +702,21 @@ class WorkflowEngine:

         messages = [{"role": "user", "content": self._template_string(prompt, variables)}]

-        response = await self.litellm_client.create_chat_completion(
+        # Convert to LLM service format
+        llm_messages = [LLMChatMessage(role=msg["role"], content=msg["content"]) for msg in messages]
+
+        llm_request = LLMChatRequest(
             model=step.model,
-            messages=messages,
+            messages=llm_messages,
             user_id="workflow_system",
-            api_key_id="workflow",
+            api_key_id=0,
             temperature=step.temperature,
             max_tokens=step.max_tokens
         )

-        return response.get("choices", [{}])[0].get("message", {}).get("content", "")
+        response = await llm_service.create_chat_completion(llm_request)
+
+        return response.choices[0].message.content if response.choices else ""

     async def _generate_custom_prompt(self, variables: Dict[str, Any], step: AIGenerationStep) -> str:
         """Generate content using custom prompt template"""
@@ -705,16 +725,21 @@ class WorkflowEngine:

         messages = [{"role": "user", "content": self._template_string(step.prompt_template, variables)}]

-        response = await self.litellm_client.create_chat_completion(
+        # Convert to LLM service format
+        llm_messages = [LLMChatMessage(role=msg["role"], content=msg["content"]) for msg in messages]
+
+        llm_request = LLMChatRequest(
             model=step.model,
-            messages=messages,
+            messages=llm_messages,
             user_id="workflow_system",
-            api_key_id="workflow",
+            api_key_id=0,
             temperature=step.temperature,
             max_tokens=step.max_tokens
         )

-        return response.get("choices", [{}])[0].get("message", {}).get("content", "")
+        response = await llm_service.create_chat_completion(llm_request)
+
+        return response.choices[0].message.content if response.choices else ""

     async def _execute_aggregate_step(self, step: WorkflowStep, context: WorkflowContext):
         """Execute aggregate step to combine multiple inputs"""

@@ -23,7 +23,8 @@ from app.core.config import settings
 from app.db.database import async_session_factory
 from app.models.user import User
 from app.models.chatbot import ChatbotInstance
-from app.services.litellm_client import LiteLLMClient
+from app.services.llm.service import llm_service
+from app.services.llm.models import ChatRequest as LLMChatRequest, ChatMessage as LLMChatMessage
 from cryptography.fernet import Fernet
 import base64
 import os
@@ -65,8 +66,8 @@ class ZammadModule(BaseModule):
         try:
             logger.info("Initializing Zammad module...")

-            # Initialize LLM client for chatbot integration
-            self.llm_client = LiteLLMClient()
+            # Initialize LLM service for chatbot integration
+            # Note: llm_service is already a global singleton, no need to create instance

             # Create HTTP session pool for Zammad API calls
             timeout = aiohttp.ClientTimeout(total=60, connect=10)
@@ -597,19 +598,21 @@ class ZammadModule(BaseModule):
                 }
             ]

-            # Generate summary using LLM client
-            response = await self.llm_client.create_chat_completion(
-                messages=messages,
+            # Generate summary using new LLM service
+            chat_request = LLMChatRequest(
                 model=await self._get_chatbot_model(config.chatbot_id),
-                user_id=str(config.user_id),
-                api_key_id=0,  # Using 0 for module requests
+                messages=[LLMChatMessage(role=msg["role"], content=msg["content"]) for msg in messages],
                 temperature=0.3,
-                max_tokens=500
+                max_tokens=500,
+                user_id=str(config.user_id),
+                api_key_id=0  # Using 0 for module requests
             )

-            # Extract content from LiteLLM response
-            if "choices" in response and len(response["choices"]) > 0:
-                return response["choices"][0]["message"]["content"].strip()
+            response = await llm_service.create_chat_completion(chat_request)
+
+            # Extract content from new LLM service response
+            if response.choices and len(response.choices) > 0:
+                return response.choices[0].message.content.strip()

             return "Unable to generate summary."