Removing LiteLLM and going directly for privatemode

This commit is contained in:
2025-08-21 08:44:05 +02:00
parent be581b28f8
commit 27ee8b4cdb
16 changed files with 1775 additions and 677 deletions

View File

@@ -23,7 +23,9 @@ from fastapi import APIRouter, HTTPException, Depends
from sqlalchemy.orm import Session
from app.core.logging import get_logger
from app.services.litellm_client import LiteLLMClient
from app.services.llm.service import llm_service
from app.services.llm.models import ChatRequest as LLMChatRequest, ChatMessage as LLMChatMessage
from app.services.llm.exceptions import LLMError, ProviderError, SecurityError
from app.services.base_module import BaseModule, Permission
from app.models.user import User
from app.models.chatbot import ChatbotInstance as DBChatbotInstance, ChatbotConversation as DBConversation, ChatbotMessage as DBMessage, ChatbotAnalytics
@@ -32,7 +34,8 @@ from app.db.database import get_db
from app.core.config import settings
# Import protocols for type hints and dependency injection
from ..protocols import RAGServiceProtocol, LiteLLMClientProtocol
from ..protocols import RAGServiceProtocol
# Note: LiteLLMClientProtocol replaced with direct LLM service usage
logger = get_logger(__name__)
@@ -131,10 +134,8 @@ class ChatbotInstance(BaseModel):
class ChatbotModule(BaseModule):
"""Main chatbot module implementation"""
def __init__(self, litellm_client: Optional[LiteLLMClientProtocol] = None,
rag_service: Optional[RAGServiceProtocol] = None):
def __init__(self, rag_service: Optional[RAGServiceProtocol] = None):
super().__init__("chatbot")
self.litellm_client = litellm_client
self.rag_module = rag_service # Keep same name for compatibility
self.db_session = None
@@ -145,15 +146,10 @@ class ChatbotModule(BaseModule):
"""Initialize the chatbot module"""
await super().initialize(**kwargs)
# Get dependencies from global services if not already injected
if not self.litellm_client:
try:
from app.services.litellm_client import litellm_client
self.litellm_client = litellm_client
logger.info("LiteLLM client injected from global service")
except Exception as e:
logger.warning(f"Could not inject LiteLLM client: {e}")
# Initialize the LLM service
await llm_service.initialize()
# Get RAG module dependency if not already injected
if not self.rag_module:
try:
# Try to get RAG module from module manager
@@ -168,19 +164,16 @@ class ChatbotModule(BaseModule):
await self._load_prompt_templates()
logger.info("Chatbot module initialized")
logger.info(f"LiteLLM client available after init: {self.litellm_client is not None}")
logger.info(f"LLM service available: {llm_service._initialized}")
logger.info(f"RAG module available after init: {self.rag_module is not None}")
logger.info(f"Loaded {len(self.system_prompts)} prompt templates")
async def _ensure_dependencies(self):
"""Lazy load dependencies if not available"""
if not self.litellm_client:
try:
from app.services.litellm_client import litellm_client
self.litellm_client = litellm_client
logger.info("LiteLLM client lazy loaded")
except Exception as e:
logger.warning(f"Could not lazy load LiteLLM client: {e}")
# Ensure LLM service is initialized
if not llm_service._initialized:
await llm_service.initialize()
logger.info("LLM service lazy loaded")
if not self.rag_module:
try:
@@ -468,45 +461,58 @@ class ChatbotModule(BaseModule):
logger.info(msg['content'])
logger.info("=== END COMPREHENSIVE LLM REQUEST ===")
if self.litellm_client:
try:
logger.info("Calling LiteLLM client create_chat_completion...")
response = await self.litellm_client.create_chat_completion(
model=config.model,
messages=messages,
user_id="chatbot_user",
api_key_id="chatbot_api_key",
temperature=config.temperature,
max_tokens=config.max_tokens
)
logger.info(f"LiteLLM response received, response keys: {list(response.keys())}")
try:
logger.info("Calling LLM service create_chat_completion...")
# Convert messages to LLM service format
llm_messages = [LLMChatMessage(role=msg["role"], content=msg["content"]) for msg in messages]
# Create LLM service request
llm_request = LLMChatRequest(
model=config.model,
messages=llm_messages,
temperature=config.temperature,
max_tokens=config.max_tokens,
user_id="chatbot_user",
api_key_id=0 # Chatbot module uses internal service
)
# Make request to LLM service
llm_response = await llm_service.create_chat_completion(llm_request)
# Extract response content
if llm_response.choices:
content = llm_response.choices[0].message.content
logger.info(f"Response content length: {len(content)}")
# Extract response content from the LiteLLM response format
if 'choices' in response and response['choices']:
content = response['choices'][0]['message']['content']
logger.info(f"Response content length: {len(content)}")
# Always log response for debugging
logger.info("=== COMPREHENSIVE LLM RESPONSE ===")
logger.info(f"Response content ({len(content)} chars):")
logger.info(content)
if 'usage' in response:
usage = response['usage']
logger.info(f"Token usage - Prompt: {usage.get('prompt_tokens', 'N/A')}, Completion: {usage.get('completion_tokens', 'N/A')}, Total: {usage.get('total_tokens', 'N/A')}")
if sources:
logger.info(f"RAG sources included: {len(sources)} documents")
logger.info("=== END COMPREHENSIVE LLM RESPONSE ===")
return content, sources
else:
logger.warning("No choices in LiteLLM response")
return "I received an empty response from the AI model.", sources
except Exception as e:
logger.error(f"LiteLLM completion failed: {e}")
raise e
else:
logger.warning("No LiteLLM client available, using fallback")
# Fallback if no LLM client
# Always log response for debugging
logger.info("=== COMPREHENSIVE LLM RESPONSE ===")
logger.info(f"Response content ({len(content)} chars):")
logger.info(content)
if llm_response.usage:
usage = llm_response.usage
logger.info(f"Token usage - Prompt: {usage.prompt_tokens}, Completion: {usage.completion_tokens}, Total: {usage.total_tokens}")
if sources:
logger.info(f"RAG sources included: {len(sources)} documents")
logger.info("=== END COMPREHENSIVE LLM RESPONSE ===")
return content, sources
else:
logger.warning("No choices in LLM response")
return "I received an empty response from the AI model.", sources
except SecurityError as e:
logger.error(f"Security error in LLM completion: {e}")
raise HTTPException(status_code=400, detail=f"Security validation failed: {e.message}")
except ProviderError as e:
logger.error(f"Provider error in LLM completion: {e}")
raise HTTPException(status_code=503, detail="LLM service temporarily unavailable")
except LLMError as e:
logger.error(f"LLM service error: {e}")
raise HTTPException(status_code=500, detail="LLM service error")
except Exception as e:
logger.error(f"LLM completion failed: {e}")
# Return fallback if available
return "I'm currently unable to process your request. Please try again later.", None
def _build_conversation_messages(self, db_messages: List[DBMessage], config: ChatbotConfig,
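The except clauses above map the new exception hierarchy onto HTTP statuses. A hedged sketch of that mapping in isolation, assuming the SecurityError/ProviderError/LLMError classes imported at the top of the module and FastAPI's HTTPException (complete_or_http_error is a hypothetical wrapper, not part of this commit):
from fastapi import HTTPException
from app.services.llm.service import llm_service
from app.services.llm.exceptions import LLMError, ProviderError, SecurityError

async def complete_or_http_error(request):
    try:
        return await llm_service.create_chat_completion(request)
    except SecurityError as e:
        # Prompt rejected by security validation -> client error
        raise HTTPException(status_code=400, detail=f"Security validation failed: {e.message}")
    except ProviderError:
        # Upstream provider unavailable -> 503 so callers can retry later
        raise HTTPException(status_code=503, detail="LLM service temporarily unavailable")
    except LLMError:
        # Any other service-level failure -> 500
        raise HTTPException(status_code=500, detail="LLM service error")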
@@ -685,7 +691,7 @@ class ChatbotModule(BaseModule):
# Lazy load dependencies
await self._ensure_dependencies()
logger.info(f"LiteLLM client available: {self.litellm_client is not None}")
logger.info(f"LLM service available: {llm_service._initialized}")
logger.info(f"RAG module available: {self.rag_module is not None}")
try:
@@ -884,10 +890,9 @@ class ChatbotModule(BaseModule):
# Module factory function
def create_module(litellm_client: Optional[LiteLLMClientProtocol] = None,
rag_service: Optional[RAGServiceProtocol] = None) -> ChatbotModule:
def create_module(rag_service: Optional[RAGServiceProtocol] = None) -> ChatbotModule:
"""Factory function to create chatbot module instance"""
return ChatbotModule(litellm_client=litellm_client, rag_service=rag_service)
return ChatbotModule(rag_service=rag_service)
# Create module instance (dependencies will be injected via factory)
chatbot_module = ChatbotModule()
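The pattern introduced in this file replaces the dict-based LiteLLM client with the typed llm_service singleton. A condensed, hedged sketch of the new call path, assuming the ChatRequest/ChatMessage models and the async create_chat_completion() shown in this diff (complete_chat itself is a hypothetical helper):
from app.services.llm.service import llm_service
from app.services.llm.models import ChatRequest as LLMChatRequest, ChatMessage as LLMChatMessage

async def complete_chat(messages: list[dict], model: str, temperature: float, max_tokens: int) -> str:
    # Lazily initialize the global singleton, mirroring _ensure_dependencies() above
    if not llm_service._initialized:
        await llm_service.initialize()
    request = LLMChatRequest(
        model=model,
        messages=[LLMChatMessage(role=m["role"], content=m["content"]) for m in messages],
        temperature=temperature,
        max_tokens=max_tokens,
        user_id="chatbot_user",
        api_key_id=0,  # internal module traffic, per the diff
    )
    response = await llm_service.create_chat_completion(request)
    # Typed attribute access replaces the old response["choices"][0]["message"]["content"] lookup
    return response.choices[0].message.content if response.choices else ""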

View File

@@ -401,7 +401,7 @@ class RAGModule(BaseModule):
"""Initialize embedding model"""
from app.services.embedding_service import embedding_service
# Use privatemode-embeddings for LiteLLM integration
# Use privatemode-embeddings for LLM service integration
model_name = self.config.get("embedding_model", "privatemode-embeddings")
embedding_service.model_name = model_name
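For context, the embedding backend stays configuration-driven; a minimal hedged sketch of the selection logic above, assuming the module's config dict and the embedding_service.model_name attribute (select_embedding_model is a hypothetical helper):
from app.services.embedding_service import embedding_service

def select_embedding_model(config: dict) -> str:
    # Falls back to the privatemode-hosted embedding model when no override is configured
    model_name = config.get("embedding_model", "privatemode-embeddings")
    embedding_service.model_name = model_name
    return model_name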

View File

@@ -22,13 +22,16 @@ from fastapi import APIRouter, HTTPException, Depends
from sqlalchemy.orm import Session
from sqlalchemy import select
from app.core.logging import get_logger
from app.services.litellm_client import LiteLLMClient
from app.services.llm.service import llm_service
from app.services.llm.models import ChatRequest as LLMChatRequest, ChatMessage as LLMChatMessage
from app.services.llm.exceptions import LLMError, ProviderError, SecurityError
from app.services.base_module import Permission
from app.db.database import SessionLocal
from app.models.workflow import WorkflowDefinition as DBWorkflowDefinition, WorkflowExecution as DBWorkflowExecution
# Import protocols for type hints and dependency injection
from ..protocols import ChatbotServiceProtocol, LiteLLMClientProtocol
from ..protocols import ChatbotServiceProtocol
# Note: LiteLLMClientProtocol replaced with direct LLM service usage
logger = get_logger(__name__)
@@ -234,8 +237,7 @@ class WorkflowExecution(BaseModel):
class WorkflowEngine:
"""Core workflow execution engine"""
def __init__(self, litellm_client: LiteLLMClient, chatbot_service: Optional[ChatbotServiceProtocol] = None):
self.litellm_client = litellm_client
def __init__(self, chatbot_service: Optional[ChatbotServiceProtocol] = None):
self.chatbot_service = chatbot_service
self.executions: Dict[str, WorkflowExecution] = {}
self.workflows: Dict[str, WorkflowDefinition] = {}
@@ -343,15 +345,23 @@ class WorkflowEngine:
# Template message content with context variables
messages = self._template_messages(llm_step.messages, context.variables)
# Make LLM call
response = await self.litellm_client.chat_completion(
# Convert messages to LLM service format
llm_messages = [LLMChatMessage(role=msg["role"], content=msg["content"]) for msg in messages]
# Create LLM service request
llm_request = LLMChatRequest(
model=llm_step.model,
messages=messages,
**llm_step.parameters
messages=llm_messages,
user_id="workflow_user",
api_key_id=0, # Workflow module uses internal service
**{k: v for k, v in llm_step.parameters.items() if k in ['temperature', 'max_tokens', 'top_p', 'frequency_penalty', 'presence_penalty', 'stop']}
)
# Make LLM call
response = await llm_service.create_chat_completion(llm_request)
# Store result
result = response.get("choices", [{}])[0].get("message", {}).get("content", "")
result = response.choices[0].message.content if response.choices else ""
context.variables[llm_step.output_variable] = result
context.results[step.id] = result
@@ -631,16 +641,21 @@ class WorkflowEngine:
messages = [{"role": "user", "content": self._template_string(prompt, variables)}]
response = await self.litellm_client.create_chat_completion(
# Convert to LLM service format
llm_messages = [LLMChatMessage(role=msg["role"], content=msg["content"]) for msg in messages]
llm_request = LLMChatRequest(
model=step.model,
messages=messages,
messages=llm_messages,
user_id="workflow_system",
api_key_id="workflow",
api_key_id=0,
temperature=step.temperature,
max_tokens=step.max_tokens
)
return response.get("choices", [{}])[0].get("message", {}).get("content", "")
response = await llm_service.create_chat_completion(llm_request)
return response.choices[0].message.content if response.choices else ""
async def _generate_brand_names(self, variables: Dict[str, Any], step: AIGenerationStep) -> List[Dict[str, str]]:
"""Generate brand names for a specific category"""
@@ -687,16 +702,21 @@ class WorkflowEngine:
messages = [{"role": "user", "content": self._template_string(prompt, variables)}]
response = await self.litellm_client.create_chat_completion(
# Convert to LLM service format
llm_messages = [LLMChatMessage(role=msg["role"], content=msg["content"]) for msg in messages]
llm_request = LLMChatRequest(
model=step.model,
messages=messages,
messages=llm_messages,
user_id="workflow_system",
api_key_id="workflow",
api_key_id=0,
temperature=step.temperature,
max_tokens=step.max_tokens
)
return response.get("choices", [{}])[0].get("message", {}).get("content", "")
response = await llm_service.create_chat_completion(llm_request)
return response.choices[0].message.content if response.choices else ""
async def _generate_custom_prompt(self, variables: Dict[str, Any], step: AIGenerationStep) -> str:
"""Generate content using custom prompt template"""
@@ -705,16 +725,21 @@ class WorkflowEngine:
messages = [{"role": "user", "content": self._template_string(step.prompt_template, variables)}]
response = await self.litellm_client.create_chat_completion(
# Convert to LLM service format
llm_messages = [LLMChatMessage(role=msg["role"], content=msg["content"]) for msg in messages]
llm_request = LLMChatRequest(
model=step.model,
messages=messages,
messages=llm_messages,
user_id="workflow_system",
api_key_id="workflow",
api_key_id=0,
temperature=step.temperature,
max_tokens=step.max_tokens
)
return response.get("choices", [{}])[0].get("message", {}).get("content", "")
response = await llm_service.create_chat_completion(llm_request)
return response.choices[0].message.content if response.choices else ""
async def _execute_aggregate_step(self, step: WorkflowStep, context: WorkflowContext):
"""Execute aggregate step to combine multiple inputs"""

View File

@@ -23,7 +23,8 @@ from app.core.config import settings
from app.db.database import async_session_factory
from app.models.user import User
from app.models.chatbot import ChatbotInstance
from app.services.litellm_client import LiteLLMClient
from app.services.llm.service import llm_service
from app.services.llm.models import ChatRequest as LLMChatRequest, ChatMessage as LLMChatMessage
from cryptography.fernet import Fernet
import base64
import os
@@ -65,8 +66,8 @@ class ZammadModule(BaseModule):
try:
logger.info("Initializing Zammad module...")
# Initialize LLM client for chatbot integration
self.llm_client = LiteLLMClient()
# Initialize LLM service for chatbot integration
# Note: llm_service is already a global singleton, no need to create instance
# Create HTTP session pool for Zammad API calls
timeout = aiohttp.ClientTimeout(total=60, connect=10)
@@ -597,19 +598,21 @@ class ZammadModule(BaseModule):
}
]
# Generate summary using LLM client
response = await self.llm_client.create_chat_completion(
messages=messages,
# Generate summary using new LLM service
chat_request = LLMChatRequest(
model=await self._get_chatbot_model(config.chatbot_id),
user_id=str(config.user_id),
api_key_id=0, # Using 0 for module requests
messages=[LLMChatMessage(role=msg["role"], content=msg["content"]) for msg in messages],
temperature=0.3,
max_tokens=500
max_tokens=500,
user_id=str(config.user_id),
api_key_id=0 # Using 0 for module requests
)
# Extract content from LiteLLM response
if "choices" in response and len(response["choices"]) > 0:
return response["choices"][0]["message"]["content"].strip()
response = await llm_service.create_chat_completion(chat_request)
# Extract content from new LLM service response
if response.choices and len(response.choices) > 0:
return response.choices[0].message.content.strip()
return "Unable to generate summary."