rag improvements 2

2025-09-23 15:47:33 +02:00
parent f8d127ff42
commit d4d420a03a
13 changed files with 4308 additions and 0 deletions

.gitignore

@@ -0,0 +1,66 @@
# Python
__pycache__/
*.py[cod]
*.pyo
*.pyd
*.env
*.venv
env/
venv/
ENV/
env.bak/
venv.bak/
*.sqlite3
*.db
# FastAPI logs
*.log
# Node.js
node_modules/
npm-debug.log*
yarn-debug.log*
yarn-error.log*
pnpm-debug.log*
# Next.js build
frontend/.next/
frontend/out/
frontend/.env.local
frontend/.env.production
frontend/.env.development
backend/storage/
# TypeScript
*.tsbuildinfo
# Coverage reports
htmlcov/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.pyc
*.pyo
*.pyd
.pytest_cache/
backend/.pytest_cache/
backend/.mypy_cache/
.mypy_cache/
*.prof
backend/_to_delete/
backend/__pycache__/
backend/app/core/__pycache__/
backend/app/services/__pycache__/
backend/app/services/llm/__pycache__/
backend/app/services/llm/providers/__pycache__/
backend/app/utils/__pycache__/
backend/modules/rag/__pycache__/
frontend/.next/
frontend/node_modules/
node_modules/
venv/


@@ -0,0 +1,21 @@
"""
Chatbot Module - AI Chatbot with RAG Integration
This module provides AI chatbot capabilities with:
- Multiple personality types (Assistant, Customer Support, Teacher, etc.)
- RAG integration for knowledge-based responses
- Conversation memory and context management
- Workflow integration as building blocks
- UI-configurable settings
"""
from .main import ChatbotModule, create_module
__version__ = "1.0.0"
__author__ = "Enclava Team"
# Export main classes for easy importing
__all__ = [
"ChatbotModule",
"create_module"
]
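
As a quick orientation for consumers of this package, the sketch below shows how the exported factory might be used on its own. The import path app.modules.chatbot is an assumption for illustration; in the running backend the module manager creates and initializes the instance.

from app.modules.chatbot import create_module  # assumed import path for this package

# Create the module without a RAG service; the module manager can inject one later
# during initialize() (see ChatbotModule.initialize in main.py).
chatbot = create_module()
print(chatbot.get_required_permissions())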


@@ -0,0 +1,126 @@
{
"title": "Chatbot Configuration",
"type": "object",
"properties": {
"name": {
"type": "string",
"title": "Chatbot Name",
"description": "Display name for this chatbot instance",
"minLength": 1,
"maxLength": 100
},
"chatbot_type": {
"type": "string",
"title": "Chatbot Type",
"description": "Select the type of chatbot personality",
"enum": ["assistant", "customer_support", "teacher", "researcher", "creative_writer", "custom"],
"enumNames": ["General Assistant", "Customer Support", "Teacher", "Researcher", "Creative Writer", "Custom"],
"default": "assistant"
},
"model": {
"type": "string",
"title": "AI Model",
"description": "Choose the LLM model for responses",
"enum": ["gpt-4", "gpt-3.5-turbo", "claude-3-sonnet", "claude-3-opus", "llama-70b"],
"default": "gpt-3.5-turbo"
},
"system_prompt": {
"type": "string",
"title": "System Prompt",
"description": "Define the chatbot's personality and behavior instructions",
"ui:widget": "textarea",
"ui:options": {
"rows": 6,
"placeholder": "You are a helpful AI assistant..."
}
},
"use_rag": {
"type": "boolean",
"title": "Enable Knowledge Base",
"description": "Use RAG to search knowledge base for context",
"default": false
},
"rag_collection": {
"type": "string",
"title": "Knowledge Base Collection",
"description": "Select which document collection to search",
"ui:widget": "rag-collection-selector",
"ui:condition": "use_rag === true"
},
"rag_top_k": {
"type": "integer",
"title": "Knowledge Base Results",
"description": "Number of relevant documents to include",
"minimum": 1,
"maximum": 10,
"default": 5,
"ui:condition": "use_rag === true"
},
"temperature": {
"type": "number",
"title": "Response Creativity",
"description": "Controls randomness (0.0 = focused, 1.0 = creative)",
"minimum": 0,
"maximum": 1,
"default": 0.7,
"ui:widget": "range",
"ui:options": {
"step": 0.1
}
},
"max_tokens": {
"type": "integer",
"title": "Maximum Response Length",
"description": "Maximum number of tokens in response",
"minimum": 50,
"maximum": 4000,
"default": 1000,
"ui:widget": "range",
"ui:options": {
"step": 50
}
},
"memory_length": {
"type": "integer",
"title": "Conversation Memory",
"description": "Number of previous message pairs to remember",
"minimum": 1,
"maximum": 50,
"default": 10,
"ui:widget": "range"
},
"fallback_responses": {
"type": "array",
"title": "Fallback Responses",
"description": "Responses to use when the AI cannot answer",
"items": {
"type": "string",
"title": "Fallback Response"
},
"default": [
"I'm not sure how to help with that. Could you please rephrase your question?",
"I don't have enough information to answer that question accurately.",
"That's outside my knowledge area. Is there something else I can help you with?"
],
"ui:options": {
"orderable": true,
"addable": true,
"removable": true
}
}
},
"required": ["name", "chatbot_type", "model"],
"ui:order": [
"name",
"chatbot_type",
"model",
"system_prompt",
"use_rag",
"rag_collection",
"rag_top_k",
"temperature",
"max_tokens",
"memory_length",
"fallback_responses"
]
}
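
Since the ui:* keys are hints for the frontend form renderer, a backend validator only needs the plain JSON Schema parts. A minimal sketch, assuming the jsonschema package is installed and the schema file sits next to the script as config_schema.json:

import json

from jsonschema import Draft7Validator  # assumed: jsonschema is installed

with open("config_schema.json", "r", encoding="utf-8") as fh:
    schema = json.load(fh)

config = {
    "name": "Docs Helper",
    "chatbot_type": "assistant",
    "model": "gpt-3.5-turbo",
    "use_rag": True,
    "rag_collection": "support_documentation",
}

# Unknown keywords such as ui:widget and ui:order are ignored by the validator.
for error in Draft7Validator(schema).iter_errors(config):
    print(f"{list(error.path)}: {error.message}")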


@@ -0,0 +1,182 @@
{
"name": "Customer Support Workflow",
"description": "Intelligent customer support workflow with intent classification, knowledge base search, and chatbot response generation",
"version": "1.0",
"variables": {
"support_chatbot_id": "cs-bot-001",
"escalation_threshold": 0.3,
"max_attempts": 3
},
"steps": [
{
"id": "classify_intent",
"name": "Classify Customer Intent",
"type": "llm_call",
"model": "gpt-3.5-turbo",
"messages": [
{
"role": "system",
"content": "You are an intent classifier for customer support. Classify the customer message into one of these categories: technical_issue, billing_question, feature_request, complaint, general_inquiry. Also provide a confidence score between 0 and 1. Respond with JSON: {\"intent\": \"category\", \"confidence\": 0.95, \"reasoning\": \"explanation\"}"
},
{
"role": "user",
"content": "{{ inputs.customer_message }}"
}
],
"output_variable": "intent_classification"
},
{
"id": "search_knowledge_base",
"name": "Search Knowledge Base",
"type": "workflow_step",
"module": "rag",
"action": "search",
"config": {
"query": "{{ inputs.customer_message }}",
"collection": "support_documentation",
"top_k": 5,
"include_metadata": true
},
"output_variable": "knowledge_results"
},
{
"id": "check_confidence",
"name": "Check Intent Confidence",
"type": "condition",
"condition": "JSON.parse(steps.classify_intent.result).confidence > variables.escalation_threshold",
"true_steps": [
{
"id": "generate_chatbot_response",
"name": "Generate Chatbot Response",
"type": "workflow_step",
"module": "chatbot",
"action": "workflow_chat_step",
"config": {
"message": "{{ inputs.customer_message }}",
"chatbot_id": "{{ variables.support_chatbot_id }}",
"use_rag": true,
"context": {
"intent": "{{ steps.classify_intent.result }}",
"knowledge_base_results": "{{ steps.search_knowledge_base.result }}",
"customer_history": "{{ inputs.customer_history }}",
"additional_instructions": "Be empathetic and professional. If you cannot fully resolve the issue, offer to escalate to a human agent."
}
},
"output_variable": "chatbot_response"
},
{
"id": "analyze_response_quality",
"name": "Analyze Response Quality",
"type": "llm_call",
"model": "gpt-3.5-turbo",
"messages": [
{
"role": "system",
"content": "Analyze if this customer support response adequately addresses the customer's question. Consider completeness, accuracy, and helpfulness. Respond with JSON: {\"quality_score\": 0.85, \"is_adequate\": true, \"requires_escalation\": false, \"reasoning\": \"explanation\"}"
},
{
"role": "user",
"content": "Customer Question: {{ inputs.customer_message }}\\n\\nChatbot Response: {{ steps.generate_chatbot_response.result.response }}\\n\\nKnowledge Base Context: {{ steps.search_knowledge_base.result }}"
}
],
"output_variable": "response_quality"
},
{
"id": "final_response_decision",
"name": "Final Response Decision",
"type": "condition",
"condition": "JSON.parse(steps.analyze_response_quality.result).is_adequate === true",
"true_steps": [
{
"id": "send_chatbot_response",
"name": "Send Chatbot Response",
"type": "output",
"config": {
"response_type": "chatbot_response",
"message": "{{ steps.generate_chatbot_response.result.response }}",
"sources": "{{ steps.generate_chatbot_response.result.sources }}",
"confidence": "{{ JSON.parse(steps.classify_intent.result).confidence }}",
"quality_score": "{{ JSON.parse(steps.analyze_response_quality.result).quality_score }}"
}
}
],
"false_steps": [
{
"id": "escalate_to_human",
"name": "Escalate to Human Agent",
"type": "output",
"config": {
"response_type": "human_escalation",
"message": "I'd like to connect you with one of our human support agents who can better assist with your specific situation. Please hold on while I transfer you.",
"escalation_reason": "Response quality below threshold",
"intent": "{{ steps.classify_intent.result }}",
"attempted_response": "{{ steps.generate_chatbot_response.result.response }}",
"priority": "normal"
}
}
]
}
],
"false_steps": [
{
"id": "low_confidence_escalation",
"name": "Low Confidence Escalation",
"type": "output",
"config": {
"response_type": "human_escalation",
"message": "I want to make sure you get the best possible help. Let me connect you with one of our human support agents.",
"escalation_reason": "Low intent classification confidence",
"intent": "{{ steps.classify_intent.result }}",
"priority": "high"
}
}
]
},
{
"id": "log_interaction",
"name": "Log Customer Interaction",
"type": "workflow_step",
"module": "analytics",
"action": "log_event",
"config": {
"event_type": "customer_support_interaction",
"data": {
"customer_message": "{{ inputs.customer_message }}",
"intent_classification": "{{ steps.classify_intent.result }}",
"response_generated": "{{ steps.generate_chatbot_response.result.response }}",
"knowledge_base_used": "{{ steps.search_knowledge_base.result }}",
"escalated": "{{ outputs.response_type === 'human_escalation' }}",
"workflow_execution_time": "{{ execution_time }}",
"timestamp": "{{ current_timestamp }}"
}
}
}
],
"outputs": {
"response_type": "string",
"message": "string",
"sources": "array",
"escalation_reason": "string",
"confidence": "number",
"quality_score": "number"
},
"error_handling": {
"retry_failed_steps": true,
"max_retries": 2,
"fallback_response": "I apologize, but I'm experiencing technical difficulties. Please contact our support team directly for assistance."
},
"metadata": {
"created_by": "support_team",
"use_case": "customer_support_automation",
"tags": ["customer_support", "chatbot", "rag", "escalation"],
"estimated_execution_time": "5-15 seconds"
}
}
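
To make the branching concrete: the check_confidence step compares the classifier's JSON output against variables.escalation_threshold. Below is a minimal Python sketch of that comparison, assuming the classifier returned the JSON shape requested by the classify_intent system prompt (the real evaluation is performed by the workflow engine's condition handler):

import json

escalation_threshold = 0.3  # mirrors variables.escalation_threshold above

intent_result = '{"intent": "billing_question", "confidence": 0.92, "reasoning": "mentions an invoice"}'
classification = json.loads(intent_result)

if classification["confidence"] > escalation_threshold:
    print("continue with the chatbot response branch")
else:
    print("escalate to a human agent")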


@@ -0,0 +1,936 @@
"""
Chatbot Module Implementation
Provides AI chatbot capabilities with:
- RAG integration for knowledge-based responses
- Custom prompts and personalities
- Conversation memory and context
- Workflow integration as building blocks
- UI-configurable settings
"""
import json
from pprint import pprint
import uuid
from datetime import datetime, timedelta
from typing import Dict, List, Any, Optional, Union
from dataclasses import dataclass
from pydantic import BaseModel, Field
from enum import Enum
from fastapi import APIRouter, HTTPException, Depends
from sqlalchemy.orm import Session
from app.core.logging import get_logger
from app.services.llm.service import llm_service
from app.services.llm.models import ChatRequest as LLMChatRequest, ChatMessage as LLMChatMessage
from app.services.llm.exceptions import LLMError, ProviderError, SecurityError
from app.services.base_module import BaseModule, Permission
from app.models.user import User
from app.models.chatbot import ChatbotInstance as DBChatbotInstance, ChatbotConversation as DBConversation, ChatbotMessage as DBMessage, ChatbotAnalytics
from app.core.security import get_current_user
from app.db.database import get_db
from app.core.config import settings
# Import protocols for type hints and dependency injection
from ..protocols import RAGServiceProtocol
# Note: LiteLLMClientProtocol replaced with direct LLM service usage
logger = get_logger(__name__)
class ChatbotType(str, Enum):
"""Types of chatbot personalities"""
ASSISTANT = "assistant"
CUSTOMER_SUPPORT = "customer_support"
TEACHER = "teacher"
RESEARCHER = "researcher"
CREATIVE_WRITER = "creative_writer"
CUSTOM = "custom"
class MessageRole(str, Enum):
"""Message roles in conversation"""
USER = "user"
ASSISTANT = "assistant"
SYSTEM = "system"
@dataclass
class ChatbotConfig:
"""Chatbot configuration"""
name: str
chatbot_type: str # Changed from ChatbotType enum to str to allow custom types
model: str
rag_collection: Optional[str] = None
system_prompt: str = ""
temperature: float = 0.7
max_tokens: int = 1000
memory_length: int = 10 # Number of previous messages to remember
use_rag: bool = False
rag_top_k: int = 5
rag_score_threshold: float = 0.02 # Lowered from default 0.3 to allow more results
fallback_responses: Optional[List[str]] = None
def __post_init__(self):
if self.fallback_responses is None:
self.fallback_responses = [
"I'm not sure how to help with that. Could you please rephrase your question?",
"I don't have enough information to answer that question accurately.",
"That's outside my knowledge area. Is there something else I can help you with?"
]
class ChatMessage(BaseModel):
"""Individual chat message"""
id: str = Field(default_factory=lambda: str(uuid.uuid4()))
role: MessageRole
content: str
timestamp: datetime = Field(default_factory=datetime.utcnow)
metadata: Dict[str, Any] = Field(default_factory=dict)
sources: Optional[List[Dict[str, Any]]] = None
class Conversation(BaseModel):
"""Conversation state"""
id: str = Field(default_factory=lambda: str(uuid.uuid4()))
chatbot_id: str
user_id: str
messages: List[ChatMessage] = Field(default_factory=list)
created_at: datetime = Field(default_factory=datetime.utcnow)
updated_at: datetime = Field(default_factory=datetime.utcnow)
metadata: Dict[str, Any] = Field(default_factory=dict)
class ChatRequest(BaseModel):
"""Chat completion request"""
message: str
conversation_id: Optional[str] = None
chatbot_id: str
use_rag: Optional[bool] = None
context: Optional[Dict[str, Any]] = None
class ChatResponse(BaseModel):
"""Chat completion response"""
response: str
conversation_id: str
message_id: str
sources: Optional[List[Dict[str, Any]]] = None
metadata: Dict[str, Any] = Field(default_factory=dict)
class ChatbotInstance(BaseModel):
"""Configured chatbot instance"""
id: str = Field(default_factory=lambda: str(uuid.uuid4()))
name: str
config: ChatbotConfig
created_by: str
created_at: datetime = Field(default_factory=datetime.utcnow)
updated_at: datetime = Field(default_factory=datetime.utcnow)
is_active: bool = True
class ChatbotModule(BaseModule):
"""Main chatbot module implementation"""
def __init__(self, rag_service: Optional[RAGServiceProtocol] = None):
super().__init__("chatbot")
self.rag_module = rag_service # Keep same name for compatibility
self.db_session = None
# System prompts will be loaded from database
self.system_prompts = {}
async def initialize(self, **kwargs):
"""Initialize the chatbot module"""
await super().initialize(**kwargs)
# Initialize the LLM service
await llm_service.initialize()
# Get RAG module dependency if not already injected
if not self.rag_module:
try:
# Try to get RAG module from module manager
from app.services.module_manager import module_manager
if hasattr(module_manager, 'modules') and 'rag' in module_manager.modules:
self.rag_module = module_manager.modules['rag']
logger.info("RAG module injected from module manager")
except Exception as e:
logger.warning(f"Could not inject RAG module: {e}")
# Load prompt templates from database
await self._load_prompt_templates()
logger.info("Chatbot module initialized")
logger.info(f"LLM service available: {llm_service._initialized}")
logger.info(f"RAG module available after init: {self.rag_module is not None}")
logger.info(f"Loaded {len(self.system_prompts)} prompt templates")
async def _ensure_dependencies(self):
"""Lazy load dependencies if not available"""
# Ensure LLM service is initialized
if not llm_service._initialized:
await llm_service.initialize()
logger.info("LLM service lazy loaded")
if not self.rag_module:
try:
# Try to get RAG module from module manager
from app.services.module_manager import module_manager
if hasattr(module_manager, 'modules') and 'rag' in module_manager.modules:
self.rag_module = module_manager.modules['rag']
logger.info("RAG module lazy loaded from module manager")
except Exception as e:
logger.warning(f"Could not lazy load RAG module: {e}")
async def _load_prompt_templates(self):
"""Load prompt templates from database"""
try:
from app.db.database import SessionLocal
from app.models.prompt_template import PromptTemplate
from sqlalchemy import select
db = SessionLocal()
try:
result = db.execute(
select(PromptTemplate)
.where(PromptTemplate.is_active == True)
)
templates = result.scalars().all()
for template in templates:
self.system_prompts[template.type_key] = template.system_prompt
logger.info(f"Loaded {len(self.system_prompts)} prompt templates from database")
finally:
db.close()
except Exception as e:
logger.warning(f"Could not load prompt templates from database: {e}")
# Fallback to hardcoded prompts
self.system_prompts = {
"assistant": "You are a helpful AI assistant. Provide accurate, concise, and friendly responses. Always aim to be helpful while being honest about your limitations.",
"customer_support": "You are a professional customer support representative. Be empathetic, professional, and solution-focused in all interactions.",
"teacher": "You are an experienced educational tutor. Break down complex concepts into understandable parts. Be patient, supportive, and encouraging.",
"researcher": "You are a thorough research assistant with a focus on accuracy and evidence-based information.",
"creative_writer": "You are an experienced creative writing mentor and storytelling expert.",
"custom": "You are a helpful AI assistant. Your personality and behavior will be defined by custom instructions."
}
async def get_system_prompt_for_type(self, chatbot_type: str) -> str:
"""Get system prompt for a specific chatbot type"""
if chatbot_type in self.system_prompts:
return self.system_prompts[chatbot_type]
# If not found, try to reload templates
await self._load_prompt_templates()
return self.system_prompts.get(chatbot_type, self.system_prompts.get("assistant",
"You are a helpful AI assistant. Provide accurate, concise, and friendly responses."))
async def create_chatbot(self, config: ChatbotConfig, user_id: str, db: Session) -> ChatbotInstance:
"""Create a new chatbot instance"""
# Set system prompt based on type if not provided or empty
if not config.system_prompt or config.system_prompt.strip() == "":
config.system_prompt = await self.get_system_prompt_for_type(config.chatbot_type)
# Create database record
db_chatbot = DBChatbotInstance(
name=config.name,
description=f"{config.chatbot_type.replace('_', ' ').title()} chatbot",
config=config.__dict__,
created_by=user_id
)
db.add(db_chatbot)
db.commit()
db.refresh(db_chatbot)
# Convert to response model
chatbot = ChatbotInstance(
id=db_chatbot.id,
name=db_chatbot.name,
config=ChatbotConfig(**db_chatbot.config),
created_by=db_chatbot.created_by,
created_at=db_chatbot.created_at,
updated_at=db_chatbot.updated_at,
is_active=db_chatbot.is_active
)
logger.info(f"Created new chatbot: {chatbot.name} ({chatbot.id})")
return chatbot
async def chat_completion(self, request: ChatRequest, user_id: str, db: Session) -> ChatResponse:
"""Generate chat completion response"""
# Get chatbot configuration from database
db_chatbot = db.query(DBChatbotInstance).filter(DBChatbotInstance.id == request.chatbot_id).first()
if not db_chatbot:
raise HTTPException(status_code=404, detail="Chatbot not found")
chatbot_config = ChatbotConfig(**db_chatbot.config)
# Get or create conversation
conversation = await self._get_or_create_conversation(
request.conversation_id, request.chatbot_id, user_id, db
)
# Create user message
user_message = DBMessage(
conversation_id=conversation.id,
role=MessageRole.USER.value,
content=request.message
)
db.add(user_message)
db.commit()
db.refresh(user_message)
logger.info(f"Created user message with ID {user_message.id} for conversation {conversation.id}")
try:
# Force the session to see the committed changes
db.expire_all()
# Get conversation history for context - includes the current message we just created
# Fetch up to memory_length pairs of messages (user + assistant)
# The +1 ensures we include the current message if we're at the limit
messages = db.query(DBMessage).filter(
DBMessage.conversation_id == conversation.id
).order_by(DBMessage.timestamp.desc()).limit(chatbot_config.memory_length * 2 + 1).all()
logger.info(f"Query for conversation_id={conversation.id}, memory_length={chatbot_config.memory_length}")
logger.info(f"Found {len(messages)} messages in conversation history")
# If we don't have any messages, manually add the user message we just created
if len(messages) == 0:
logger.warning(f"No messages found in query, but we just created message {user_message.id}")
logger.warning(f"Using the user message we just created")
messages = [user_message]
for idx, msg in enumerate(messages):
logger.info(f"Message {idx}: id={msg.id}, role={msg.role}, content_preview={msg.content[:50] if msg.content else 'None'}...")
# Generate response
response_content, sources = await self._generate_response(
request.message, messages, chatbot_config, request.context, db
)
# Create assistant message
assistant_message = DBMessage(
conversation_id=conversation.id,
role=MessageRole.ASSISTANT.value,
content=response_content,
sources=sources,
metadata={"model": chatbot_config.model, "temperature": chatbot_config.temperature}
)
db.add(assistant_message)
db.commit()
db.refresh(assistant_message)
# Update conversation timestamp
conversation.updated_at = datetime.utcnow()
db.commit()
return ChatResponse(
response=response_content,
conversation_id=conversation.id,
message_id=assistant_message.id,
sources=sources
)
except Exception as e:
logger.error(f"Chat completion failed: {e}")
# Return fallback response
fallback = chatbot_config.fallback_responses[0] if chatbot_config.fallback_responses else "I'm having trouble responding right now."
assistant_message = DBMessage(
conversation_id=conversation.id,
role=MessageRole.ASSISTANT.value,
content=fallback,
metadata={"error": str(e), "fallback": True}
)
db.add(assistant_message)
db.commit()
db.refresh(assistant_message)
return ChatResponse(
response=fallback,
conversation_id=conversation.id,
message_id=assistant_message.id,
metadata={"error": str(e), "fallback": True}
)
async def _generate_response(self, message: str, db_messages: List[DBMessage],
config: ChatbotConfig, context: Optional[Dict] = None, db: Session = None) -> tuple[str, Optional[List]]:
"""Generate response using LLM with optional RAG"""
# Lazy load dependencies if not available
await self._ensure_dependencies()
sources = None
rag_context = ""
# Helper: detect encryption-related queries for extra care
def _is_encryption_query(q: str) -> bool:
ql = (q or "").lower()
return any(k in ql for k in ["encrypt", "encryption", "encrypted", "decrypt", "decryption", "sd card", "microsd", "micro-sd"])
is_encryption = _is_encryption_query(message)
# RAG search if enabled
if config.use_rag and config.rag_collection and self.rag_module:
logger.info(f"RAG search enabled for collection: {config.rag_collection}")
try:
# Get the Qdrant collection name from RAG collection
qdrant_collection_name = await self._get_qdrant_collection_name(config.rag_collection, db)
logger.info(f"Qdrant collection name: {qdrant_collection_name}")
if qdrant_collection_name:
logger.info(f"Searching RAG documents: query='{message[:50]}...', max_results={config.rag_top_k}")
rag_results = await self.rag_module.search_documents(
query=message,
max_results=config.rag_top_k,
collection_name=qdrant_collection_name,
score_threshold=config.rag_score_threshold
)
# If the user asks about encryption, prefer results that explicitly mention it
if rag_results and is_encryption:
kw = ["encrypt", "encryption", "encrypted", "decrypt", "decryption"]
filtered = [r for r in rag_results if any(k in (r.document.content or "").lower() for k in kw)]
if filtered:
rag_results = filtered + [r for r in rag_results if r not in filtered]
if rag_results:
logger.info(f"RAG search found {len(rag_results)} results")
sources = [{"title": f"Document {i+1}", "content": result.document.content[:200]}
for i, result in enumerate(rag_results)]
# Build full RAG context from all results
rag_context = "\n\nRelevant information from knowledge base:\n" + "\n\n".join([
f"[Document {i+1}]:\n{result.document.content}" for i, result in enumerate(rag_results)
])
# Detailed RAG logging - ALWAYS log for debugging
logger.info("=== COMPREHENSIVE RAG SEARCH RESULTS ===")
logger.info(f"Query: '{message}'")
logger.info(f"Collection: {qdrant_collection_name}")
logger.info(f"Number of results: {len(rag_results)}")
for i, result in enumerate(rag_results):
logger.info(f"\n--- RAG Result {i+1} ---")
logger.info(f"Score: {getattr(result, 'score', 'N/A')}")
logger.info(f"Document ID: {getattr(result.document, 'id', 'N/A')}")
logger.info(f"Full Content ({len(result.document.content)} chars):")
logger.info(f"{result.document.content}")
if hasattr(result.document, 'metadata'):
logger.info(f"Metadata: {result.document.metadata}")
logger.info(f"\n=== RAG CONTEXT BEING ADDED TO PROMPT ({len(rag_context)} chars) ===")
logger.info(rag_context)
logger.info("=== END RAG SEARCH RESULTS ===")
else:
logger.warning("RAG search returned no results")
else:
logger.warning(f"RAG collection '{config.rag_collection}' not found in database")
except Exception as e:
logger.warning(f"RAG search failed: {e}")
import traceback
logger.warning(f"RAG search traceback: {traceback.format_exc()}")
# Build conversation context (includes the current message from db_messages)
# Inject strict grounding instructions when RAG is used, especially for encryption questions
extra_instructions = {}
if config.use_rag:
guardrails = (
"Answer strictly using the 'Relevant information' provided. "
"If the information does not explicitly answer the question, say you don't have enough information instead of guessing. "
)
if is_encryption:
guardrails += (
"When asked about encryption or SD-card backups, do not claim that backups are encrypted unless the provided context explicitly uses wording like 'encrypt', 'encrypted', or 'encryption'. "
"If such wording is absent, state clearly that the SD-card backup is not encrypted. "
)
extra_instructions["additional_instructions"] = guardrails
messages = self._build_conversation_messages(db_messages, config, rag_context, extra_instructions)
# Note: Current user message is already included in db_messages from the query
logger.info(f"Built conversation context with {len(messages)} messages")
# LLM completion
logger.info(f"Attempting LLM completion with model: {config.model}")
logger.info(f"Messages to send: {len(messages)} messages")
# Always log detailed prompts for debugging
logger.info("=== COMPREHENSIVE LLM REQUEST ===")
logger.info(f"Model: {config.model}")
logger.info(f"Temperature: {config.temperature}")
logger.info(f"Max tokens: {config.max_tokens}")
logger.info(f"RAG enabled: {config.use_rag}")
logger.info(f"RAG collection: {config.rag_collection}")
if config.use_rag and rag_context:
logger.info(f"RAG context added: {len(rag_context)} characters")
logger.info(f"RAG sources: {len(sources) if sources else 0} documents")
logger.info("\n=== COMPLETE MESSAGES SENT TO LLM ===")
for i, msg in enumerate(messages):
logger.info(f"\n--- Message {i+1} ---")
logger.info(f"Role: {msg['role']}")
logger.info(f"Content ({len(msg['content'])} chars):")
# Truncate long content for logging (full RAG context can be very long)
if len(msg['content']) > 500:
logger.info(f"{msg['content'][:500]}... [truncated, total {len(msg['content'])} chars]")
else:
logger.info(msg['content'])
logger.info("=== END COMPREHENSIVE LLM REQUEST ===")
try:
logger.info("Calling LLM service create_chat_completion...")
# Convert messages to LLM service format
llm_messages = [LLMChatMessage(role=msg["role"], content=msg["content"]) for msg in messages]
# Create LLM service request
llm_request = LLMChatRequest(
model=config.model,
messages=llm_messages,
temperature=config.temperature,
max_tokens=config.max_tokens,
user_id="chatbot_user",
api_key_id=0 # Chatbot module uses internal service
)
# Make request to LLM service
llm_response = await llm_service.create_chat_completion(llm_request)
# Extract response content
if llm_response.choices:
content = llm_response.choices[0].message.content
logger.info(f"Response content length: {len(content)}")
# Always log response for debugging
logger.info("=== COMPREHENSIVE LLM RESPONSE ===")
logger.info(f"Response content ({len(content)} chars):")
logger.info(content)
if llm_response.usage:
usage = llm_response.usage
logger.info(f"Token usage - Prompt: {usage.prompt_tokens}, Completion: {usage.completion_tokens}, Total: {usage.total_tokens}")
if sources:
logger.info(f"RAG sources included: {len(sources)} documents")
logger.info("=== END COMPREHENSIVE LLM RESPONSE ===")
return content, sources
else:
logger.warning("No choices in LLM response")
return "I received an empty response from the AI model.", sources
except SecurityError as e:
logger.error(f"Security error in LLM completion: {e}")
raise HTTPException(status_code=400, detail=f"Security validation failed: {e.message}")
except ProviderError as e:
logger.error(f"Provider error in LLM completion: {e}")
raise HTTPException(status_code=503, detail="LLM service temporarily unavailable")
except LLMError as e:
logger.error(f"LLM service error: {e}")
raise HTTPException(status_code=500, detail="LLM service error")
except Exception as e:
logger.error(f"LLM completion failed: {e}")
# Return fallback if available
return "I'm currently unable to process your request. Please try again later.", None
def _build_conversation_messages(self, db_messages: List[DBMessage], config: ChatbotConfig,
rag_context: str = "", context: Optional[Dict] = None) -> List[Dict]:
"""Build messages array for LLM completion"""
messages = []
# System prompt
system_prompt = config.system_prompt
if rag_context:
# Add explicit instruction to use RAG context
system_prompt += "\n\nIMPORTANT: Use the following information from the knowledge base to answer the user's question. " \
"This information is directly relevant to their query and should be your primary source:\n" + rag_context
if context and context.get('additional_instructions'):
system_prompt += f"\n\nAdditional instructions: {context['additional_instructions']}"
messages.append({"role": "system", "content": system_prompt})
logger.info(f"Building messages from {len(db_messages)} database messages")
# Conversation history (messages are already limited by memory_length in the query)
# Reverse to get chronological order
# Include ALL messages - the current user message is needed for the LLM to respond!
for idx, msg in enumerate(reversed(db_messages)):
logger.info(f"Processing message {idx}: role={msg.role}, content_preview={msg.content[:50] if msg.content else 'None'}...")
if msg.role in ["user", "assistant"]:
messages.append({
"role": msg.role,
"content": msg.content
})
logger.info(f"Added message with role {msg.role} to LLM messages")
else:
logger.info(f"Skipped message with role {msg.role}")
logger.info(f"Final messages array has {len(messages)} messages") # For debugging, can be removed in production
return messages
async def _get_or_create_conversation(self, conversation_id: Optional[str],
chatbot_id: str, user_id: str, db: Session) -> DBConversation:
"""Get existing conversation or create new one"""
if conversation_id:
conversation = db.query(DBConversation).filter(DBConversation.id == conversation_id).first()
if conversation:
return conversation
# Create new conversation
conversation = DBConversation(
chatbot_id=chatbot_id,
user_id=user_id,
title="New Conversation"
)
db.add(conversation)
db.commit()
db.refresh(conversation)
return conversation
def get_router(self) -> APIRouter:
"""Get FastAPI router for chatbot endpoints"""
router = APIRouter(prefix="/chatbot", tags=["chatbot"])
@router.post("/chat", response_model=ChatResponse)
async def chat_endpoint(
request: ChatRequest,
current_user: User = Depends(get_current_user),
db: Session = Depends(get_db)
):
"""Chat completion endpoint"""
return await self.chat_completion(request, str(current_user['id']), db)
@router.post("/create", response_model=ChatbotInstance)
async def create_chatbot_endpoint(
config: ChatbotConfig,
current_user: User = Depends(get_current_user),
db: Session = Depends(get_db)
):
"""Create new chatbot instance"""
return await self.create_chatbot(config, str(current_user['id']), db)
@router.get("/list", response_model=List[ChatbotInstance])
async def list_chatbots_endpoint(
current_user: User = Depends(get_current_user),
db: Session = Depends(get_db)
):
"""List user's chatbots"""
db_chatbots = db.query(DBChatbotInstance).filter(
(DBChatbotInstance.created_by == str(current_user['id'])) |
(DBChatbotInstance.created_by == "system")
).all()
chatbots = []
for db_chatbot in db_chatbots:
chatbot = ChatbotInstance(
id=db_chatbot.id,
name=db_chatbot.name,
config=ChatbotConfig(**db_chatbot.config),
created_by=db_chatbot.created_by,
created_at=db_chatbot.created_at,
updated_at=db_chatbot.updated_at,
is_active=db_chatbot.is_active
)
chatbots.append(chatbot)
return chatbots
@router.get("/conversations/{conversation_id}", response_model=Conversation)
async def get_conversation_endpoint(
conversation_id: str,
current_user: User = Depends(get_current_user),
db: Session = Depends(get_db)
):
"""Get conversation history"""
conversation = db.query(DBConversation).filter(
DBConversation.id == conversation_id
).first()
if not conversation:
raise HTTPException(status_code=404, detail="Conversation not found")
# Check if user owns this conversation
if conversation.user_id != str(current_user['id']):
raise HTTPException(status_code=403, detail="Not authorized")
# Get messages
messages = db.query(DBMessage).filter(
DBMessage.conversation_id == conversation_id
).order_by(DBMessage.timestamp).all()
# Convert to response model
chat_messages = []
for msg in messages:
chat_message = ChatMessage(
id=msg.id,
role=MessageRole(msg.role),
content=msg.content,
timestamp=msg.timestamp,
metadata=msg.metadata or {},
sources=msg.sources
)
chat_messages.append(chat_message)
response_conversation = Conversation(
id=conversation.id,
chatbot_id=conversation.chatbot_id,
user_id=conversation.user_id,
messages=chat_messages,
created_at=conversation.created_at,
updated_at=conversation.updated_at,
metadata=conversation.context_data or {}
)
return response_conversation
@router.get("/types", response_model=List[Dict[str, str]])
async def get_chatbot_types_endpoint():
"""Get available chatbot types and their descriptions"""
return [
{"type": "assistant", "name": "General Assistant", "description": "Helpful AI assistant for general questions"},
{"type": "customer_support", "name": "Customer Support", "description": "Professional customer service chatbot"},
{"type": "teacher", "name": "Teacher", "description": "Educational tutor and learning assistant"},
{"type": "researcher", "name": "Researcher", "description": "Research assistant with fact-checking focus"},
{"type": "creative_writer", "name": "Creative Writer", "description": "Creative writing and storytelling assistant"},
{"type": "custom", "name": "Custom", "description": "Custom chatbot with user-defined personality"}
]
return router
# API Compatibility Methods
async def chat(self, chatbot_config: Dict[str, Any], message: str,
conversation_history: List = None, user_id: str = "anonymous") -> Dict[str, Any]:
"""Chat method for API compatibility"""
logger.info(f"Chat method called with message: {message[:50]}... by user: {user_id}")
# Lazy load dependencies
await self._ensure_dependencies()
logger.info(f"LLM service available: {llm_service._initialized}")
logger.info(f"RAG module available: {self.rag_module is not None}")
try:
# Create a minimal database session for the chat
from app.db.database import SessionLocal
db = SessionLocal()
try:
# Convert config dict to ChatbotConfig
config = ChatbotConfig(
name=chatbot_config.get("name", "Unknown"),
chatbot_type=chatbot_config.get("chatbot_type", "assistant"),
model=chatbot_config.get("model", "gpt-3.5-turbo"),
system_prompt=chatbot_config.get("system_prompt", ""),
temperature=chatbot_config.get("temperature", 0.7),
max_tokens=chatbot_config.get("max_tokens", 1000),
memory_length=chatbot_config.get("memory_length", 10),
use_rag=chatbot_config.get("use_rag", False),
rag_collection=chatbot_config.get("rag_collection"),
rag_top_k=chatbot_config.get("rag_top_k", 5),
fallback_responses=chatbot_config.get("fallback_responses", [])
)
# Generate response using internal method
# Create a temporary message object for the current user message
temp_messages = [
DBMessage(
id=0,
conversation_id=0,
role="user",
content=message,
timestamp=datetime.utcnow(),
metadata={}
)
]
response_content, sources = await self._generate_response(
message, temp_messages, config, None, db
)
return {
"response": response_content,
"sources": sources,
"conversation_id": None,
"message_id": f"msg_{uuid.uuid4()}"
}
finally:
db.close()
except Exception as e:
logger.error(f"Chat method failed: {e}")
fallback_responses = chatbot_config.get("fallback_responses", [
"I'm sorry, I'm having trouble processing your request right now."
])
return {
"response": fallback_responses[0] if fallback_responses else "I'm sorry, I couldn't process your request.",
"sources": None,
"conversation_id": None,
"message_id": f"msg_{uuid.uuid4()}"
}
# Workflow Integration Methods
async def workflow_chat_step(self, context: Dict[str, Any], step_config: Dict[str, Any], db: Session) -> Dict[str, Any]:
"""Execute chatbot as a workflow step"""
message = step_config.get('message', '')
chatbot_id = step_config.get('chatbot_id')
use_rag = step_config.get('use_rag', False)
# Template substitution from context
message = self._substitute_template_variables(message, context)
request = ChatRequest(
message=message,
chatbot_id=chatbot_id,
use_rag=use_rag,
context=step_config.get('context', {})
)
# Use system user for workflow executions
response = await self.chat_completion(request, "workflow_system", db)
return {
"response": response.response,
"conversation_id": response.conversation_id,
"sources": response.sources,
"metadata": response.metadata
}
def _substitute_template_variables(self, template: str, context: Dict[str, Any]) -> str:
"""Simple template variable substitution"""
import re
def replace_var(match):
var_path = match.group(1)
try:
# Simple dot notation support: context.user.name
value = context
for part in var_path.split('.'):
value = value[part]
return str(value)
except (KeyError, TypeError):
return match.group(0) # Return original if not found
return re.sub(r'\{\{\s*([^}]+?)\s*\}\}', replace_var, template)
async def _get_qdrant_collection_name(self, collection_identifier: str, db: Session) -> Optional[str]:
"""Get Qdrant collection name from RAG collection ID, name, or direct Qdrant collection"""
try:
from app.models.rag_collection import RagCollection
from sqlalchemy import select
logger.info(f"Looking up RAG collection with identifier: '{collection_identifier}'")
# First check if this might be a direct Qdrant collection name
# (e.g., starts with "ext_", "rag_", or contains specific patterns)
if collection_identifier.startswith(("ext_", "rag_", "test_")) or "_" in collection_identifier:
# Check if this collection exists in Qdrant directly
actual_collection_name = collection_identifier
# Remove "ext_" prefix if present
if collection_identifier.startswith("ext_"):
actual_collection_name = collection_identifier[4:]
logger.info(f"Checking if '{actual_collection_name}' exists in Qdrant directly")
if self.rag_module:
try:
# Try to verify the collection exists in Qdrant
from qdrant_client import QdrantClient
qdrant_client = QdrantClient(host="enclava-qdrant", port=6333)
collections = qdrant_client.get_collections()
collection_names = [c.name for c in collections.collections]
if actual_collection_name in collection_names:
logger.info(f"Found Qdrant collection directly: {actual_collection_name}")
return actual_collection_name
except Exception as e:
logger.warning(f"Error checking Qdrant collections: {e}")
rag_collection = None
# Then try PostgreSQL lookup by ID if numeric
if collection_identifier.isdigit():
logger.info(f"Treating '{collection_identifier}' as collection ID")
stmt = select(RagCollection).where(
RagCollection.id == int(collection_identifier),
RagCollection.is_active == True
)
result = db.execute(stmt)
rag_collection = result.scalar_one_or_none()
# If not found by ID, try to look up by name in PostgreSQL
if not rag_collection:
logger.info(f"Collection not found by ID, trying by name: '{collection_identifier}'")
stmt = select(RagCollection).where(
RagCollection.name == collection_identifier,
RagCollection.is_active == True
)
result = db.execute(stmt)
rag_collection = result.scalar_one_or_none()
if rag_collection:
logger.info(f"Found RAG collection: ID={rag_collection.id}, name='{rag_collection.name}', qdrant_collection='{rag_collection.qdrant_collection_name}'")
return rag_collection.qdrant_collection_name
else:
logger.warning(f"RAG collection '{collection_identifier}' not found in database (tried both ID and name)")
return None
except Exception as e:
logger.error(f"Error looking up RAG collection '{collection_identifier}': {e}")
import traceback
logger.error(f"Traceback: {traceback.format_exc()}")
return None
# Required abstract methods from BaseModule
async def cleanup(self):
"""Cleanup chatbot module resources"""
logger.info("Chatbot module cleanup completed")
def get_required_permissions(self) -> List[Permission]:
"""Get required permissions for chatbot module"""
return [
Permission("chatbots", "create", "Create chatbot instances"),
Permission("chatbots", "configure", "Configure chatbot settings"),
Permission("chatbots", "chat", "Use chatbot for conversations"),
Permission("chatbots", "manage", "Manage all chatbots")
]
async def process_request(self, request_type: str, data: Dict[str, Any], context: Dict[str, Any]) -> Dict[str, Any]:
"""Process chatbot requests"""
if request_type == "chat":
# Handle chat requests
chat_request = ChatRequest(**data)
user_id = context.get("user_id", "anonymous")
db = context.get("db")
if db:
response = await self.chat_completion(chat_request, user_id, db)
return {
"success": True,
"response": response.response,
"conversation_id": response.conversation_id,
"sources": response.sources
}
return {"success": False, "error": f"Unknown request type: {request_type}"}
# Module factory function
def create_module(rag_service: Optional[RAGServiceProtocol] = None) -> ChatbotModule:
"""Factory function to create chatbot module instance"""
return ChatbotModule(rag_service=rag_service)
# Create module instance (dependencies will be injected via factory)
chatbot_module = ChatbotModule()
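
For reference, a minimal sketch of driving the API-compatibility chat() method defined above. It assumes it runs inside the configured backend (database, llm_service, and RAG module available); the config values are illustrative only.

import asyncio

async def demo_chat() -> None:
    config = {
        "name": "Support Bot",
        "chatbot_type": "customer_support",
        "model": "gpt-3.5-turbo",
        "use_rag": True,
        "rag_collection": "support_documentation",
    }
    result = await chatbot_module.chat(config, "How do I reset my password?", user_id="demo")
    print(result["response"])
    print(result["sources"])

asyncio.run(demo_chat())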


@@ -0,0 +1,110 @@
name: chatbot
version: 1.0.0
description: "AI Chatbot with RAG integration and customizable prompts"
author: "Enclava Team"
category: "conversation"
# Module lifecycle
enabled: true
auto_start: true
dependencies:
- rag
optional_dependencies:
- analytics
# Configuration
config_schema: "./config_schema.json"
ui_components: "./ui_components/"
# Module capabilities
provides:
- "chat_completion"
- "conversation_management"
- "chatbot_configuration"
consumes:
- "rag_search"
- "llm_completion"
# API endpoints
endpoints:
- path: "/chatbot/chat"
method: "POST"
description: "Generate chat completion"
- path: "/chatbot/create"
method: "POST"
description: "Create new chatbot instance"
- path: "/chatbot/list"
method: "GET"
description: "List user chatbots"
# UI Configuration
ui_config:
icon: "message-circle"
color: "#10B981"
category: "AI & ML"
# Configuration forms
forms:
- name: "basic_config"
title: "Basic Settings"
fields: ["name", "chatbot_type", "model"]
- name: "personality"
title: "Personality & Prompts"
fields: ["system_prompt", "temperature", "fallback_responses"]
- name: "knowledge_base"
title: "Knowledge Base"
fields: ["use_rag", "rag_collection", "rag_top_k"]
- name: "advanced"
title: "Advanced Settings"
fields: ["max_tokens", "memory_length"]
# Permissions
permissions:
- name: "chatbot.create"
description: "Create new chatbot instances"
- name: "chatbot.configure"
description: "Configure chatbot settings"
- name: "chatbot.chat"
description: "Use chatbot for conversations"
- name: "chatbot.manage"
description: "Manage all chatbots (admin)"
# Analytics events
analytics_events:
- name: "chatbot_created"
description: "New chatbot instance created"
- name: "chat_message_sent"
description: "User sent message to chatbot"
- name: "chat_response_generated"
description: "Chatbot generated response"
- name: "rag_context_used"
description: "RAG context was used in response"
# Health checks
health_checks:
- name: "llm_connectivity"
description: "Check LLM client connection"
- name: "rag_availability"
description: "Check RAG module availability"
- name: "conversation_memory"
description: "Check conversation storage health"
# Documentation
documentation:
readme: "./README.md"
examples: "./examples/"
api_docs: "./docs/api.md"
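
A short sketch of how a loader might read this manifest to resolve dependencies and advertised endpoints. It assumes PyYAML and that module.yaml is read from the module's directory; the actual module manager's loading code is not part of this commit.

import yaml  # assumed: PyYAML is installed

with open("module.yaml", "r", encoding="utf-8") as fh:
    manifest = yaml.safe_load(fh)

print(manifest["name"], manifest["version"])              # chatbot 1.0.0
print("dependencies:", manifest.get("dependencies", []))  # ['rag']
print("optional:", manifest.get("optional_dependencies", []))
for endpoint in manifest.get("endpoints", []):
    print(endpoint["method"], endpoint["path"])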


@@ -0,0 +1,225 @@
"""
Module Factory for Confidential Empire
This factory creates and wires up all modules with their dependencies.
It ensures proper dependency injection while maintaining optimal performance
through direct method calls and minimal indirection.
"""
from typing import Dict, Optional, Any
import logging
# Import all modules
from .rag.main import RAGModule
from .chatbot.main import ChatbotModule, create_module as create_chatbot_module
from .workflow.main import WorkflowModule
# Import services that modules depend on
from app.services.litellm_client import LiteLLMClient
# Import protocols for type safety
from .protocols import (
RAGServiceProtocol,
ChatbotServiceProtocol,
LiteLLMClientProtocol,
WorkflowServiceProtocol,
ServiceRegistry
)
logger = logging.getLogger(__name__)
class ModuleFactory:
"""Factory for creating and wiring module dependencies"""
def __init__(self):
self.modules: Dict[str, Any] = {}
self.initialized = False
async def create_all_modules(self, config: Optional[Dict[str, Any]] = None) -> ServiceRegistry:
"""
Create all modules with proper dependency injection
Args:
config: Optional configuration for modules
Returns:
Dictionary of created modules with their dependencies wired
"""
config = config or {}
logger.info("Creating modules with dependency injection...")
# Step 1: Create LiteLLM client (shared dependency)
litellm_client = LiteLLMClient()
# Step 2: Create RAG module (no dependencies on other modules)
rag_module = RAGModule(config=config.get("rag", {}))
# Step 3: Create chatbot module with RAG dependency
# (create_module only accepts rag_service; the chatbot calls the shared llm_service directly)
chatbot_module = create_chatbot_module(
rag_service=rag_module  # RAG module implements RAGServiceProtocol
)
# Step 4: Create workflow module with chatbot dependency
workflow_module = WorkflowModule(
chatbot_service=chatbot_module # Chatbot module implements ChatbotServiceProtocol
)
# Store all modules
modules = {
"rag": rag_module,
"chatbot": chatbot_module,
"workflow": workflow_module
}
logger.info(f"Created {len(modules)} modules with dependencies wired")
# Initialize all modules
await self._initialize_modules(modules, config)
self.modules = modules
self.initialized = True
return modules
async def _initialize_modules(self, modules: Dict[str, Any], config: Dict[str, Any]):
"""Initialize all modules in dependency order"""
# Initialize in dependency order (modules with no deps first)
initialization_order = [
("rag", modules["rag"]),
("chatbot", modules["chatbot"]), # Depends on RAG
("workflow", modules["workflow"]) # Depends on Chatbot
]
for module_name, module in initialization_order:
try:
logger.info(f"Initializing {module_name} module...")
module_config = config.get(module_name, {})
# Different modules have different initialization patterns
if hasattr(module, 'initialize'):
if module_name == "rag":
await module.initialize()
else:
await module.initialize(**module_config)
logger.info(f"{module_name} module initialized successfully")
except Exception as e:
logger.error(f"❌ Failed to initialize {module_name} module: {e}")
raise RuntimeError(f"Module initialization failed: {module_name}") from e
async def cleanup_all_modules(self):
"""Cleanup all modules in reverse dependency order"""
if not self.initialized:
return
# Cleanup in reverse order
cleanup_order = ["workflow", "chatbot", "rag"]
for module_name in cleanup_order:
if module_name in self.modules:
try:
logger.info(f"Cleaning up {module_name} module...")
module = self.modules[module_name]
if hasattr(module, 'cleanup'):
await module.cleanup()
logger.info(f"{module_name} module cleaned up")
except Exception as e:
logger.error(f"❌ Error cleaning up {module_name}: {e}")
self.modules.clear()
self.initialized = False
def get_module(self, name: str) -> Optional[Any]:
"""Get a module by name"""
return self.modules.get(name)
def is_initialized(self) -> bool:
"""Check if factory is initialized"""
return self.initialized
# Global factory instance
module_factory = ModuleFactory()
# Convenience functions for external use
async def create_modules(config: Optional[Dict[str, Any]] = None) -> ServiceRegistry:
"""Create all modules with dependencies wired"""
return await module_factory.create_all_modules(config)
async def cleanup_modules():
"""Cleanup all modules"""
await module_factory.cleanup_all_modules()
def get_module(name: str) -> Optional[Any]:
"""Get a module by name"""
return module_factory.get_module(name)
def get_all_modules() -> Dict[str, Any]:
"""Get all modules"""
return module_factory.modules.copy()
# Factory functions for individual modules (for testing/special cases)
def create_rag_module(config: Optional[Dict[str, Any]] = None) -> RAGModule:
"""Create RAG module"""
return RAGModule(config=config or {})
def create_chatbot_with_rag(rag_service: RAGServiceProtocol) -> ChatbotModule:
"""Create chatbot module with RAG dependency (LLM access goes through the shared llm_service)"""
return create_chatbot_module(rag_service=rag_service)
def create_workflow_with_chatbot(chatbot_service: ChatbotServiceProtocol) -> WorkflowModule:
"""Create workflow module with chatbot dependency"""
return WorkflowModule(chatbot_service=chatbot_service)
# Module registry for backward compatibility
class ModuleRegistry:
"""Registry that provides access to modules (for backward compatibility)"""
def __init__(self, factory: ModuleFactory):
self._factory = factory
@property
def modules(self) -> Dict[str, Any]:
"""Get all modules (compatible with existing module_manager interface)"""
return self._factory.modules
def get(self, name: str) -> Optional[Any]:
"""Get module by name"""
return self._factory.get_module(name)
def __getitem__(self, name: str) -> Any:
"""Support dictionary-style access"""
module = self.get(name)
if module is None:
raise KeyError(f"Module '{name}' not found")
return module
def keys(self):
"""Get module names"""
return self._factory.modules.keys()
def values(self):
"""Get module instances"""
return self._factory.modules.values()
def items(self):
"""Get module name-instance pairs"""
return self._factory.modules.items()
# Create registry instance for backward compatibility
module_registry = ModuleRegistry(module_factory)
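
A minimal sketch of driving the factory from application startup, assuming a FastAPI app with lifespan support; the router prefix and config values are illustrative, not taken from this commit.

from contextlib import asynccontextmanager

from fastapi import FastAPI

@asynccontextmanager
async def lifespan(app: FastAPI):
    modules = await create_modules({"rag": {}, "chatbot": {}, "workflow": {}})
    app.include_router(modules["chatbot"].get_router(), prefix="/api/v1")
    yield
    await cleanup_modules()

app = FastAPI(lifespan=lifespan)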


@@ -0,0 +1,258 @@
"""
Module Protocols for Confidential Empire
This file defines the interface contracts that modules must implement for inter-module communication.
Using Python protocols provides static type checking with zero runtime overhead.
"""
from typing import Protocol, Dict, List, Any, Optional, Union
from datetime import datetime
from abc import abstractmethod
class RAGServiceProtocol(Protocol):
"""Protocol for RAG (Retrieval-Augmented Generation) service interface"""
@abstractmethod
async def search(self, query: str, collection_name: str, top_k: int) -> Dict[str, Any]:
"""
Search for relevant documents
Args:
query: Search query string
collection_name: Name of the collection to search in
top_k: Number of top results to return
Returns:
Dictionary containing search results with 'results' key
"""
...
@abstractmethod
async def index_document(self, content: str, metadata: Dict[str, Any] = None) -> str:
"""
Index a document in the vector database
Args:
content: Document content to index
metadata: Optional metadata for the document
Returns:
Document ID
"""
...
@abstractmethod
async def delete_document(self, document_id: str) -> bool:
"""
Delete a document from the vector database
Args:
document_id: ID of document to delete
Returns:
True if successfully deleted
"""
...
class ChatbotServiceProtocol(Protocol):
"""Protocol for Chatbot service interface"""
@abstractmethod
async def chat_completion(self, request: Any, user_id: str, db: Any) -> Any:
"""
Generate chat completion response
Args:
request: Chat request object
user_id: ID of the user making the request
db: Database session
Returns:
Chat response object
"""
...
@abstractmethod
async def create_chatbot(self, config: Any, user_id: str, db: Any) -> Any:
"""
Create a new chatbot instance
Args:
config: Chatbot configuration
user_id: ID of the user creating the chatbot
db: Database session
Returns:
Created chatbot instance
"""
...
class LiteLLMClientProtocol(Protocol):
"""Protocol for LiteLLM client interface"""
@abstractmethod
async def completion(self, model: str, messages: List[Dict[str, str]], **kwargs) -> Any:
"""
Create a completion using the specified model
Args:
model: Model name to use
messages: List of messages for the conversation
**kwargs: Additional parameters for the completion
Returns:
Completion response object
"""
...
@abstractmethod
async def create_chat_completion(self, model: str, messages: List[Dict[str, str]],
user_id: str, api_key_id: str, **kwargs) -> Any:
"""
Create a chat completion with user tracking
Args:
model: Model name to use
messages: List of messages for the conversation
user_id: ID of the user making the request
api_key_id: API key identifier
**kwargs: Additional parameters
Returns:
Chat completion response
"""
...
class CacheServiceProtocol(Protocol):
"""Protocol for Cache service interface"""
@abstractmethod
async def get(self, key: str, default: Any = None) -> Any:
"""
Get value from cache
Args:
key: Cache key
default: Default value if key not found
Returns:
Cached value or default
"""
...
@abstractmethod
async def set(self, key: str, value: Any, ttl: Optional[int] = None) -> bool:
"""
Set value in cache
Args:
key: Cache key
value: Value to cache
ttl: Time to live in seconds
Returns:
True if successfully cached
"""
...
@abstractmethod
async def delete(self, key: str) -> bool:
"""
Delete key from cache
Args:
key: Cache key to delete
Returns:
True if successfully deleted
"""
...
class SecurityServiceProtocol(Protocol):
"""Protocol for Security service interface"""
@abstractmethod
async def analyze_request(self, request: Any) -> Any:
"""
Perform security analysis on a request
Args:
request: Request object to analyze
Returns:
Security analysis result
"""
...
@abstractmethod
async def validate_request(self, request: Any) -> bool:
"""
Validate request for security compliance
Args:
request: Request object to validate
Returns:
True if request is valid/safe
"""
...
class WorkflowServiceProtocol(Protocol):
"""Protocol for Workflow service interface"""
@abstractmethod
async def execute_workflow(self, workflow: Any, input_data: Dict[str, Any] = None) -> Any:
"""
Execute a workflow definition
Args:
workflow: Workflow definition to execute
input_data: Optional input data for the workflow
Returns:
Workflow execution result
"""
...
@abstractmethod
async def get_execution(self, execution_id: str) -> Any:
"""
Get workflow execution status
Args:
execution_id: ID of the execution to retrieve
Returns:
Execution status object
"""
...
class ModuleServiceProtocol(Protocol):
"""Base protocol for all module services"""
@abstractmethod
async def initialize(self, **kwargs) -> None:
"""Initialize the module"""
...
@abstractmethod
async def cleanup(self) -> None:
"""Cleanup module resources"""
...
@abstractmethod
def get_required_permissions(self) -> List[Any]:
"""Get required permissions for this module"""
...
# Type aliases for common service combinations
ServiceRegistry = Dict[str, ModuleServiceProtocol]
ServiceDependencies = Dict[str, Optional[ModuleServiceProtocol]]
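
Because these are structural protocols, a class satisfies them by shape alone; no inheritance or registration is required. Below is a minimal in-memory stub that a type checker would accept wherever RAGServiceProtocol is expected, useful as a test double (it assumes RAGServiceProtocol is imported from this module):

from typing import Any, Dict

class InMemoryRAGStub:
    """Test double that structurally satisfies RAGServiceProtocol."""

    def __init__(self) -> None:
        self._docs: Dict[str, str] = {}

    async def search(self, query: str, collection_name: str, top_k: int) -> Dict[str, Any]:
        hits = [{"content": text} for text in self._docs.values() if query.lower() in text.lower()]
        return {"results": hits[:top_k]}

    async def index_document(self, content: str, metadata: Dict[str, Any] = None) -> str:
        doc_id = str(len(self._docs) + 1)
        self._docs[doc_id] = content
        return doc_id

    async def delete_document(self, document_id: str) -> bool:
        return self._docs.pop(document_id, None) is not None

rag_stub: RAGServiceProtocol = InMemoryRAGStub()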


@@ -0,0 +1,6 @@
"""
RAG (Retrieval-Augmented Generation) module for Confidential Empire platform
"""
from .main import RAGModule
__all__ = ["RAGModule"]

File diff suppressed because it is too large.


@@ -0,0 +1,82 @@
name: rag
version: 1.0.0
description: "Document search, retrieval, and vector storage"
author: "Enclava Team"
category: "ai"
# Module lifecycle
enabled: true
auto_start: true
dependencies: []
optional_dependencies:
- cache
# Module capabilities
provides:
- "document_storage"
- "semantic_search"
- "vector_embeddings"
- "document_processing"
consumes:
- "qdrant_connection"
- "llm_embeddings"
- "document_parsing"
# API endpoints
endpoints:
- path: "/rag/collections"
method: "GET"
description: "List document collections"
- path: "/rag/upload"
method: "POST"
description: "Upload and process documents"
- path: "/rag/search"
method: "POST"
description: "Semantic search in documents"
- path: "/rag/collections/{collection_id}/documents"
method: "GET"
description: "List documents in collection"
# UI Configuration
ui_config:
icon: "search"
color: "#8B5CF6"
category: "AI & ML"
forms:
- name: "collection_config"
title: "Collection Settings"
fields: ["name", "description", "embedding_model"]
- name: "search_config"
title: "Search Configuration"
fields: ["top_k", "similarity_threshold", "rerank_enabled"]
# Permissions
permissions:
- name: "rag.create"
description: "Create document collections"
- name: "rag.upload"
description: "Upload documents to collections"
- name: "rag.search"
description: "Search document collections"
- name: "rag.manage"
description: "Manage all collections (admin)"
# Health checks
health_checks:
- name: "qdrant_connectivity"
description: "Check Qdrant vector database connection"
- name: "embeddings_service"
description: "Check LLM embeddings service"
- name: "document_processing"
description: "Check document parsing capabilities"

View File

@@ -0,0 +1,211 @@
"""
Optimized JSONL Processor for RAG Module
Handles JSONL files efficiently to prevent resource exhaustion
"""
import json
import logging
import asyncio
from typing import Dict, Any, List
from datetime import datetime
import uuid
from qdrant_client.models import PointStruct, Filter, FieldCondition, MatchValue
from qdrant_client.http.models import Batch
from app.modules.rag.main import ProcessedDocument
# from app.core.analytics import log_module_event # Analytics module not available
logger = logging.getLogger(__name__)
class JSONLProcessor:
"""Specialized processor for JSONL files"""
def __init__(self, rag_module):
self.rag_module = rag_module
self.config = rag_module.config
async def process_and_index_jsonl(self, collection_name: str, content: bytes,
filename: str, metadata: Dict[str, Any]) -> str:
"""Process and index a JSONL file efficiently
Processes each JSON line as a separate document to avoid
creating thousands of chunks from a single large document.
"""
try:
# Decode content
jsonl_content = content.decode('utf-8', errors='replace')
lines = jsonl_content.strip().split('\n')
logger.info(f"Processing JSONL file {filename} with {len(lines)} lines")
# Generate base document ID
base_doc_id = self.rag_module._generate_document_id(jsonl_content, metadata)
# Process lines in batches
batch_size = 10 # Smaller batches for better memory management
processed_count = 0
for batch_start in range(0, len(lines), batch_size):
batch_end = min(batch_start + batch_size, len(lines))
batch_lines = lines[batch_start:batch_end]
# Process batch
await self._process_jsonl_batch(
collection_name,
batch_lines,
batch_start,
base_doc_id,
filename,
metadata
)
processed_count += len(batch_lines)
# Log progress
if processed_count % 50 == 0:
logger.info(f"Processed {processed_count}/{len(lines)} lines from {filename}")
# Small delay to prevent resource exhaustion
await asyncio.sleep(0.05)
logger.info(f"Successfully processed JSONL file {filename} with {len(lines)} lines")
return base_doc_id
except Exception as e:
logger.error(f"Error processing JSONL file {filename}: {e}")
raise
async def _process_jsonl_batch(self, collection_name: str, lines: List[str],
start_idx: int, base_doc_id: str,
filename: str, metadata: Dict[str, Any]) -> None:
"""Process a batch of JSONL lines"""
try:
points = []
for line_idx, line in enumerate(lines, start=start_idx + 1):
if not line.strip():
continue
try:
# Parse JSON line
data = json.loads(line)
# Skip lines that decode to JSON null (e.g. a bare "null" line)
if data is None:
logger.warning(f"JSON line {line_idx} parsed as None")
continue
# Handle helpjuice export format
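# Expected line shape (inferred from the fields read below; real helpjuice
# exports may carry additional keys):
#   {"id": "...", "payload": {"question": "...", "answer": "...", "language": "EN"}}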
if 'payload' in data and data['payload'] is not None:
payload = data['payload']
article_id = data.get('id', f'article_{line_idx}')
# Extract Q&A
question = payload.get('question', '')
answer = payload.get('answer', '')
language = payload.get('language', 'EN')
if question or answer:
# Create Q&A content
content = f"Question: {question}\n\nAnswer: {answer}"
# Create metadata
doc_metadata = {
**metadata,
"article_id": article_id,
"language": language,
"filename": filename,
"line_number": line_idx,
"content_type": "qa_pair",
"question": question[:100], # Truncate for metadata
"processed_at": datetime.utcnow().isoformat()
}
# Generate single embedding for the Q&A pair
embeddings = await self.rag_module._generate_embeddings([content])
# Create point
point_id = str(uuid.uuid4())
points.append(PointStruct(
id=point_id,
vector=embeddings[0],
payload={
**doc_metadata,
"document_id": f"{base_doc_id}_{article_id}",
"content": content,
"chunk_index": 0,
"chunk_count": 1
}
))
# Handle generic JSON format
else:
content = json.dumps(data, indent=2, ensure_ascii=False)
# For larger JSON objects, we might need to chunk
if len(content) > 1000:
chunks = self.rag_module._chunk_text(content, chunk_size=500)
embeddings = await self.rag_module._generate_embeddings(chunks)
for i, (chunk, embedding) in enumerate(zip(chunks, embeddings)):
point_id = str(uuid.uuid4())
points.append(PointStruct(
id=point_id,
vector=embedding,
payload={
**metadata,
"filename": filename,
"line_number": line_idx,
"content_type": "json_object",
"document_id": f"{base_doc_id}_line_{line_idx}",
"content": chunk,
"chunk_index": i,
"chunk_count": len(chunks)
}
))
else:
# Small JSON - no chunking needed
embeddings = await self.rag_module._generate_embeddings([content])
point_id = str(uuid.uuid4())
points.append(PointStruct(
id=point_id,
vector=embeddings[0],
payload={
**metadata,
"filename": filename,
"line_number": line_idx,
"content_type": "json_object",
"document_id": f"{base_doc_id}_line_{line_idx}",
"content": content,
"chunk_index": 0,
"chunk_count": 1
}
))
except json.JSONDecodeError as e:
logger.warning(f"Error parsing JSONL line {line_idx}: {e}")
continue
except Exception as e:
logger.warning(f"Error processing JSONL line {line_idx}: {e}")
continue
# Insert all points in this batch
if points:
self.rag_module.qdrant_client.upsert(
collection_name=collection_name,
points=points
)
# Update stats
self.rag_module.stats["documents_indexed"] += len(points)
# log_module_event("rag", "jsonl_batch_processed", { # Analytics module not available
# "filename": filename,
# "lines_processed": len(lines),
# "points_created": len(points)
# })
except Exception as e:
logger.error(f"Error processing JSONL batch: {e}")
raise
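A brief usage sketch of the processor above; rag_module, the collection name, and the metadata values are placeholders, while the call signature matches process_and_index_jsonl as defined in this file.
# Hypothetical wiring -- rag_module is assumed to be an initialized RAG module
# exposing the config, Qdrant client, and helper methods used above.
async def ingest_export(rag_module, raw_bytes: bytes) -> str:
    processor = JSONLProcessor(rag_module)
    return await processor.process_and_index_jsonl(
        collection_name="rag_helpjuice_articles",  # placeholder collection
        content=raw_bytes,
        filename="helpjuice_export.jsonl",
        metadata={"source": "helpjuice", "ingested_by": "admin"},
    )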

View File

@@ -0,0 +1,163 @@
"""
Qdrant Stats Service
Provides direct, live statistics from Qdrant vector database
This is the single source of truth for all RAG collection statistics
"""
import httpx
import logging
from typing import List, Dict, Any, Optional
from datetime import datetime
from app.core.config import settings
logger = logging.getLogger(__name__)
class QdrantStatsService:
"""Service for getting live statistics from Qdrant"""
def __init__(self):
self.qdrant_host = getattr(settings, 'QDRANT_HOST', 'enclava-qdrant')
self.qdrant_port = getattr(settings, 'QDRANT_PORT', 6333)
self.qdrant_url = f"http://{self.qdrant_host}:{self.qdrant_port}"
async def get_collections_stats(self) -> Dict[str, Any]:
"""Get live collection statistics directly from Qdrant"""
try:
async with httpx.AsyncClient(timeout=10.0) as client:
# Get all collections
response = await client.get(f"{self.qdrant_url}/collections")
if response.status_code != 200:
logger.error(f"Failed to get collections: {response.status_code}")
return {"collections": [], "total_documents": 0, "total_size_bytes": 0}
data = response.json()
result = data.get("result", {})
collections_data = result.get("collections", [])
collections = []
total_documents = 0
total_size_bytes = 0
# Get detailed info for each collection
for col_info in collections_data:
collection_name = col_info.get("name", "")
# Include all collections, not just rag_ ones
# Get detailed collection info
try:
detail_response = await client.get(f"{self.qdrant_url}/collections/{collection_name}")
if detail_response.status_code == 200:
detail_data = detail_response.json()
detail_result = detail_data.get("result", {})
points_count = detail_result.get("points_count", 0)
status = detail_result.get("status", "unknown")
# Get vector size for size calculation
vector_size = 1024 # Default for multilingual-e5-large
try:
config = detail_result.get("config", {})
params = config.get("params", {})
vectors = params.get("vectors", {})
if isinstance(vectors, dict) and "size" in vectors:
vector_size = vectors["size"]
elif isinstance(vectors, dict) and "default" in vectors:
vector_size = vectors["default"].get("size", 1024)
except Exception:
pass
# Estimate size (points * vector_size * 4 bytes + 20% metadata overhead)
estimated_size = int(points_count * vector_size * 4 * 1.2)
# Extract collection metadata for user-friendly name
display_name = collection_name
description = ""
# Parse collection name to get original name
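# e.g. "rag_product_faq_1a2b3c4d" -> parts ["product", "faq", "1a2b3c4d"];
# the 8-char hex suffix is dropped and the display name becomes "Product Faq"
# (illustrative example of the heuristic below)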
if collection_name.startswith("rag_"):
parts = collection_name[4:].split("_")
if len(parts) > 1:
# Remove the UUID suffix
uuid_parts = [p for p in parts if len(p) == 8 and all(c in '0123456789abcdef' for c in p)]
for uuid_part in uuid_parts:
parts.remove(uuid_part)
display_name = " ".join(parts).replace("_", " ").title()
collection_stat = {
"id": collection_name,
"name": display_name,
"description": description,
"document_count": points_count,
"vector_count": points_count,
"size_bytes": estimated_size,
"status": status,
"qdrant_collection_name": collection_name,
"created_at": "", # Not available from Qdrant
"updated_at": datetime.utcnow().isoformat(),
"is_active": status == "green",
"is_managed": True,
"source": "qdrant"
}
collections.append(collection_stat)
total_documents += points_count
total_size_bytes += estimated_size
except Exception as e:
logger.error(f"Error getting details for collection {collection_name}: {e}")
continue
return {
"collections": collections,
"total_documents": total_documents,
"total_size_bytes": total_size_bytes,
"total_collections": len(collections)
}
except Exception as e:
logger.error(f"Error getting Qdrant stats: {e}")
return {"collections": [], "total_documents": 0, "total_size_bytes": 0, "total_collections": 0}
async def get_collection_stats(self, collection_name: str) -> Optional[Dict[str, Any]]:
"""Get statistics for a specific collection"""
try:
async with httpx.AsyncClient(timeout=10.0) as client:
response = await client.get(f"{self.qdrant_url}/collections/{collection_name}")
if response.status_code != 200:
return None
data = response.json()
result = data.get("result", {})
points_count = result.get("points_count", 0)
status = result.get("status", "unknown")
# Get vector size
vector_size = 1024
try:
config = result.get("config", {})
params = config.get("params", {})
vectors = params.get("vectors", {})
if isinstance(vectors, dict) and "size" in vectors:
vector_size = vectors["size"]
except Exception:
pass
estimated_size = int(points_count * vector_size * 4 * 1.2)
return {
"document_count": points_count,
"vector_count": points_count,
"size_bytes": estimated_size,
"status": status
}
except Exception as e:
logger.error(f"Error getting collection stats for {collection_name}: {e}")
return None
# Global instance
qdrant_stats_service = QdrantStatsService()
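A small usage sketch for the shared instance above, e.g. from a FastAPI route; the router prefix and response shaping are assumptions, only the service calls mirror the code in this file.
# Hypothetical endpoints -- the prefix and response handling are assumptions.
from fastapi import APIRouter
router = APIRouter(prefix="/rag", tags=["rag"])
@router.get("/stats")
async def rag_stats() -> dict:
    # Live numbers straight from Qdrant; no database caching involved
    return await qdrant_stats_service.get_collections_stats()
@router.get("/stats/{collection_name}")
async def rag_collection_stats(collection_name: str) -> dict:
    stats = await qdrant_stats_service.get_collection_stats(collection_name)
    return stats or {"error": f"collection '{collection_name}' not found"}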