From e070c9519071f78946be353b8c5a14e999d44730 Mon Sep 17 00:00:00 2001
From: Aljaz Ceru
Date: Thu, 30 Oct 2025 06:12:35 +0100
Subject: [PATCH] before rbac and tool use

---
 backend/app/api/v1/auth.py                    |   56 +-
 backend/app/api/v1/chatbot.py                 |    2 +-
 backend/app/api/v1/prompt_templates.py        |   43 +-
 backend/app/db/database.py                    |   19 +-
 backend/app/main.py                           |  122 +-
 backend/app/services/document_processor.py    |   70 +-
 backend/app/services/llm/config.py            |  111 +-
 backend/app/services/llm/service.py           |   23 +-
 backend/app/services/module_manager.py        |  152 +-
 backend/app/services/permission_manager.py    |    7 +-
 backend/modules/chatbot/__init__.py           |   21 -
 backend/modules/chatbot/config_schema.json    |  126 -
 .../examples/customer_support_workflow.json   |  182 --
 backend/modules/chatbot/main.py               |  949 --------
 backend/modules/chatbot/module.yaml           |  110 -
 backend/modules/factory.py                    |  225 --
 backend/modules/protocols.py                  |  258 --
 backend/modules/rag/__init__.py               |    6 -
 backend/modules/rag/main.py                   | 2083 -----------------
 backend/modules/rag/module.yaml               |   82 -
 backend/tests/test_modules.py                 |   10 +-
 frontend/src/contexts/PluginContext.tsx       |  166 +-
 22 files changed, 577 insertions(+), 4246 deletions(-)
 delete mode 100644 backend/modules/chatbot/__init__.py
 delete mode 100644 backend/modules/chatbot/config_schema.json
 delete mode 100644 backend/modules/chatbot/examples/customer_support_workflow.json
 delete mode 100644 backend/modules/chatbot/main.py
 delete mode 100644 backend/modules/chatbot/module.yaml
 delete mode 100644 backend/modules/factory.py
 delete mode 100644 backend/modules/protocols.py
 delete mode 100644 backend/modules/rag/__init__.py
 delete mode 100644 backend/modules/rag/main.py
 delete mode 100644 backend/modules/rag/module.yaml

diff --git a/backend/app/api/v1/auth.py b/backend/app/api/v1/auth.py
index 1824be4..b9c30b8 100644
--- a/backend/app/api/v1/auth.py
+++ b/backend/app/api/v1/auth.py
@@ -21,7 +21,7 @@ from app.core.security import (
     get_current_user,
     get_current_active_user,
 )
-from app.db.database import get_db
+from app.db.database import get_db, create_default_admin
 from app.models.user import User
 from app.utils.exceptions import AuthenticationError, ValidationError
 
@@ -201,23 +201,45 @@ async def login(
     user = result.scalar_one_or_none()
 
     if not user:
-        logger.warning("LOGIN_USER_NOT_FOUND", identifier=identifier)
-        # List available users for debugging
-        try:
-            all_users_stmt = select(User).limit(5)
-            all_users_result = await db.execute(all_users_stmt)
-            all_users = all_users_result.scalars().all()
-            logger.info(
-                "LOGIN_USER_LIST",
-                users=[u.email for u in all_users],
+        bootstrap_attempted = False
+        identifier_lower = identifier.lower() if identifier else ""
+        admin_email = settings.ADMIN_EMAIL.lower() if settings.ADMIN_EMAIL else None
+
+        if user_data.email and admin_email and identifier_lower == admin_email and settings.ADMIN_PASSWORD:
+            bootstrap_attempted = True
+            logger.info("LOGIN_ADMIN_BOOTSTRAP_START", email=user_data.email)
+            try:
+                await create_default_admin()
+                # Re-run lookup after bootstrap attempt
+                stmt = select(User).where(User.email == user_data.email)
+                result = await db.execute(stmt)
+                user = result.scalar_one_or_none()
+                if user:
+                    logger.info("LOGIN_ADMIN_BOOTSTRAP_SUCCESS", email=user.email)
+            except Exception as bootstrap_exc:
+                logger.error("LOGIN_ADMIN_BOOTSTRAP_FAILED", error=str(bootstrap_exc))
+
+        if not user:
+            logger.warning("LOGIN_USER_NOT_FOUND", identifier=identifier)
+            # List available users for debugging
+            try:
+                all_users_stmt = select(User).limit(5)
+                all_users_result = 
await db.execute(all_users_stmt) + all_users = all_users_result.scalars().all() + logger.info( + "LOGIN_USER_LIST", + users=[u.email for u in all_users], + ) + except Exception as e: + logger.error("LOGIN_USER_LIST_FAILURE", error=str(e)) + + if bootstrap_attempted: + logger.warning("LOGIN_ADMIN_BOOTSTRAP_UNSUCCESSFUL", email=user_data.email) + + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="Incorrect email or password" ) - except Exception as e: - logger.error("LOGIN_USER_LIST_FAILURE", error=str(e)) - - raise HTTPException( - status_code=status.HTTP_401_UNAUTHORIZED, - detail="Incorrect email or password" - ) logger.info("LOGIN_USER_FOUND", email=user.email, is_active=user.is_active) logger.info("LOGIN_PASSWORD_VERIFY_START") diff --git a/backend/app/api/v1/chatbot.py b/backend/app/api/v1/chatbot.py index b949174..b31f58d 100644 --- a/backend/app/api/v1/chatbot.py +++ b/backend/app/api/v1/chatbot.py @@ -158,7 +158,7 @@ async def create_chatbot( raise HTTPException(status_code=500, detail="Chatbot module not available") # Import needed types - from modules.chatbot.main import ChatbotConfig + from app.modules.chatbot.main import ChatbotConfig # Create chatbot config object config = ChatbotConfig( diff --git a/backend/app/api/v1/prompt_templates.py b/backend/app/api/v1/prompt_templates.py index 6612149..c965f72 100644 --- a/backend/app/api/v1/prompt_templates.py +++ b/backend/app/api/v1/prompt_templates.py @@ -7,6 +7,7 @@ from fastapi import APIRouter, Depends, HTTPException from pydantic import BaseModel from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy import select, update, delete +from sqlalchemy.dialects.postgresql import insert from datetime import datetime import uuid @@ -513,21 +514,33 @@ async def seed_default_templates( inactive_template.updated_at = datetime.utcnow() updated_templates.append(type_key) else: - # Create new template - new_template = PromptTemplate( - id=str(uuid.uuid4()), - name=template_data["name"], - type_key=type_key, - description=template_data["description"], - system_prompt=template_data["prompt"], - is_default=True, - is_active=True, - version=1, - created_at=datetime.utcnow(), - updated_at=datetime.utcnow() + # Create new template, gracefully skipping if another request created it first + now = datetime.utcnow() + stmt = ( + insert(PromptTemplate) + .values( + id=str(uuid.uuid4()), + name=template_data["name"], + type_key=type_key, + description=template_data["description"], + system_prompt=template_data["prompt"], + is_default=True, + is_active=True, + version=1, + created_at=now, + updated_at=now, + ) + .on_conflict_do_nothing(index_elements=[PromptTemplate.type_key]) ) - db.add(new_template) - created_templates.append(type_key) + + result = await db.execute(stmt) + if result.rowcount: + created_templates.append(type_key) + else: + log_api_request( + "prompt_template_seed_skipped", + {"type_key": type_key, "reason": "already_exists"}, + ) await db.commit() @@ -541,4 +554,4 @@ async def seed_default_templates( except Exception as e: await db.rollback() log_api_request("seed_default_templates_error", {"error": str(e), "user_id": user_id}) - raise HTTPException(status_code=500, detail=f"Failed to seed default templates: {str(e)}") \ No newline at end of file + raise HTTPException(status_code=500, detail=f"Failed to seed default templates: {str(e)}") diff --git a/backend/app/db/database.py b/backend/app/db/database.py index 85ce5a0..38a3386 100644 --- a/backend/app/db/database.py +++ b/backend/app/db/database.py @@ 
-5,6 +5,7 @@ Database connection and session management import logging from typing import AsyncGenerator from sqlalchemy import create_engine, MetaData +from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine from sqlalchemy.orm import sessionmaker, declarative_base from sqlalchemy.pool import StaticPool @@ -141,21 +142,27 @@ async def create_default_admin(): from app.core.security import get_password_hash from app.core.config import settings from sqlalchemy import select + from sqlalchemy.exc import SQLAlchemyError try: + admin_email = settings.ADMIN_EMAIL + admin_password = settings.ADMIN_PASSWORD + + if not admin_email or not admin_password: + logger.info("Admin bootstrap skipped: ADMIN_EMAIL or ADMIN_PASSWORD unset") + return + async with async_session_factory() as session: # Check if user with ADMIN_EMAIL exists - stmt = select(User).where(User.email == settings.ADMIN_EMAIL) + stmt = select(User).where(User.email == admin_email) result = await session.execute(stmt) existing_user = result.scalar_one_or_none() if existing_user: - logger.info(f"User with email {settings.ADMIN_EMAIL} already exists - skipping admin creation") + logger.info(f"User with email {admin_email} already exists - skipping admin creation") return # Create admin user from environment variables - admin_email = settings.ADMIN_EMAIL - admin_password = settings.ADMIN_PASSWORD # Generate username from email (part before @) admin_username = admin_email.split('@')[0] @@ -176,6 +183,10 @@ async def create_default_admin(): logger.warning("PLEASE CHANGE THE PASSWORD AFTER FIRST LOGIN") logger.warning("=" * 60) + except SQLAlchemyError as e: + logger.error(f"Failed to create default admin user due to database error: {e}") + except AttributeError as e: + logger.error(f"Failed to create default admin user: invalid ADMIN_EMAIL '{settings.ADMIN_EMAIL}'") except Exception as e: logger.error(f"Failed to create default admin user: {e}") # Don't raise here as this shouldn't block the application startup diff --git a/backend/app/main.py b/backend/app/main.py index 41ecf54..4a2eeb8 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -103,6 +103,7 @@ async def lifespan(app: FastAPI): Application lifespan handler """ logger.info("Starting Enclava platform...") + background_tasks = [] # Initialize core cache service (before database to provide caching for auth) from app.core.cache import core_cache @@ -125,16 +126,27 @@ async def lifespan(app: FastAPI): # Initialize config manager await init_config_manager() - # Initialize LLM service (needed by RAG module) + # Ensure platform permissions are registered before module discovery + from app.services.permission_manager import permission_registry + permission_registry.register_platform_permissions() + + # Initialize LLM service (needed by RAG module) concurrently from app.services.llm.service import llm_service - try: - await llm_service.initialize() - logger.info("LLM service initialized successfully") - except Exception as e: - logger.warning(f"LLM service initialization failed: {e}") + + async def initialize_llm_service(): + try: + await llm_service.initialize() + logger.info("LLM service initialized successfully") + except Exception as exc: + logger.warning(f"LLM service initialization failed: {exc}") + + background_tasks.append(asyncio.create_task(initialize_llm_service())) # Initialize analytics service - init_analytics_service() + try: + init_analytics_service() + except Exception as exc: + 
logger.warning(f"Analytics service initialization failed: {exc}") # Initialize module manager with FastAPI app for router registration logger.info("Initializing module manager...") @@ -142,62 +154,78 @@ async def lifespan(app: FastAPI): app.state.module_manager = module_manager logger.info("Module manager initialized successfully") - # Initialize permission registry - logger.info("Initializing permission registry...") - from app.services.permission_manager import permission_registry - permission_registry.register_platform_permissions() - logger.info("Permission registry initialized successfully") - # Initialize document processor from app.services.document_processor import document_processor - await document_processor.start() - app.state.document_processor = document_processor + try: + await document_processor.start() + app.state.document_processor = document_processor + except Exception as exc: + logger.error(f"Document processor failed to start: {exc}") + app.state.document_processor = None # Setup metrics - setup_metrics(app) + try: + setup_metrics(app) + except Exception as exc: + logger.warning(f"Metrics setup failed: {exc}") # Start background audit worker from app.services.audit_service import start_audit_worker - start_audit_worker() - - # Initialize plugin auto-discovery service - from app.services.plugin_autodiscovery import initialize_plugin_autodiscovery try: - discovery_results = await initialize_plugin_autodiscovery() - app.state.plugin_discovery_results = discovery_results - logger.info(f"Plugin auto-discovery completed: {discovery_results['summary']}") - except Exception as e: - logger.warning(f"Plugin auto-discovery failed: {e}") + start_audit_worker() + except Exception as exc: + logger.warning(f"Audit worker failed to start: {exc}") + + # Initialize plugin auto-discovery service concurrently + async def initialize_plugins(): + from app.services.plugin_autodiscovery import initialize_plugin_autodiscovery + try: + discovery_results = await initialize_plugin_autodiscovery() + app.state.plugin_discovery_results = discovery_results + logger.info(f"Plugin auto-discovery completed: {discovery_results.get('summary')}") + except Exception as exc: + logger.warning(f"Plugin auto-discovery failed: {exc}") + app.state.plugin_discovery_results = {"error": str(exc)} + + background_tasks.append(asyncio.create_task(initialize_plugins())) + + if background_tasks: + results = await asyncio.gather(*background_tasks, return_exceptions=True) + for result in results: + if isinstance(result, Exception): + logger.warning(f"Background startup task failed: {result}") logger.info("Platform started successfully") - yield - - # Cleanup - logger.info("Shutting down platform...") - - # Cleanup embedding service HTTP sessions - from app.services.embedding_service import embedding_service try: - await embedding_service.cleanup() - logger.info("Embedding service cleaned up successfully") - except Exception as e: - logger.error(f"Error cleaning up embedding service: {e}") + yield + finally: + # Cleanup + logger.info("Shutting down platform...") - # Close core cache service - from app.core.cache import core_cache - await core_cache.cleanup() + # Cleanup embedding service HTTP sessions + from app.services.embedding_service import embedding_service + try: + await embedding_service.cleanup() + logger.info("Embedding service cleaned up successfully") + except Exception as e: + logger.error(f"Error cleaning up embedding service: {e}") - # Close Redis connection for cached API key service - from 
app.services.cached_api_key import cached_api_key_service - await cached_api_key_service.close() + # Close core cache service + from app.core.cache import core_cache + await core_cache.cleanup() - # Stop document processor - if hasattr(app.state, 'document_processor'): - await app.state.document_processor.stop() + # Close Redis connection for cached API key service + from app.services.cached_api_key import cached_api_key_service + await cached_api_key_service.close() - await module_manager.cleanup() - logger.info("Platform shutdown complete") + # Stop document processor + processor = getattr(app.state, 'document_processor', None) + if processor: + await processor.stop() + + await module_manager.cleanup() + logger.info("Platform shutdown complete") # Create FastAPI application diff --git a/backend/app/services/document_processor.py b/backend/app/services/document_processor.py index 57e44b0..81c8ee5 100644 --- a/backend/app/services/document_processor.py +++ b/backend/app/services/document_processor.py @@ -9,6 +9,7 @@ from typing import Dict, Any, Optional, List from datetime import datetime from enum import Enum from dataclasses import dataclass +from pathlib import Path from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy import select, update from sqlalchemy.orm import selectinload @@ -99,12 +100,15 @@ class DocumentProcessor: try: task = ProcessingTask(document_id=document_id, priority=priority) - # Check if queue is full - if self.processing_queue.full(): - logger.warning(f"Processing queue is full, dropping task for document {document_id}") + try: + await asyncio.wait_for(self.processing_queue.put(task), timeout=5.0) + except asyncio.TimeoutError: + logger.warning( + "Processing queue saturated, could not enqueue document %s within timeout", + document_id, + ) return False - await self.processing_queue.put(task) self.stats["queue_size"] = self.processing_queue.qsize() logger.info(f"Added processing task for document {document_id} (priority: {priority})") @@ -119,6 +123,7 @@ class DocumentProcessor: logger.info(f"Started worker: {worker_name}") while self.running: + task: Optional[ProcessingTask] = None try: # Get task from queue (wait up to 1 second) task = await asyncio.wait_for( @@ -142,14 +147,21 @@ class DocumentProcessor: if task.retry_count < task.max_retries: task.retry_count += 1 await asyncio.sleep(2 ** task.retry_count) # Exponential backoff - await self.processing_queue.put(task) + try: + await asyncio.wait_for(self.processing_queue.put(task), timeout=5.0) + except asyncio.TimeoutError: + logger.error( + "%s: Failed to requeue document %s due to saturated queue", + worker_name, + task.document_id, + ) + self.stats["error_count"] += 1 + continue logger.warning(f"{worker_name}: Retrying document {task.document_id} (attempt {task.retry_count})") else: self.stats["error_count"] += 1 logger.error(f"{worker_name}: Failed to process document {task.document_id} after {task.max_retries} retries") - self.stats["active_workers"] -= 1 - except asyncio.TimeoutError: # No tasks in queue, continue continue @@ -157,9 +169,14 @@ class DocumentProcessor: # Worker cancelled, exit break except Exception as e: - self.stats["active_workers"] -= 1 logger.error(f"{worker_name}: Unexpected error: {e}") await asyncio.sleep(1) # Brief pause before continuing + finally: + if task is not None: + self.processing_queue.task_done() + if self.stats["active_workers"] > 0: + self.stats["active_workers"] -= 1 + self.stats["queue_size"] = self.processing_queue.qsize() logger.info(f"Worker stopped: 
{worker_name}") @@ -172,16 +189,24 @@ class DocumentProcessor: if not module_manager.initialized: await module_manager.initialize() - rag_module = module_manager.modules.get('rag') + rag_module = module_manager.get_module('rag') - if not rag_module or not getattr(rag_module, 'enabled', False): + if not rag_module: enabled = await module_manager.enable_module('rag') if not enabled: - raise Exception("Failed to enable RAG module") - rag_module = module_manager.modules.get('rag') + raise RuntimeError("Failed to enable RAG module") + rag_module = module_manager.get_module('rag') - if not rag_module or not getattr(rag_module, 'enabled', False): - raise Exception("RAG module not available or not enabled") + if not rag_module: + raise RuntimeError("RAG module not available after enable attempt") + + if not getattr(rag_module, 'enabled', True): + enabled = await module_manager.enable_module('rag') + if not enabled: + raise RuntimeError("RAG module is disabled and could not be re-enabled") + rag_module = module_manager.get_module('rag') + if not rag_module or not getattr(rag_module, 'enabled', True): + raise RuntimeError("RAG module is disabled and could not be re-enabled") self._rag_module = rag_module logger.info("DocumentProcessor cached RAG module instance for reuse") @@ -224,8 +249,21 @@ class DocumentProcessor: # Read file content logger.info(f"Reading file content for document {task.document_id}: {document.file_path}") - with open(document.file_path, 'rb') as f: - file_content = f.read() + file_path = Path(document.file_path) + try: + file_content = await asyncio.to_thread(file_path.read_bytes) + except FileNotFoundError: + logger.error(f"File not found for document {task.document_id}: {document.file_path}") + document.status = ProcessingStatus.ERROR + document.processing_error = "Document file not found on disk" + await session.commit() + return False + except Exception as exc: + logger.error(f"Failed reading file for document {task.document_id}: {exc}") + document.status = ProcessingStatus.ERROR + document.processing_error = f"Failed to read file: {exc}" + await session.commit() + return False logger.info(f"File content read successfully for document {task.document_id}, size: {len(file_content)} bytes") diff --git a/backend/app/services/llm/config.py b/backend/app/services/llm/config.py index b7aeb13..a5bb9f5 100644 --- a/backend/app/services/llm/config.py +++ b/backend/app/services/llm/config.py @@ -78,15 +78,16 @@ class LLMServiceConfig(BaseModel): -def create_default_config() -> LLMServiceConfig: +def create_default_config(env_vars=None) -> LLMServiceConfig: """Create default LLM service configuration""" - + env = env_vars or EnvironmentVariables() + # PrivateMode.ai configuration (via proxy) # Models will be fetched dynamically from proxy /models endpoint privatemode_config = ProviderConfig( name="privatemode", provider_type="privatemode", - enabled=True, + enabled=bool(env.PRIVATEMODE_API_KEY), base_url=settings.PRIVATEMODE_PROXY_URL, api_key_env_var="PRIVATEMODE_API_KEY", default_model="privatemode-latest", @@ -108,13 +109,105 @@ def create_default_config() -> LLMServiceConfig: ) ) + providers: Dict[str, ProviderConfig] = { + "privatemode": privatemode_config + } + + if env.OPENAI_API_KEY: + providers["openai"] = ProviderConfig( + name="openai", + provider_type="openai", + enabled=True, + base_url="https://api.openai.com/v1", + api_key_env_var="OPENAI_API_KEY", + default_model="gpt-4o-mini", + supported_models=[ + "gpt-4o-mini", + "gpt-4o", + "gpt-3.5-turbo", + 
"text-embedding-3-large", + "text-embedding-3-small" + ], + capabilities=["chat", "embeddings"], + priority=2, + supports_streaming=True, + supports_function_calling=True, + max_context_window=128000, + max_output_tokens=8192, + resilience=ResilienceConfig( + max_retries=2, + retry_delay_ms=750, + timeout_ms=45000, + circuit_breaker_threshold=6, + circuit_breaker_reset_timeout_ms=60000 + ) + ) + + if env.ANTHROPIC_API_KEY: + providers["anthropic"] = ProviderConfig( + name="anthropic", + provider_type="anthropic", + enabled=True, + base_url="https://api.anthropic.com/v1", + api_key_env_var="ANTHROPIC_API_KEY", + default_model="claude-3-opus-20240229", + supported_models=[ + "claude-3-opus-20240229", + "claude-3-sonnet-20240229", + "claude-3-haiku-20240307" + ], + capabilities=["chat"], + priority=3, + supports_streaming=True, + supports_function_calling=False, + max_context_window=200000, + max_output_tokens=4096, + resilience=ResilienceConfig( + max_retries=3, + retry_delay_ms=1000, + timeout_ms=60000, + circuit_breaker_threshold=5, + circuit_breaker_reset_timeout_ms=90000 + ) + ) + + if env.GOOGLE_API_KEY: + providers["google"] = ProviderConfig( + name="google", + provider_type="google", + enabled=True, + base_url="https://generativelanguage.googleapis.com/v1beta", + api_key_env_var="GOOGLE_API_KEY", + default_model="models/gemini-1.5-pro-latest", + supported_models=[ + "models/gemini-1.5-pro-latest", + "models/gemini-1.5-flash-latest" + ], + capabilities=["chat", "multimodal"], + priority=4, + supports_streaming=True, + supports_function_calling=True, + max_context_window=200000, + max_output_tokens=8192, + resilience=ResilienceConfig( + max_retries=2, + retry_delay_ms=1000, + timeout_ms=45000, + circuit_breaker_threshold=4, + circuit_breaker_reset_timeout_ms=60000 + ) + ) + + default_provider = next( + (name for name, provider in providers.items() if provider.enabled), + "privatemode" + ) + # Create main configuration config = LLMServiceConfig( - default_provider="privatemode", + default_provider=default_provider, enable_detailed_logging=settings.LOG_LLM_PROMPTS, - providers={ - "privatemode": privatemode_config - }, + providers=providers, model_routing={} # Will be populated dynamically from provider models ) @@ -174,7 +267,7 @@ class ConfigurationManager: def get_config(self) -> LLMServiceConfig: """Get current configuration""" if self._config is None: - self._config = create_default_config() + self._config = create_default_config(self._env_vars) self._validate_configuration() return self._config @@ -271,4 +364,4 @@ class ConfigurationManager: # Global configuration manager -config_manager = ConfigurationManager() \ No newline at end of file +config_manager = ConfigurationManager() diff --git a/backend/app/services/llm/service.py b/backend/app/services/llm/service.py index 7e64a03..1f71a89 100644 --- a/backend/app/services/llm/service.py +++ b/backend/app/services/llm/service.py @@ -186,7 +186,13 @@ class LLMService: total_latency = (time.time() - start_time) * 1000 error_code = getattr(e, 'error_code', e.__class__.__name__) - + logger.exception( + "Chat completion failed for provider %s (model=%s, latency=%.2fms, error=%s)", + provider_name, + request.model, + total_latency, + error_code, + ) raise async def create_chat_completion_stream(self, request: ChatRequest) -> AsyncGenerator[Dict[str, Any], None]: @@ -220,6 +226,12 @@ class LLMService: except Exception as e: # Record streaming failure - metrics disabled error_code = getattr(e, 'error_code', e.__class__.__name__) + 
logger.exception( + "Streaming chat completion failed for provider %s (model=%s, error=%s)", + provider_name, + request.model, + error_code, + ) raise async def create_embedding(self, request: EmbeddingRequest) -> EmbeddingResponse: @@ -261,6 +273,13 @@ class LLMService: # Record failed request - metrics disabled total_latency = (time.time() - start_time) * 1000 error_code = getattr(e, 'error_code', e.__class__.__name__) + logger.exception( + "Embedding request failed for provider %s (model=%s, latency=%.2fms, error=%s)", + provider_name, + request.model, + total_latency, + error_code, + ) raise async def get_models(self, provider_name: Optional[str] = None) -> List[ModelInfo]: @@ -378,4 +397,4 @@ class LLMService: # Global LLM service instance -llm_service = LLMService() \ No newline at end of file +llm_service = LLMService() diff --git a/backend/app/services/module_manager.py b/backend/app/services/module_manager.py index 4f6e58b..877864b 100644 --- a/backend/app/services/module_manager.py +++ b/backend/app/services/module_manager.py @@ -38,26 +38,49 @@ class ModuleConfig: class ModuleFileWatcher(FileSystemEventHandler): """Watch for changes in module files""" - def __init__(self, module_manager): + def __init__(self, module_manager, modules_root: Path): self.module_manager = module_manager + self.modules_root = modules_root.resolve() + + def _resolve_module_name(self, src_path: str) -> Optional[str]: + try: + relative_path = Path(src_path).resolve().relative_to(self.modules_root) + except ValueError: + return None + + parts = relative_path.parts + return parts[0] if parts else None def on_modified(self, event): if event.is_directory or not event.src_path.endswith('.py'): return - # Extract module name from path - path_parts = Path(event.src_path).parts - if 'modules' in path_parts: - modules_index = path_parts.index('modules') - if modules_index + 1 < len(path_parts): - module_name = path_parts[modules_index + 1] - if module_name in self.module_manager.modules: - log_module_event("hot_reload", "file_changed", { - "module": module_name, - "file": event.src_path - }) - # Schedule reload - asyncio.create_task(self.module_manager.reload_module(module_name)) + module_name = self._resolve_module_name(event.src_path) + if not module_name or module_name not in self.module_manager.modules: + return + + log_module_event("hot_reload", "file_changed", { + "module": module_name, + "file": event.src_path + }) + + loop = self.module_manager.loop + if not loop or loop.is_closed(): + logger.debug("Hot reload skipped for %s; event loop unavailable", module_name) + return + + try: + future = asyncio.run_coroutine_threadsafe( + self.module_manager.reload_module(module_name), + loop, + ) + future.add_done_callback( + lambda f: f.exception() and logger.warning( + "Module reload error for %s: %s", module_name, f.exception() + ) + ) + except RuntimeError as exc: + logger.debug("Hot reload scheduling failed for %s: %s", module_name, exc) class ModuleManager: @@ -71,12 +94,16 @@ class ModuleManager: self.hot_reload_enabled = True self.file_observer = None self.fastapi_app = None + self.loop: Optional[asyncio.AbstractEventLoop] = None + self.modules_root = (Path(__file__).resolve().parent.parent / "modules").resolve() async def initialize(self, fastapi_app=None): """Initialize the module manager and load all modules""" if self.initialized: return + self.loop = asyncio.get_running_loop() + # Store FastAPI app reference for router registration self.fastapi_app = fastapi_app @@ -103,10 +130,15 @@ class 
ModuleManager: """Load module configurations from dynamic discovery""" # Initialize permission system permission_registry.register_platform_permissions() + self.module_configs = {} # Discover modules dynamically from filesystem try: - discovered_manifests = await module_config_manager.discover_modules("modules") + if not self.modules_root.exists(): + logger.warning("Modules directory not found at %s", self.modules_root) + return + + discovered_manifests = await module_config_manager.discover_modules(str(self.modules_root)) # Load saved configurations await module_config_manager.load_saved_configs() @@ -206,45 +238,26 @@ class ModuleManager: try: log_module_event(module_name, "loading", {"config": config.config}) - # Check if module exists in the modules directory - # Try multiple possible locations in order of preference - possible_paths = [ - Path(f"modules/{module_name}"), # Docker container path - Path(f"modules/{module_name}"), # Container path - Path(f"app/modules/{module_name}") # Legacy path - ] + # Check if module exists in the canonical modules directory + module_dir = self.modules_root / module_name + modules_base_path = self.modules_root.parent - module_dir = None - modules_base_path = None + if not module_dir.exists(): + raise ModuleLoadError(f"Module {module_name} not found at {module_dir}") - for path in possible_paths: - if path.exists(): - module_dir = path - modules_base_path = path.parent - break + # Ensure the parent app directory is on sys.path for imports + modules_path_str = str(modules_base_path.absolute()) + if modules_path_str not in sys.path: + sys.path.insert(0, modules_path_str) - if module_dir and module_dir.exists(): - # Use direct import from modules directory - module_path = f"modules.{module_name}.main" - - # Add modules directory to Python path if not already there - modules_path_str = str(modules_base_path.absolute()) - if modules_path_str not in sys.path: - sys.path.insert(0, modules_path_str) - - # Force reload if already imported - if module_path in sys.modules: - importlib.reload(sys.modules[module_path]) - module = sys.modules[module_path] - else: - module = importlib.import_module(module_path) + module_path = f"app.modules.{module_name}.main" + + # Force reload if already imported + if module_path in sys.modules: + importlib.reload(sys.modules[module_path]) + module = sys.modules[module_path] else: - # Final fallback - try app.modules path (legacy) - try: - module_path = f"app.modules.{module_name}.main" - module = importlib.import_module(module_path) - except ImportError: - raise ModuleLoadError(f"Module {module_name} not found in any expected location: {[str(p) for p in possible_paths]}") + module = importlib.import_module(module_path) # Get the module instance - try multiple patterns module_instance = None @@ -484,7 +497,15 @@ class ModuleManager: except Exception as e: log_module_event(module_name, "shutdown_error", {"error": str(e)}) + if self.file_observer: + try: + self.file_observer.stop() + await asyncio.to_thread(self.file_observer.join) + finally: + self.file_observer = None + self.initialized = False + self.loop = None log_module_event("module_manager", "shutdown_complete", {"success": True}) async def cleanup(self): @@ -494,27 +515,18 @@ class ModuleManager: async def _start_file_watcher(self): """Start watching module files for changes""" try: - # Try multiple possible locations for modules directory - possible_modules_paths = [ - Path("modules"), # Docker container path - Path("modules"), # Container path - Path("app/modules") # 
Legacy path - ] + if self.file_observer: + return - modules_path = None - for path in possible_modules_paths: - if path.exists(): - modules_path = path - break + if not self.modules_root.exists(): + log_module_event("hot_reload", "watcher_skipped", {"reason": f"No modules directory at {self.modules_root}"}) + return - if modules_path and modules_path.exists(): - self.file_observer = Observer() - event_handler = ModuleFileWatcher(self) - self.file_observer.schedule(event_handler, str(modules_path), recursive=True) - self.file_observer.start() - log_module_event("hot_reload", "watcher_started", {"path": str(modules_path)}) - else: - log_module_event("hot_reload", "watcher_skipped", {"reason": "No modules directory found"}) + self.file_observer = Observer() + event_handler = ModuleFileWatcher(self, self.modules_root) + self.file_observer.schedule(event_handler, str(self.modules_root), recursive=True) + self.file_observer.start() + log_module_event("hot_reload", "watcher_started", {"path": str(self.modules_root)}) except Exception as e: log_module_event("hot_reload", "watcher_failed", {"error": str(e)}) @@ -672,4 +684,4 @@ class ModuleManager: # Global module manager instance -module_manager = ModuleManager() \ No newline at end of file +module_manager = ModuleManager() diff --git a/backend/app/services/permission_manager.py b/backend/app/services/permission_manager.py index 4f10b92..93880e0 100644 --- a/backend/app/services/permission_manager.py +++ b/backend/app/services/permission_manager.py @@ -132,6 +132,7 @@ class ModulePermissionRegistry: self.module_permissions: Dict[str, List[Permission]] = {} self.role_permissions: Dict[str, List[str]] = {} self.default_roles = self._initialize_default_roles() + self._platform_permissions_registered = False def _initialize_default_roles(self) -> Dict[str, List[str]]: """Initialize default permission roles""" @@ -177,6 +178,9 @@ class ModulePermissionRegistry: def register_platform_permissions(self): """Register core platform permissions""" + if self._platform_permissions_registered: + return + platform_permissions = [ Permission("users", "create", "Create users"), Permission("users", "read", "View users"), @@ -232,6 +236,7 @@ class ModulePermissionRegistry: self.tree.add_permission(perm_string, perm) logger.info("Registered platform and LLM permissions") + self._platform_permissions_registered = True def check_permission(self, user_permissions: List[str], required: str, context: Dict[str, Any] = None) -> bool: @@ -407,4 +412,4 @@ def require_permission(user_permissions: List[str], required_permission: str, co # Global permission registry instance -permission_registry = ModulePermissionRegistry() \ No newline at end of file +permission_registry = ModulePermissionRegistry() diff --git a/backend/modules/chatbot/__init__.py b/backend/modules/chatbot/__init__.py deleted file mode 100644 index 5131eeb..0000000 --- a/backend/modules/chatbot/__init__.py +++ /dev/null @@ -1,21 +0,0 @@ -""" -Chatbot Module - AI Chatbot with RAG Integration - -This module provides AI chatbot capabilities with: -- Multiple personality types (Assistant, Customer Support, Teacher, etc.) 
-- RAG integration for knowledge-based responses -- Conversation memory and context management -- Workflow integration as building blocks -- UI-configurable settings -""" - -from .main import ChatbotModule, create_module - -__version__ = "1.0.0" -__author__ = "Enclava Team" - -# Export main classes for easy importing -__all__ = [ - "ChatbotModule", - "create_module" -] \ No newline at end of file diff --git a/backend/modules/chatbot/config_schema.json b/backend/modules/chatbot/config_schema.json deleted file mode 100644 index d4a5dc7..0000000 --- a/backend/modules/chatbot/config_schema.json +++ /dev/null @@ -1,126 +0,0 @@ -{ - "title": "Chatbot Configuration", - "type": "object", - "properties": { - "name": { - "type": "string", - "title": "Chatbot Name", - "description": "Display name for this chatbot instance", - "minLength": 1, - "maxLength": 100 - }, - "chatbot_type": { - "type": "string", - "title": "Chatbot Type", - "description": "Select the type of chatbot personality", - "enum": ["assistant", "customer_support", "teacher", "researcher", "creative_writer", "custom"], - "enumNames": ["General Assistant", "Customer Support", "Teacher", "Researcher", "Creative Writer", "Custom"], - "default": "assistant" - }, - "model": { - "type": "string", - "title": "AI Model", - "description": "Choose the LLM model for responses", - "enum": ["gpt-4", "gpt-3.5-turbo", "claude-3-sonnet", "claude-3-opus", "llama-70b"], - "default": "gpt-3.5-turbo" - }, - "system_prompt": { - "type": "string", - "title": "System Prompt", - "description": "Define the chatbot's personality and behavior instructions", - "ui:widget": "textarea", - "ui:options": { - "rows": 6, - "placeholder": "You are a helpful AI assistant..." - } - }, - "use_rag": { - "type": "boolean", - "title": "Enable Knowledge Base", - "description": "Use RAG to search knowledge base for context", - "default": false - }, - "rag_collection": { - "type": "string", - "title": "Knowledge Base Collection", - "description": "Select which document collection to search", - "ui:widget": "rag-collection-selector", - "ui:condition": "use_rag === true" - }, - "rag_top_k": { - "type": "integer", - "title": "Knowledge Base Results", - "description": "Number of relevant documents to include", - "minimum": 1, - "maximum": 10, - "default": 5, - "ui:condition": "use_rag === true" - }, - "temperature": { - "type": "number", - "title": "Response Creativity", - "description": "Controls randomness (0.0 = focused, 1.0 = creative)", - "minimum": 0, - "maximum": 1, - "default": 0.7, - "ui:widget": "range", - "ui:options": { - "step": 0.1 - } - }, - "max_tokens": { - "type": "integer", - "title": "Maximum Response Length", - "description": "Maximum number of tokens in response", - "minimum": 50, - "maximum": 4000, - "default": 1000, - "ui:widget": "range", - "ui:options": { - "step": 50 - } - }, - "memory_length": { - "type": "integer", - "title": "Conversation Memory", - "description": "Number of previous message pairs to remember", - "minimum": 1, - "maximum": 50, - "default": 10, - "ui:widget": "range" - }, - "fallback_responses": { - "type": "array", - "title": "Fallback Responses", - "description": "Responses to use when the AI cannot answer", - "items": { - "type": "string", - "title": "Fallback Response" - }, - "default": [ - "I'm not sure how to help with that. Could you please rephrase your question?", - "I don't have enough information to answer that question accurately.", - "That's outside my knowledge area. Is there something else I can help you with?" 
- ], - "ui:options": { - "orderable": true, - "addable": true, - "removable": true - } - } - }, - "required": ["name", "chatbot_type", "model"], - "ui:order": [ - "name", - "chatbot_type", - "model", - "system_prompt", - "use_rag", - "rag_collection", - "rag_top_k", - "temperature", - "max_tokens", - "memory_length", - "fallback_responses" - ] -} \ No newline at end of file diff --git a/backend/modules/chatbot/examples/customer_support_workflow.json b/backend/modules/chatbot/examples/customer_support_workflow.json deleted file mode 100644 index 7d22781..0000000 --- a/backend/modules/chatbot/examples/customer_support_workflow.json +++ /dev/null @@ -1,182 +0,0 @@ -{ - "name": "Customer Support Workflow", - "description": "Intelligent customer support workflow with intent classification, knowledge base search, and chatbot response generation", - "version": "1.0", - "variables": { - "support_chatbot_id": "cs-bot-001", - "escalation_threshold": 0.3, - "max_attempts": 3 - }, - "steps": [ - { - "id": "classify_intent", - "name": "Classify Customer Intent", - "type": "llm_call", - "model": "gpt-3.5-turbo", - "messages": [ - { - "role": "system", - "content": "You are an intent classifier for customer support. Classify the customer message into one of these categories: technical_issue, billing_question, feature_request, complaint, general_inquiry. Also provide a confidence score between 0 and 1. Respond with JSON: {\"intent\": \"category\", \"confidence\": 0.95, \"reasoning\": \"explanation\"}" - }, - { - "role": "user", - "content": "{{ inputs.customer_message }}" - } - ], - "output_variable": "intent_classification" - }, - - { - "id": "search_knowledge_base", - "name": "Search Knowledge Base", - "type": "workflow_step", - "module": "rag", - "action": "search", - "config": { - "query": "{{ inputs.customer_message }}", - "collection": "support_documentation", - "top_k": 5, - "include_metadata": true - }, - "output_variable": "knowledge_results" - }, - - { - "id": "check_confidence", - "name": "Check Intent Confidence", - "type": "condition", - "condition": "JSON.parse(steps.classify_intent.result).confidence > variables.escalation_threshold", - "true_steps": [ - { - "id": "generate_chatbot_response", - "name": "Generate Chatbot Response", - "type": "workflow_step", - "module": "chatbot", - "action": "workflow_chat_step", - "config": { - "message": "{{ inputs.customer_message }}", - "chatbot_id": "{{ variables.support_chatbot_id }}", - "use_rag": true, - "context": { - "intent": "{{ steps.classify_intent.result }}", - "knowledge_base_results": "{{ steps.search_knowledge_base.result }}", - "customer_history": "{{ inputs.customer_history }}", - "additional_instructions": "Be empathetic and professional. If you cannot fully resolve the issue, offer to escalate to a human agent." - } - }, - "output_variable": "chatbot_response" - }, - - { - "id": "analyze_response_quality", - "name": "Analyze Response Quality", - "type": "llm_call", - "model": "gpt-3.5-turbo", - "messages": [ - { - "role": "system", - "content": "Analyze if this customer support response adequately addresses the customer's question. Consider completeness, accuracy, and helpfulness. 
Respond with JSON: {\"quality_score\": 0.85, \"is_adequate\": true, \"requires_escalation\": false, \"reasoning\": \"explanation\"}" - }, - { - "role": "user", - "content": "Customer Question: {{ inputs.customer_message }}\\n\\nChatbot Response: {{ steps.generate_chatbot_response.result.response }}\\n\\nKnowledge Base Context: {{ steps.search_knowledge_base.result }}" - } - ], - "output_variable": "response_quality" - }, - - { - "id": "final_response_decision", - "name": "Final Response Decision", - "type": "condition", - "condition": "JSON.parse(steps.analyze_response_quality.result).is_adequate === true", - "true_steps": [ - { - "id": "send_chatbot_response", - "name": "Send Chatbot Response", - "type": "output", - "config": { - "response_type": "chatbot_response", - "message": "{{ steps.generate_chatbot_response.result.response }}", - "sources": "{{ steps.generate_chatbot_response.result.sources }}", - "confidence": "{{ JSON.parse(steps.classify_intent.result).confidence }}", - "quality_score": "{{ JSON.parse(steps.analyze_response_quality.result).quality_score }}" - } - } - ], - "false_steps": [ - { - "id": "escalate_to_human", - "name": "Escalate to Human Agent", - "type": "output", - "config": { - "response_type": "human_escalation", - "message": "I'd like to connect you with one of our human support agents who can better assist with your specific situation. Please hold on while I transfer you.", - "escalation_reason": "Response quality below threshold", - "intent": "{{ steps.classify_intent.result }}", - "attempted_response": "{{ steps.generate_chatbot_response.result.response }}", - "priority": "normal" - } - } - ] - } - ], - "false_steps": [ - { - "id": "low_confidence_escalation", - "name": "Low Confidence Escalation", - "type": "output", - "config": { - "response_type": "human_escalation", - "message": "I want to make sure you get the best possible help. Let me connect you with one of our human support agents.", - "escalation_reason": "Low intent classification confidence", - "intent": "{{ steps.classify_intent.result }}", - "priority": "high" - } - } - ] - }, - - { - "id": "log_interaction", - "name": "Log Customer Interaction", - "type": "workflow_step", - "module": "analytics", - "action": "log_event", - "config": { - "event_type": "customer_support_interaction", - "data": { - "customer_message": "{{ inputs.customer_message }}", - "intent_classification": "{{ steps.classify_intent.result }}", - "response_generated": "{{ steps.generate_chatbot_response.result.response }}", - "knowledge_base_used": "{{ steps.search_knowledge_base.result }}", - "escalated": "{{ outputs.response_type === 'human_escalation' }}", - "workflow_execution_time": "{{ execution_time }}", - "timestamp": "{{ current_timestamp }}" - } - } - } - ], - - "outputs": { - "response_type": "string", - "message": "string", - "sources": "array", - "escalation_reason": "string", - "confidence": "number", - "quality_score": "number" - }, - - "error_handling": { - "retry_failed_steps": true, - "max_retries": 2, - "fallback_response": "I apologize, but I'm experiencing technical difficulties. Please contact our support team directly for assistance." 
- }, - - "metadata": { - "created_by": "support_team", - "use_case": "customer_support_automation", - "tags": ["customer_support", "chatbot", "rag", "escalation"], - "estimated_execution_time": "5-15 seconds" - } -} \ No newline at end of file diff --git a/backend/modules/chatbot/main.py b/backend/modules/chatbot/main.py deleted file mode 100644 index 0dd46f9..0000000 --- a/backend/modules/chatbot/main.py +++ /dev/null @@ -1,949 +0,0 @@ -""" -Chatbot Module Implementation - -Provides AI chatbot capabilities with: -- RAG integration for knowledge-based responses -- Custom prompts and personalities -- Conversation memory and context -- Workflow integration as building blocks -- UI-configurable settings -""" - -import json -from pprint import pprint -import uuid -from datetime import datetime, timedelta -from typing import Dict, List, Any, Optional, Union -from dataclasses import dataclass -from pydantic import BaseModel, Field -from enum import Enum - -from fastapi import APIRouter, HTTPException, Depends -from sqlalchemy.orm import Session - -from app.core.logging import get_logger -from app.services.llm.service import llm_service -from app.services.llm.models import ChatRequest as LLMChatRequest, ChatMessage as LLMChatMessage -from app.services.llm.exceptions import LLMError, ProviderError, SecurityError -from app.services.base_module import BaseModule, Permission -from app.models.user import User -from app.models.chatbot import ChatbotInstance as DBChatbotInstance, ChatbotConversation as DBConversation, ChatbotMessage as DBMessage, ChatbotAnalytics -from app.core.security import get_current_user -from app.db.database import get_db -from app.core.config import settings - -# Import protocols for type hints and dependency injection -from ..protocols import RAGServiceProtocol -# Note: LiteLLMClientProtocol replaced with direct LLM service usage - -logger = get_logger(__name__) - - -class ChatbotType(str, Enum): - """Types of chatbot personalities""" - ASSISTANT = "assistant" - CUSTOMER_SUPPORT = "customer_support" - TEACHER = "teacher" - RESEARCHER = "researcher" - CREATIVE_WRITER = "creative_writer" - CUSTOM = "custom" - - -class MessageRole(str, Enum): - """Message roles in conversation""" - USER = "user" - ASSISTANT = "assistant" - SYSTEM = "system" - - -@dataclass -class ChatbotConfig: - """Chatbot configuration""" - name: str - chatbot_type: str # Changed from ChatbotType enum to str to allow custom types - model: str - rag_collection: Optional[str] = None - system_prompt: str = "" - temperature: float = 0.7 - max_tokens: int = 1000 - memory_length: int = 10 # Number of previous messages to remember - use_rag: bool = False - rag_top_k: int = 5 - rag_score_threshold: float = 0.02 # Lowered from default 0.3 to allow more results - fallback_responses: List[str] = None - - def __post_init__(self): - if self.fallback_responses is None: - self.fallback_responses = [ - "I'm not sure how to help with that. Could you please rephrase your question?", - "I don't have enough information to answer that question accurately.", - "That's outside my knowledge area. Is there something else I can help you with?" 
- ] - - -class ChatMessage(BaseModel): - """Individual chat message""" - id: str = Field(default_factory=lambda: str(uuid.uuid4())) - role: MessageRole - content: str - timestamp: datetime = Field(default_factory=datetime.utcnow) - metadata: Dict[str, Any] = Field(default_factory=dict) - sources: Optional[List[Dict[str, Any]]] = None - - -class Conversation(BaseModel): - """Conversation state""" - id: str = Field(default_factory=lambda: str(uuid.uuid4())) - chatbot_id: str - user_id: str - messages: List[ChatMessage] = Field(default_factory=list) - created_at: datetime = Field(default_factory=datetime.utcnow) - updated_at: datetime = Field(default_factory=datetime.utcnow) - metadata: Dict[str, Any] = Field(default_factory=dict) - - -class ChatRequest(BaseModel): - """Chat completion request""" - message: str - conversation_id: Optional[str] = None - chatbot_id: str - use_rag: Optional[bool] = None - context: Optional[Dict[str, Any]] = None - - -class ChatResponse(BaseModel): - """Chat completion response""" - response: str - conversation_id: str - message_id: str - sources: Optional[List[Dict[str, Any]]] = None - metadata: Dict[str, Any] = Field(default_factory=dict) - - -class ChatbotInstance(BaseModel): - """Configured chatbot instance""" - id: str = Field(default_factory=lambda: str(uuid.uuid4())) - name: str - config: ChatbotConfig - created_by: str - created_at: datetime = Field(default_factory=datetime.utcnow) - updated_at: datetime = Field(default_factory=datetime.utcnow) - is_active: bool = True - - -class ChatbotModule(BaseModule): - """Main chatbot module implementation""" - - def __init__(self, rag_service: Optional[RAGServiceProtocol] = None): - super().__init__("chatbot") - self.rag_module = rag_service # Keep same name for compatibility - self.db_session = None - - # System prompts will be loaded from database - self.system_prompts = {} - - async def initialize(self, **kwargs): - """Initialize the chatbot module""" - await super().initialize(**kwargs) - - # Initialize the LLM service - await llm_service.initialize() - - # Get RAG module dependency if not already injected - if not self.rag_module: - try: - # Try to get RAG module from module manager - from app.services.module_manager import module_manager - if hasattr(module_manager, 'modules') and 'rag' in module_manager.modules: - self.rag_module = module_manager.modules['rag'] - logger.info("RAG module injected from module manager") - except Exception as e: - logger.warning(f"Could not inject RAG module: {e}") - - # Load prompt templates from database - await self._load_prompt_templates() - - logger.info("Chatbot module initialized") - logger.info(f"LLM service available: {llm_service._initialized}") - logger.info(f"RAG module available after init: {self.rag_module is not None}") - logger.info(f"Loaded {len(self.system_prompts)} prompt templates") - - async def _ensure_dependencies(self): - """Lazy load dependencies if not available""" - # Ensure LLM service is initialized - if not llm_service._initialized: - await llm_service.initialize() - logger.info("LLM service lazy loaded") - - if not self.rag_module: - try: - # Try to get RAG module from module manager - from app.services.module_manager import module_manager - if hasattr(module_manager, 'modules') and 'rag' in module_manager.modules: - self.rag_module = module_manager.modules['rag'] - logger.info("RAG module lazy loaded from module manager") - except Exception as e: - logger.warning(f"Could not lazy load RAG module: {e}") - - async def _load_prompt_templates(self): - 
"""Load prompt templates from database""" - try: - from app.db.database import SessionLocal - from app.models.prompt_template import PromptTemplate - from sqlalchemy import select - - db = SessionLocal() - try: - result = db.execute( - select(PromptTemplate) - .where(PromptTemplate.is_active == True) - ) - templates = result.scalars().all() - - for template in templates: - self.system_prompts[template.type_key] = template.system_prompt - - logger.info(f"Loaded {len(self.system_prompts)} prompt templates from database") - - finally: - db.close() - - except Exception as e: - logger.warning(f"Could not load prompt templates from database: {e}") - # Fallback to hardcoded prompts - self.system_prompts = { - "assistant": "You are a helpful AI assistant. Provide accurate, concise, and friendly responses. Always aim to be helpful while being honest about your limitations.", - "customer_support": "You are a professional customer support representative. Be empathetic, professional, and solution-focused in all interactions.", - "teacher": "You are an experienced educational tutor. Break down complex concepts into understandable parts. Be patient, supportive, and encouraging.", - "researcher": "You are a thorough research assistant with a focus on accuracy and evidence-based information.", - "creative_writer": "You are an experienced creative writing mentor and storytelling expert.", - "custom": "You are a helpful AI assistant. Your personality and behavior will be defined by custom instructions." - } - - async def get_system_prompt_for_type(self, chatbot_type: str) -> str: - """Get system prompt for a specific chatbot type""" - if chatbot_type in self.system_prompts: - return self.system_prompts[chatbot_type] - - # If not found, try to reload templates - await self._load_prompt_templates() - - return self.system_prompts.get(chatbot_type, self.system_prompts.get("assistant", - "You are a helpful AI assistant. 
Provide accurate, concise, and friendly responses.")) - - async def create_chatbot(self, config: ChatbotConfig, user_id: str, db: Session) -> ChatbotInstance: - """Create a new chatbot instance""" - - # Set system prompt based on type if not provided or empty - if not config.system_prompt or config.system_prompt.strip() == "": - config.system_prompt = await self.get_system_prompt_for_type(config.chatbot_type) - - # Create database record - db_chatbot = DBChatbotInstance( - name=config.name, - description=f"{config.chatbot_type.replace('_', ' ').title()} chatbot", - config=config.__dict__, - created_by=user_id - ) - - db.add(db_chatbot) - db.commit() - db.refresh(db_chatbot) - - # Convert to response model - chatbot = ChatbotInstance( - id=db_chatbot.id, - name=db_chatbot.name, - config=ChatbotConfig(**db_chatbot.config), - created_by=db_chatbot.created_by, - created_at=db_chatbot.created_at, - updated_at=db_chatbot.updated_at, - is_active=db_chatbot.is_active - ) - - logger.info(f"Created new chatbot: {chatbot.name} ({chatbot.id})") - return chatbot - - async def chat_completion(self, request: ChatRequest, user_id: str, db: Session) -> ChatResponse: - """Generate chat completion response""" - - # Get chatbot configuration from database - db_chatbot = db.query(DBChatbotInstance).filter(DBChatbotInstance.id == request.chatbot_id).first() - if not db_chatbot: - raise HTTPException(status_code=404, detail="Chatbot not found") - - chatbot_config = ChatbotConfig(**db_chatbot.config) - - # Get or create conversation - conversation = await self._get_or_create_conversation( - request.conversation_id, request.chatbot_id, user_id, db - ) - - # Create user message - user_message = DBMessage( - conversation_id=conversation.id, - role=MessageRole.USER.value, - content=request.message - ) - db.add(user_message) - db.commit() - db.refresh(user_message) - - logger.info(f"Created user message with ID {user_message.id} for conversation {conversation.id}") - - try: - # Force the session to see the committed changes - db.expire_all() - - # Get conversation history for context - includes the current message we just created - # Fetch up to memory_length pairs of messages (user + assistant) - # The +1 ensures we include the current message if we're at the limit - messages = db.query(DBMessage).filter( - DBMessage.conversation_id == conversation.id - ).order_by(DBMessage.timestamp.desc()).limit(chatbot_config.memory_length * 2 + 1).all() - - logger.info(f"Query for conversation_id={conversation.id}, memory_length={chatbot_config.memory_length}") - logger.info(f"Found {len(messages)} messages in conversation history") - - # If we don't have any messages, manually add the user message we just created - if len(messages) == 0: - logger.warning(f"No messages found in query, but we just created message {user_message.id}") - logger.warning(f"Using the user message we just created") - messages = [user_message] - - for idx, msg in enumerate(messages): - logger.info(f"Message {idx}: id={msg.id}, role={msg.role}, content_preview={msg.content[:50] if msg.content else 'None'}...") - - # Generate response - response_content, sources = await self._generate_response( - request.message, messages, chatbot_config, request.context, db - ) - - # Create assistant message - assistant_message = DBMessage( - conversation_id=conversation.id, - role=MessageRole.ASSISTANT.value, - content=response_content, - sources=sources, - metadata={"model": chatbot_config.model, "temperature": chatbot_config.temperature} - ) - db.add(assistant_message) 
- db.commit() - db.refresh(assistant_message) - - # Update conversation timestamp - conversation.updated_at = datetime.utcnow() - db.commit() - - return ChatResponse( - response=response_content, - conversation_id=conversation.id, - message_id=assistant_message.id, - sources=sources - ) - - except Exception as e: - logger.error(f"Chat completion failed: {e}") - # Return fallback response - fallback = chatbot_config.fallback_responses[0] if chatbot_config.fallback_responses else "I'm having trouble responding right now." - - assistant_message = DBMessage( - conversation_id=conversation.id, - role=MessageRole.ASSISTANT.value, - content=fallback, - metadata={"error": str(e), "fallback": True} - ) - db.add(assistant_message) - db.commit() - db.refresh(assistant_message) - - return ChatResponse( - response=fallback, - conversation_id=conversation.id, - message_id=assistant_message.id, - metadata={"error": str(e), "fallback": True} - ) - - async def _generate_response(self, message: str, db_messages: List[DBMessage], - config: ChatbotConfig, context: Optional[Dict] = None, db: Session = None) -> tuple[str, Optional[List]]: - """Generate response using LLM with optional RAG""" - - # Lazy load dependencies if not available - await self._ensure_dependencies() - - sources = None - rag_context = "" - - # RAG search if enabled - if config.use_rag and config.rag_collection and self.rag_module: - logger.info(f"RAG search enabled for collection: {config.rag_collection}") - try: - # Get the Qdrant collection name from RAG collection - qdrant_collection_name = await self._get_qdrant_collection_name(config.rag_collection, db) - logger.info(f"Qdrant collection name: {qdrant_collection_name}") - - if qdrant_collection_name: - logger.info(f"Searching RAG documents: query='{message[:50]}...', max_results={config.rag_top_k}") - rag_results = await self.rag_module.search_documents( - query=message, - max_results=config.rag_top_k, - collection_name=qdrant_collection_name, - score_threshold=config.rag_score_threshold - ) - - if rag_results: - logger.info(f"RAG search found {len(rag_results)} results") - sources = [{"title": f"Document {i+1}", "content": result.document.content[:200]} - for i, result in enumerate(rag_results)] - - # Build full RAG context from all results - rag_context = "\n\nRelevant information from knowledge base:\n" + "\n\n".join([ - f"[Document {i+1}]:\n{result.document.content}" for i, result in enumerate(rag_results) - ]) - - # Detailed RAG logging - ALWAYS log for debugging - logger.info("=== COMPREHENSIVE RAG SEARCH RESULTS ===") - logger.info(f"Query: '{message}'") - logger.info(f"Collection: {qdrant_collection_name}") - logger.info(f"Number of results: {len(rag_results)}") - for i, result in enumerate(rag_results): - logger.info(f"\n--- RAG Result {i+1} ---") - logger.info(f"Score: {getattr(result, 'score', 'N/A')}") - logger.info(f"Document ID: {getattr(result.document, 'id', 'N/A')}") - logger.info(f"Full Content ({len(result.document.content)} chars):") - logger.info(f"{result.document.content}") - if hasattr(result.document, 'metadata'): - logger.info(f"Metadata: {result.document.metadata}") - logger.info(f"\n=== RAG CONTEXT BEING ADDED TO PROMPT ({len(rag_context)} chars) ===") - logger.info(rag_context) - logger.info("=== END RAG SEARCH RESULTS ===") - else: - logger.warning("RAG search returned no results") - else: - logger.warning(f"RAG collection '{config.rag_collection}' not found in database") - - except Exception as e: - logger.warning(f"RAG search failed: {e}") - import 
traceback - logger.warning(f"RAG search traceback: {traceback.format_exc()}") - - # Build conversation context (includes the current message from db_messages) - messages = self._build_conversation_messages(db_messages, config, rag_context, context) - - # Note: Current user message is already included in db_messages from the query - logger.info(f"Built conversation context with {len(messages)} messages") - - # LLM completion - logger.info(f"Attempting LLM completion with model: {config.model}") - logger.info(f"Messages to send: {len(messages)} messages") - - # Always log detailed prompts for debugging - logger.info("=== COMPREHENSIVE LLM REQUEST ===") - logger.info(f"Model: {config.model}") - logger.info(f"Temperature: {config.temperature}") - logger.info(f"Max tokens: {config.max_tokens}") - logger.info(f"RAG enabled: {config.use_rag}") - logger.info(f"RAG collection: {config.rag_collection}") - if config.use_rag and rag_context: - logger.info(f"RAG context added: {len(rag_context)} characters") - logger.info(f"RAG sources: {len(sources) if sources else 0} documents") - logger.info("\n=== COMPLETE MESSAGES SENT TO LLM ===") - for i, msg in enumerate(messages): - logger.info(f"\n--- Message {i+1} ---") - logger.info(f"Role: {msg['role']}") - logger.info(f"Content ({len(msg['content'])} chars):") - # Truncate long content for logging (full RAG context can be very long) - if len(msg['content']) > 500: - logger.info(f"{msg['content'][:500]}... [truncated, total {len(msg['content'])} chars]") - else: - logger.info(msg['content']) - logger.info("=== END COMPREHENSIVE LLM REQUEST ===") - - try: - logger.info("Calling LLM service create_chat_completion...") - - # Convert messages to LLM service format - llm_messages = [LLMChatMessage(role=msg["role"], content=msg["content"]) for msg in messages] - - # Create LLM service request - llm_request = LLMChatRequest( - model=config.model, - messages=llm_messages, - temperature=config.temperature, - max_tokens=config.max_tokens, - user_id="chatbot_user", - api_key_id=0 # Chatbot module uses internal service - ) - - # Make request to LLM service - llm_response = await llm_service.create_chat_completion(llm_request) - - # Extract response content - if llm_response.choices: - content = llm_response.choices[0].message.content - logger.info(f"Response content length: {len(content)}") - - # Always log response for debugging - logger.info("=== COMPREHENSIVE LLM RESPONSE ===") - logger.info(f"Response content ({len(content)} chars):") - logger.info(content) - if llm_response.usage: - usage = llm_response.usage - logger.info(f"Token usage - Prompt: {usage.prompt_tokens}, Completion: {usage.completion_tokens}, Total: {usage.total_tokens}") - if sources: - logger.info(f"RAG sources included: {len(sources)} documents") - logger.info("=== END COMPREHENSIVE LLM RESPONSE ===") - - return content, sources - else: - logger.warning("No choices in LLM response") - return "I received an empty response from the AI model.", sources - - except SecurityError as e: - logger.error(f"Security error in LLM completion: {e}") - raise HTTPException(status_code=400, detail=f"Security validation failed: {e.message}") - except ProviderError as e: - logger.error(f"Provider error in LLM completion: {e}") - raise HTTPException(status_code=503, detail="LLM service temporarily unavailable") - except LLMError as e: - logger.error(f"LLM service error: {e}") - raise HTTPException(status_code=500, detail="LLM service error") - except Exception as e: - logger.error(f"LLM completion failed: {e}") - # 
Return fallback if available - return "I'm currently unable to process your request. Please try again later.", None - - def _build_conversation_messages(self, db_messages: List[DBMessage], config: ChatbotConfig, - rag_context: str = "", context: Optional[Dict] = None) -> List[Dict]: - """Build messages array for LLM completion""" - - messages = [] - - # System prompt - system_prompt = config.system_prompt - if rag_context: - # Add explicit instruction to use RAG context - system_prompt += "\n\nIMPORTANT: Use the following information from the knowledge base to answer the user's question. " \ - "This information is directly relevant to their query and should be your primary source:\n" + rag_context - if context and context.get('additional_instructions'): - system_prompt += f"\n\nAdditional instructions: {context['additional_instructions']}" - - messages.append({"role": "system", "content": system_prompt}) - - logger.info(f"Building messages from {len(db_messages)} database messages") - - # Conversation history (messages are already limited by memory_length in the query) - # Reverse to get chronological order - # Include ALL messages - the current user message is needed for the LLM to respond! - for idx, msg in enumerate(reversed(db_messages)): - logger.info(f"Processing message {idx}: role={msg.role}, content_preview={msg.content[:50] if msg.content else 'None'}...") - if msg.role in ["user", "assistant"]: - messages.append({ - "role": msg.role, - "content": msg.content - }) - logger.info(f"Added message with role {msg.role} to LLM messages") - else: - logger.info(f"Skipped message with role {msg.role}") - - logger.info(f"Final messages array has {len(messages)} messages") # For debugging, can be removed in production - return messages - - async def _get_or_create_conversation(self, conversation_id: Optional[str], - chatbot_id: str, user_id: str, db: Session) -> DBConversation: - """Get existing conversation or create new one""" - - if conversation_id: - conversation = db.query(DBConversation).filter(DBConversation.id == conversation_id).first() - if conversation: - return conversation - - # Create new conversation - conversation = DBConversation( - chatbot_id=chatbot_id, - user_id=user_id, - title="New Conversation" - ) - - db.add(conversation) - db.commit() - db.refresh(conversation) - return conversation - - def get_router(self) -> APIRouter: - """Get FastAPI router for chatbot endpoints""" - router = APIRouter(prefix="/chatbot", tags=["chatbot"]) - - @router.post("/chat", response_model=ChatResponse) - async def chat_endpoint( - request: ChatRequest, - current_user: User = Depends(get_current_user), - db: Session = Depends(get_db) - ): - """Chat completion endpoint""" - return await self.chat_completion(request, str(current_user['id']), db) - - @router.post("/create", response_model=ChatbotInstance) - async def create_chatbot_endpoint( - config: ChatbotConfig, - current_user: User = Depends(get_current_user), - db: Session = Depends(get_db) - ): - """Create new chatbot instance""" - return await self.create_chatbot(config, str(current_user['id']), db) - - @router.get("/list", response_model=List[ChatbotInstance]) - async def list_chatbots_endpoint( - current_user: User = Depends(get_current_user), - db: Session = Depends(get_db) - ): - """List user's chatbots""" - db_chatbots = db.query(DBChatbotInstance).filter( - (DBChatbotInstance.created_by == str(current_user['id'])) | - (DBChatbotInstance.created_by == "system") - ).all() - - chatbots = [] - for db_chatbot in db_chatbots: - 
chatbot = ChatbotInstance( - id=db_chatbot.id, - name=db_chatbot.name, - config=ChatbotConfig(**db_chatbot.config), - created_by=db_chatbot.created_by, - created_at=db_chatbot.created_at, - updated_at=db_chatbot.updated_at, - is_active=db_chatbot.is_active - ) - chatbots.append(chatbot) - - return chatbots - - @router.get("/conversations/{conversation_id}", response_model=Conversation) - async def get_conversation_endpoint( - conversation_id: str, - current_user: User = Depends(get_current_user), - db: Session = Depends(get_db) - ): - """Get conversation history""" - conversation = db.query(DBConversation).filter( - DBConversation.id == conversation_id - ).first() - - if not conversation: - raise HTTPException(status_code=404, detail="Conversation not found") - - # Check if user owns this conversation - if conversation.user_id != str(current_user['id']): - raise HTTPException(status_code=403, detail="Not authorized") - - # Get messages - messages = db.query(DBMessage).filter( - DBMessage.conversation_id == conversation_id - ).order_by(DBMessage.timestamp).all() - - # Convert to response model - chat_messages = [] - for msg in messages: - chat_message = ChatMessage( - id=msg.id, - role=MessageRole(msg.role), - content=msg.content, - timestamp=msg.timestamp, - metadata=msg.metadata or {}, - sources=msg.sources - ) - chat_messages.append(chat_message) - - response_conversation = Conversation( - id=conversation.id, - chatbot_id=conversation.chatbot_id, - user_id=conversation.user_id, - messages=chat_messages, - created_at=conversation.created_at, - updated_at=conversation.updated_at, - metadata=conversation.context_data or {} - ) - - return response_conversation - - @router.get("/types", response_model=List[Dict[str, str]]) - async def get_chatbot_types_endpoint(): - """Get available chatbot types and their descriptions""" - return [ - {"type": "assistant", "name": "General Assistant", "description": "Helpful AI assistant for general questions"}, - {"type": "customer_support", "name": "Customer Support", "description": "Professional customer service chatbot"}, - {"type": "teacher", "name": "Teacher", "description": "Educational tutor and learning assistant"}, - {"type": "researcher", "name": "Researcher", "description": "Research assistant with fact-checking focus"}, - {"type": "creative_writer", "name": "Creative Writer", "description": "Creative writing and storytelling assistant"}, - {"type": "custom", "name": "Custom", "description": "Custom chatbot with user-defined personality"} - ] - - return router - - # API Compatibility Methods - async def chat(self, chatbot_config: Dict[str, Any], message: str, - conversation_history: List = None, user_id: str = "anonymous") -> Dict[str, Any]: - """Chat method for API compatibility""" - logger.info(f"Chat method called with message: {message[:50]}... 
by user: {user_id}") - - # Lazy load dependencies - await self._ensure_dependencies() - - logger.info(f"LLM service available: {llm_service._initialized}") - logger.info(f"RAG module available: {self.rag_module is not None}") - - try: - # Create a minimal database session for the chat - from app.db.database import SessionLocal - db = SessionLocal() - - try: - # Convert config dict to ChatbotConfig - config = ChatbotConfig( - name=chatbot_config.get("name", "Unknown"), - chatbot_type=chatbot_config.get("chatbot_type", "assistant"), - model=chatbot_config.get("model", "gpt-3.5-turbo"), - system_prompt=chatbot_config.get("system_prompt", ""), - temperature=chatbot_config.get("temperature", 0.7), - max_tokens=chatbot_config.get("max_tokens", 1000), - memory_length=chatbot_config.get("memory_length", 10), - use_rag=chatbot_config.get("use_rag", False), - rag_collection=chatbot_config.get("rag_collection"), - rag_top_k=chatbot_config.get("rag_top_k", 5), - fallback_responses=chatbot_config.get("fallback_responses", []) - ) - - # Generate response using internal method - # Create a temporary message object for the current user message - temp_messages = [ - DBMessage( - id=0, - conversation_id=0, - role="user", - content=message, - timestamp=datetime.utcnow(), - metadata={} - ) - ] - - response_content, sources = await self._generate_response( - message, temp_messages, config, None, db - ) - - return { - "response": response_content, - "sources": sources, - "conversation_id": None, - "message_id": f"msg_{uuid.uuid4()}" - } - - finally: - db.close() - - except Exception as e: - logger.error(f"Chat method failed: {e}") - fallback_responses = chatbot_config.get("fallback_responses", [ - "I'm sorry, I'm having trouble processing your request right now." - ]) - return { - "response": fallback_responses[0] if fallback_responses else "I'm sorry, I couldn't process your request.", - "sources": None, - "conversation_id": None, - "message_id": f"msg_{uuid.uuid4()}" - } - - # Workflow Integration Methods - async def workflow_chat_step(self, context: Dict[str, Any], step_config: Dict[str, Any], db: Session) -> Dict[str, Any]: - """Execute chatbot as a workflow step""" - - message = step_config.get('message', '') - chatbot_id = step_config.get('chatbot_id') - use_rag = step_config.get('use_rag', False) - - # Template substitution from context - message = self._substitute_template_variables(message, context) - - request = ChatRequest( - message=message, - chatbot_id=chatbot_id, - use_rag=use_rag, - context=step_config.get('context', {}) - ) - - # Use system user for workflow executions - response = await self.chat_completion(request, "workflow_system", db) - - return { - "response": response.response, - "conversation_id": response.conversation_id, - "sources": response.sources, - "metadata": response.metadata - } - - def _substitute_template_variables(self, template: str, context: Dict[str, Any]) -> str: - """Simple template variable substitution""" - import re - - def replace_var(match): - var_path = match.group(1) - try: - # Simple dot notation support: context.user.name - value = context - for part in var_path.split('.'): - value = value[part] - return str(value) - except (KeyError, TypeError): - return match.group(0) # Return original if not found - - return re.sub(r'\\{\\{\\s*([^}]+)\\s*\\}\\}', replace_var, template) - - async def _get_qdrant_collection_name(self, collection_identifier: str, db: Session) -> Optional[str]: - """Get Qdrant collection name from RAG collection ID, name, or direct Qdrant 
collection""" - try: - from app.models.rag_collection import RagCollection - from sqlalchemy import select - - logger.info(f"Looking up RAG collection with identifier: '{collection_identifier}'") - - # First check if this collection exists in Qdrant directly - # Qdrant is the source of truth for collections - if True: # Always check Qdrant first - # Check if this collection exists in Qdrant directly - actual_collection_name = collection_identifier - # Remove "ext_" prefix if present - if collection_identifier.startswith("ext_"): - actual_collection_name = collection_identifier[4:] - - logger.info(f"Checking if '{actual_collection_name}' exists in Qdrant directly") - if self.rag_module: - try: - # Try to verify the collection exists in Qdrant - from qdrant_client import QdrantClient - qdrant_client = QdrantClient(host="enclava-qdrant", port=6333) - collections = qdrant_client.get_collections() - collection_names = [c.name for c in collections.collections] - - if actual_collection_name in collection_names: - logger.info(f"Found Qdrant collection directly: {actual_collection_name}") - - # Auto-register the collection in the database if not found - await self._auto_register_collection(actual_collection_name, db) - - return actual_collection_name - except Exception as e: - logger.warning(f"Error checking Qdrant collections: {e}") - - rag_collection = None - - # Then try PostgreSQL lookup by ID if numeric - if collection_identifier.isdigit(): - logger.info(f"Treating '{collection_identifier}' as collection ID") - stmt = select(RagCollection).where( - RagCollection.id == int(collection_identifier), - RagCollection.is_active == True - ) - result = db.execute(stmt) - rag_collection = result.scalar_one_or_none() - - # If not found by ID, try to look up by name in PostgreSQL - if not rag_collection: - logger.info(f"Collection not found by ID, trying by name: '{collection_identifier}'") - stmt = select(RagCollection).where( - RagCollection.name == collection_identifier, - RagCollection.is_active == True - ) - result = db.execute(stmt) - rag_collection = result.scalar_one_or_none() - - if rag_collection: - logger.info(f"Found RAG collection: ID={rag_collection.id}, name='{rag_collection.name}', qdrant_collection='{rag_collection.qdrant_collection_name}'") - return rag_collection.qdrant_collection_name - else: - logger.warning(f"RAG collection '{collection_identifier}' not found in database (tried both ID and name)") - return None - - except Exception as e: - logger.error(f"Error looking up RAG collection '{collection_identifier}': {e}") - import traceback - logger.error(f"Traceback: {traceback.format_exc()}") - return None - - async def _auto_register_collection(self, collection_name: str, db: Session) -> None: - """Automatically register a Qdrant collection in the database""" - try: - from app.models.rag_collection import RagCollection - from sqlalchemy import select - - # Check if already registered - stmt = select(RagCollection).where( - RagCollection.qdrant_collection_name == collection_name - ) - result = db.execute(stmt) - existing = result.scalar_one_or_none() - - if existing: - logger.info(f"Collection '{collection_name}' already registered in database") - return - - # Create a readable name from collection name - display_name = collection_name.replace("-", " ").replace("_", " ").title() - - # Auto-register the collection - new_collection = RagCollection( - name=display_name, - qdrant_collection_name=collection_name, - description=f"Auto-discovered collection from Qdrant: {collection_name}", - 
is_active=True - ) - - db.add(new_collection) - db.commit() - - logger.info(f"Auto-registered Qdrant collection '{collection_name}' in database") - - except Exception as e: - logger.error(f"Failed to auto-register collection '{collection_name}': {e}") - # Don't re-raise - this should not block collection usage - - # Required abstract methods from BaseModule - - async def cleanup(self): - """Cleanup chatbot module resources""" - logger.info("Chatbot module cleanup completed") - - def get_required_permissions(self) -> List[Permission]: - """Get required permissions for chatbot module""" - return [ - Permission("chatbots", "create", "Create chatbot instances"), - Permission("chatbots", "configure", "Configure chatbot settings"), - Permission("chatbots", "chat", "Use chatbot for conversations"), - Permission("chatbots", "manage", "Manage all chatbots") - ] - - async def process_request(self, request_type: str, data: Dict[str, Any], context: Dict[str, Any]) -> Dict[str, Any]: - """Process chatbot requests""" - if request_type == "chat": - # Handle chat requests - chat_request = ChatRequest(**data) - user_id = context.get("user_id", "anonymous") - db = context.get("db") - - if db: - response = await self.chat_completion(chat_request, user_id, db) - return { - "success": True, - "response": response.response, - "conversation_id": response.conversation_id, - "sources": response.sources - } - - return {"success": False, "error": f"Unknown request type: {request_type}"} - - -# Module factory function -def create_module(rag_service: Optional[RAGServiceProtocol] = None) -> ChatbotModule: - """Factory function to create chatbot module instance""" - return ChatbotModule(rag_service=rag_service) - -# Create module instance (dependencies will be injected via factory) -chatbot_module = ChatbotModule() diff --git a/backend/modules/chatbot/module.yaml b/backend/modules/chatbot/module.yaml deleted file mode 100644 index 7d9fbd3..0000000 --- a/backend/modules/chatbot/module.yaml +++ /dev/null @@ -1,110 +0,0 @@ -name: chatbot -version: 1.0.0 -description: "AI Chatbot with RAG integration and customizable prompts" -author: "Enclava Team" -category: "conversation" - -# Module lifecycle -enabled: true -auto_start: true -dependencies: - - rag -optional_dependencies: - - analytics - -# Configuration -config_schema: "./config_schema.json" -ui_components: "./ui_components/" - -# Module capabilities -provides: - - "chat_completion" - - "conversation_management" - - "chatbot_configuration" - -consumes: - - "rag_search" - - "llm_completion" - -# API endpoints -endpoints: - - path: "/chatbot/chat" - method: "POST" - description: "Generate chat completion" - - - path: "/chatbot/create" - method: "POST" - description: "Create new chatbot instance" - - - path: "/chatbot/list" - method: "GET" - description: "List user chatbots" - -# UI Configuration -ui_config: - icon: "message-circle" - color: "#10B981" - category: "AI & ML" - - # Configuration forms - forms: - - name: "basic_config" - title: "Basic Settings" - fields: ["name", "chatbot_type", "model"] - - - name: "personality" - title: "Personality & Prompts" - fields: ["system_prompt", "temperature", "fallback_responses"] - - - name: "knowledge_base" - title: "Knowledge Base" - fields: ["use_rag", "rag_collection", "rag_top_k"] - - - name: "advanced" - title: "Advanced Settings" - fields: ["max_tokens", "memory_length"] - -# Permissions -permissions: - - name: "chatbot.create" - description: "Create new chatbot instances" - - - name: "chatbot.configure" - description: 
"Configure chatbot settings" - - - name: "chatbot.chat" - description: "Use chatbot for conversations" - - - name: "chatbot.manage" - description: "Manage all chatbots (admin)" - -# Analytics events -analytics_events: - - name: "chatbot_created" - description: "New chatbot instance created" - - - name: "chat_message_sent" - description: "User sent message to chatbot" - - - name: "chat_response_generated" - description: "Chatbot generated response" - - - name: "rag_context_used" - description: "RAG context was used in response" - -# Health checks -health_checks: - - name: "llm_connectivity" - description: "Check LLM client connection" - - - name: "rag_availability" - description: "Check RAG module availability" - - - name: "conversation_memory" - description: "Check conversation storage health" - -# Documentation -documentation: - readme: "./README.md" - examples: "./examples/" - api_docs: "./docs/api.md" \ No newline at end of file diff --git a/backend/modules/factory.py b/backend/modules/factory.py deleted file mode 100644 index 7e4590a..0000000 --- a/backend/modules/factory.py +++ /dev/null @@ -1,225 +0,0 @@ -""" -Module Factory for Confidential Empire - -This factory creates and wires up all modules with their dependencies. -It ensures proper dependency injection while maintaining optimal performance -through direct method calls and minimal indirection. -""" - -from typing import Dict, Optional, Any -import logging - -# Import all modules -from .rag.main import RAGModule -from .chatbot.main import ChatbotModule, create_module as create_chatbot_module -from .workflow.main import WorkflowModule - -# Import services that modules depend on -from app.services.litellm_client import LiteLLMClient - -# Import protocols for type safety -from .protocols import ( - RAGServiceProtocol, - ChatbotServiceProtocol, - LiteLLMClientProtocol, - WorkflowServiceProtocol, - ServiceRegistry -) - -logger = logging.getLogger(__name__) - - -class ModuleFactory: - """Factory for creating and wiring module dependencies""" - - def __init__(self): - self.modules: Dict[str, Any] = {} - self.initialized = False - - async def create_all_modules(self, config: Optional[Dict[str, Any]] = None) -> ServiceRegistry: - """ - Create all modules with proper dependency injection - - Args: - config: Optional configuration for modules - - Returns: - Dictionary of created modules with their dependencies wired - """ - config = config or {} - - logger.info("Creating modules with dependency injection...") - - # Step 1: Create LiteLLM client (shared dependency) - litellm_client = LiteLLMClient() - - # Step 2: Create RAG module (no dependencies on other modules) - rag_module = RAGModule(config=config.get("rag", {})) - - # Step 3: Create chatbot module with RAG dependency - chatbot_module = create_chatbot_module( - litellm_client=litellm_client, - rag_service=rag_module # RAG module implements RAGServiceProtocol - ) - - # Step 4: Create workflow module with chatbot dependency - workflow_module = WorkflowModule( - chatbot_service=chatbot_module # Chatbot module implements ChatbotServiceProtocol - ) - - # Store all modules - modules = { - "rag": rag_module, - "chatbot": chatbot_module, - "workflow": workflow_module - } - - logger.info(f"Created {len(modules)} modules with dependencies wired") - - # Initialize all modules - await self._initialize_modules(modules, config) - - self.modules = modules - self.initialized = True - - return modules - - async def _initialize_modules(self, modules: Dict[str, Any], config: Dict[str, Any]): - 
"""Initialize all modules in dependency order""" - - # Initialize in dependency order (modules with no deps first) - initialization_order = [ - ("rag", modules["rag"]), - ("chatbot", modules["chatbot"]), # Depends on RAG - ("workflow", modules["workflow"]) # Depends on Chatbot - ] - - for module_name, module in initialization_order: - try: - logger.info(f"Initializing {module_name} module...") - module_config = config.get(module_name, {}) - - # Different modules have different initialization patterns - if hasattr(module, 'initialize'): - if module_name == "rag": - await module.initialize() - else: - await module.initialize(**module_config) - - logger.info(f"✅ {module_name} module initialized successfully") - - except Exception as e: - logger.error(f"❌ Failed to initialize {module_name} module: {e}") - raise RuntimeError(f"Module initialization failed: {module_name}") from e - - async def cleanup_all_modules(self): - """Cleanup all modules in reverse dependency order""" - if not self.initialized: - return - - # Cleanup in reverse order - cleanup_order = ["workflow", "chatbot", "rag"] - - for module_name in cleanup_order: - if module_name in self.modules: - try: - logger.info(f"Cleaning up {module_name} module...") - module = self.modules[module_name] - if hasattr(module, 'cleanup'): - await module.cleanup() - logger.info(f"✅ {module_name} module cleaned up") - except Exception as e: - logger.error(f"❌ Error cleaning up {module_name}: {e}") - - self.modules.clear() - self.initialized = False - - def get_module(self, name: str) -> Optional[Any]: - """Get a module by name""" - return self.modules.get(name) - - def is_initialized(self) -> bool: - """Check if factory is initialized""" - return self.initialized - - -# Global factory instance -module_factory = ModuleFactory() - - -# Convenience functions for external use -async def create_modules(config: Optional[Dict[str, Any]] = None) -> ServiceRegistry: - """Create all modules with dependencies wired""" - return await module_factory.create_all_modules(config) - - -async def cleanup_modules(): - """Cleanup all modules""" - await module_factory.cleanup_all_modules() - - -def get_module(name: str) -> Optional[Any]: - """Get a module by name""" - return module_factory.get_module(name) - - -def get_all_modules() -> Dict[str, Any]: - """Get all modules""" - return module_factory.modules.copy() - - -# Factory functions for individual modules (for testing/special cases) -def create_rag_module(config: Optional[Dict[str, Any]] = None) -> RAGModule: - """Create RAG module""" - return RAGModule(config=config or {}) - - -def create_chatbot_with_rag(rag_service: RAGServiceProtocol, - litellm_client: LiteLLMClientProtocol) -> ChatbotModule: - """Create chatbot module with RAG dependency""" - return create_chatbot_module(litellm_client=litellm_client, rag_service=rag_service) - - -def create_workflow_with_chatbot(chatbot_service: ChatbotServiceProtocol) -> WorkflowModule: - """Create workflow module with chatbot dependency""" - return WorkflowModule(chatbot_service=chatbot_service) - - -# Module registry for backward compatibility -class ModuleRegistry: - """Registry that provides access to modules (for backward compatibility)""" - - def __init__(self, factory: ModuleFactory): - self._factory = factory - - @property - def modules(self) -> Dict[str, Any]: - """Get all modules (compatible with existing module_manager interface)""" - return self._factory.modules - - def get(self, name: str) -> Optional[Any]: - """Get module by name""" - return 
self._factory.get_module(name) - - def __getitem__(self, name: str) -> Any: - """Support dictionary-style access""" - module = self.get(name) - if module is None: - raise KeyError(f"Module '{name}' not found") - return module - - def keys(self): - """Get module names""" - return self._factory.modules.keys() - - def values(self): - """Get module instances""" - return self._factory.modules.values() - - def items(self): - """Get module name-instance pairs""" - return self._factory.modules.items() - - -# Create registry instance for backward compatibility -module_registry = ModuleRegistry(module_factory) \ No newline at end of file diff --git a/backend/modules/protocols.py b/backend/modules/protocols.py deleted file mode 100644 index 2aec3b2..0000000 --- a/backend/modules/protocols.py +++ /dev/null @@ -1,258 +0,0 @@ -""" -Module Protocols for Confidential Empire - -This file defines the interface contracts that modules must implement for inter-module communication. -Using Python protocols provides compile-time type checking with zero runtime overhead. -""" - -from typing import Protocol, Dict, List, Any, Optional, Union -from datetime import datetime -from abc import abstractmethod - - -class RAGServiceProtocol(Protocol): - """Protocol for RAG (Retrieval-Augmented Generation) service interface""" - - @abstractmethod - async def search(self, query: str, collection_name: str, top_k: int) -> Dict[str, Any]: - """ - Search for relevant documents - - Args: - query: Search query string - collection_name: Name of the collection to search in - top_k: Number of top results to return - - Returns: - Dictionary containing search results with 'results' key - """ - ... - - @abstractmethod - async def index_document(self, content: str, metadata: Dict[str, Any] = None) -> str: - """ - Index a document in the vector database - - Args: - content: Document content to index - metadata: Optional metadata for the document - - Returns: - Document ID - """ - ... - - @abstractmethod - async def delete_document(self, document_id: str) -> bool: - """ - Delete a document from the vector database - - Args: - document_id: ID of document to delete - - Returns: - True if successfully deleted - """ - ... - - -class ChatbotServiceProtocol(Protocol): - """Protocol for Chatbot service interface""" - - @abstractmethod - async def chat_completion(self, request: Any, user_id: str, db: Any) -> Any: - """ - Generate chat completion response - - Args: - request: Chat request object - user_id: ID of the user making the request - db: Database session - - Returns: - Chat response object - """ - ... - - @abstractmethod - async def create_chatbot(self, config: Any, user_id: str, db: Any) -> Any: - """ - Create a new chatbot instance - - Args: - config: Chatbot configuration - user_id: ID of the user creating the chatbot - db: Database session - - Returns: - Created chatbot instance - """ - ... - - -class LiteLLMClientProtocol(Protocol): - """Protocol for LiteLLM client interface""" - - @abstractmethod - async def completion(self, model: str, messages: List[Dict[str, str]], **kwargs) -> Any: - """ - Create a completion using the specified model - - Args: - model: Model name to use - messages: List of messages for the conversation - **kwargs: Additional parameters for the completion - - Returns: - Completion response object - """ - ... 
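The "zero runtime overhead" claim in the protocols module comes from structural typing: a concrete service never inherits from these protocols, it only has to expose matching methods, and the type checker verifies the match at analysis time. A compressed sketch under that assumption; `SearchService`, `InMemorySearch`, and `wire` are invented for illustration:

```python
from typing import Any, Dict, Protocol

class SearchService(Protocol):
    async def search(self, query: str, collection_name: str, top_k: int) -> Dict[str, Any]: ...

class InMemorySearch:
    """Satisfies SearchService structurally; no inheritance, no registration."""
    async def search(self, query: str, collection_name: str, top_k: int) -> Dict[str, Any]:
        return {"results": []}

def wire(search: SearchService) -> SearchService:
    # mypy/pyright accept InMemorySearch here because its method signature
    # matches the protocol; nothing extra happens at runtime.
    return search

backend = wire(InMemorySearch())
```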
- - @abstractmethod - async def create_chat_completion(self, model: str, messages: List[Dict[str, str]], - user_id: str, api_key_id: str, **kwargs) -> Any: - """ - Create a chat completion with user tracking - - Args: - model: Model name to use - messages: List of messages for the conversation - user_id: ID of the user making the request - api_key_id: API key identifier - **kwargs: Additional parameters - - Returns: - Chat completion response - """ - ... - - -class CacheServiceProtocol(Protocol): - """Protocol for Cache service interface""" - - @abstractmethod - async def get(self, key: str, default: Any = None) -> Any: - """ - Get value from cache - - Args: - key: Cache key - default: Default value if key not found - - Returns: - Cached value or default - """ - ... - - @abstractmethod - async def set(self, key: str, value: Any, ttl: Optional[int] = None) -> bool: - """ - Set value in cache - - Args: - key: Cache key - value: Value to cache - ttl: Time to live in seconds - - Returns: - True if successfully cached - """ - ... - - @abstractmethod - async def delete(self, key: str) -> bool: - """ - Delete key from cache - - Args: - key: Cache key to delete - - Returns: - True if successfully deleted - """ - ... - - -class SecurityServiceProtocol(Protocol): - """Protocol for Security service interface""" - - @abstractmethod - async def analyze_request(self, request: Any) -> Any: - """ - Perform security analysis on a request - - Args: - request: Request object to analyze - - Returns: - Security analysis result - """ - ... - - @abstractmethod - async def validate_request(self, request: Any) -> bool: - """ - Validate request for security compliance - - Args: - request: Request object to validate - - Returns: - True if request is valid/safe - """ - ... - - -class WorkflowServiceProtocol(Protocol): - """Protocol for Workflow service interface""" - - @abstractmethod - async def execute_workflow(self, workflow: Any, input_data: Dict[str, Any] = None) -> Any: - """ - Execute a workflow definition - - Args: - workflow: Workflow definition to execute - input_data: Optional input data for the workflow - - Returns: - Workflow execution result - """ - ... - - @abstractmethod - async def get_execution(self, execution_id: str) -> Any: - """ - Get workflow execution status - - Args: - execution_id: ID of the execution to retrieve - - Returns: - Execution status object - """ - ... - - -class ModuleServiceProtocol(Protocol): - """Base protocol for all module services""" - - @abstractmethod - async def initialize(self, **kwargs) -> None: - """Initialize the module""" - ... - - @abstractmethod - async def cleanup(self) -> None: - """Cleanup module resources""" - ... - - @abstractmethod - def get_required_permissions(self) -> List[Any]: - """Get required permissions for this module""" - ... 
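ModuleServiceProtocol is the surface the factory above relies on when it initializes modules in dependency order (rag, then chatbot, then workflow) and cleans them up in reverse. A hedged sketch of that lifecycle; `DemoModule` and the hard-coded order are illustrative, not the factory's actual types:

```python
import asyncio
from typing import Any, Dict, List

class DemoModule:
    """Toy module exposing the ModuleServiceProtocol surface."""
    def __init__(self, name: str) -> None:
        self.name = name
    async def initialize(self, **kwargs: Any) -> None:
        print(f"initialize {self.name}")
    async def cleanup(self) -> None:
        print(f"cleanup {self.name}")
    def get_required_permissions(self) -> List[Any]:
        return []

async def run_lifecycle(registry: Dict[str, DemoModule]) -> None:
    order = ["rag", "chatbot", "workflow"]   # dependencies first
    for name in order:
        await registry[name].initialize()
    for name in reversed(order):             # tear down dependents first
        await registry[name].cleanup()

asyncio.run(run_lifecycle({n: DemoModule(n) for n in ("rag", "chatbot", "workflow")}))
```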
- - -# Type aliases for common service combinations -ServiceRegistry = Dict[str, ModuleServiceProtocol] -ServiceDependencies = Dict[str, Optional[ModuleServiceProtocol]] \ No newline at end of file diff --git a/backend/modules/rag/__init__.py b/backend/modules/rag/__init__.py deleted file mode 100644 index 7cd6008..0000000 --- a/backend/modules/rag/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -""" -RAG (Retrieval-Augmented Generation) module for Confidential Empire platform -""" -from .main import RAGModule - -__all__ = ["RAGModule"] \ No newline at end of file diff --git a/backend/modules/rag/main.py b/backend/modules/rag/main.py deleted file mode 100644 index c3035f0..0000000 --- a/backend/modules/rag/main.py +++ /dev/null @@ -1,2083 +0,0 @@ -""" -RAG module implementation with vector database and document processing -Includes comprehensive document processing, content extraction, and NLP analysis -""" -import asyncio -import io -import json -import logging -import mimetypes -import re -import time -from typing import Any, Dict, List, Optional, Tuple, Union -from datetime import datetime -from dataclasses import dataclass, asdict -from pathlib import Path -import hashlib -import base64 -import numpy as np -import uuid - -# Initialize logger early -logger = logging.getLogger(__name__) - -# Document processing libraries (with graceful fallbacks) -try: - import nltk - from nltk.tokenize import sent_tokenize, word_tokenize - from nltk.corpus import stopwords - from nltk.stem import WordNetLemmatizer - NLTK_AVAILABLE = True -except ImportError: - logger.warning("NLTK not available - NLP features will be limited") - NLTK_AVAILABLE = False - -try: - import spacy - SPACY_AVAILABLE = True -except ImportError: - logger.warning("spaCy not available - entity extraction will be disabled") - SPACY_AVAILABLE = False - -try: - from markitdown import MarkItDown - MARKITDOWN_AVAILABLE = True -except ImportError: - logger.warning("MarkItDown not available - document conversion will be limited") - MARKITDOWN_AVAILABLE = False - -try: - from docx import Document as DocxDocument - PYTHON_DOCX_AVAILABLE = True -except ImportError: - logger.warning("python-docx not available - DOCX processing will be limited") - PYTHON_DOCX_AVAILABLE = False - -from qdrant_client import QdrantClient -from qdrant_client.models import Distance, VectorParams, PointStruct, ScoredPoint, Filter, FieldCondition, MatchValue -from qdrant_client.http import models -import tiktoken - -from app.core.config import settings -from app.core.logging import log_module_event -from app.services.base_module import BaseModule, Permission - - -@dataclass -class ProcessedDocument: - """Processed document data structure""" - id: str - original_filename: str - file_type: str - mime_type: str - content: str - extracted_text: str - metadata: Dict[str, Any] - word_count: int - sentence_count: int - language: str - entities: List[Dict[str, Any]] - keywords: List[str] - processing_time: float - processed_at: datetime - file_hash: str - file_size: int - embedding: Optional[List[float]] = None - created_at: datetime = None - - def __post_init__(self): - if self.created_at is None: - self.created_at = datetime.utcnow() - - -@dataclass -class ContentValidationResult: - """Content validation result""" - is_valid: bool - issues: List[str] - security_score: float - content_type: str - language_confidence: float - - -# Keep Document class for backward compatibility -@dataclass -class Document: - """Simple document data structure for backward compatibility""" - id: str 
- content: str - metadata: Dict[str, Any] - embedding: Optional[List[float]] = None - created_at: datetime = None - - def __post_init__(self): - if self.created_at is None: - self.created_at = datetime.utcnow() - - -@dataclass -class SearchResult: - """Search result data structure""" - document: Document - score: float - relevance_score: float - - -class RAGModule(BaseModule): - """RAG module for document storage, retrieval, and augmented generation with integrated content processing""" - - def __init__(self, config: Dict[str, Any] = None): - super().__init__(module_id="rag", config=config) - self.enabled = False - self.qdrant_client: Optional[QdrantClient] = None - self.default_collection_name = "documents" # Keep for backward compatibility - self.embedding_model = None - self.embedding_service = None - self.tokenizer = None - - # Set improved default configuration - self.config = { - "chunk_size": 300, # Reduced from 400 for better precision - "chunk_overlap": 50, # Added overlap for context preservation - "max_results": 10, - "score_threshold": 0.3, # Increased from 0.0 to filter low-quality results - "enable_hybrid": True, # Enable hybrid search (vector + BM25) - "hybrid_weights": {"vector": 0.7, "bm25": 0.3} # Weight for hybrid scoring - } - # Update with any provided config - if config: - self.config.update(config) - - # Ensure embedding model configured (defaults to local BGE-M3) - default_embedding_model = getattr(settings, 'RAG_EMBEDDING_MODEL', 'bge-m3') - self.config.setdefault("embedding_model", default_embedding_model) - self.default_embedding_model = default_embedding_model - - # Content processing components - self.nlp_model = None - self.lemmatizer = None - self.stop_words = set() - self.markitdown = None - self.supported_types = { - 'text/plain': self._process_text, - 'application/pdf': self._process_with_markitdown, - 'application/vnd.openxmlformats-officedocument.wordprocessingml.document': self._process_docx, - 'application/msword': self._process_docx, - 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': self._process_with_markitdown, - 'application/vnd.ms-excel': self._process_with_markitdown, - 'text/html': self._process_html, - 'application/json': self._process_json, - 'application/x-ndjson': self._process_jsonl, # JSONL support - 'text/markdown': self._process_markdown, - 'text/csv': self._process_csv - } - - self.stats = { - "documents_indexed": 0, - "documents_processed": 0, - "total_processing_time": 0, - "average_processing_time": 0, - "searches_performed": 0, - "average_search_time": 0.0, - "cache_hits": 0, - "errors": 0, - "supported_types": len(self.supported_types) - } - self.search_cache = {} - self.collection_vector_sizes: Dict[str, int] = {} - - def get_required_permissions(self) -> List[Permission]: - """Return list of permissions this module requires""" - return [ - Permission("documents", "index", "Index new documents"), - Permission("documents", "search", "Search documents"), - Permission("documents", "delete", "Delete documents"), - Permission("collections", "manage", "Manage collections"), - Permission("settings", "configure", "Configure RAG settings") - ] - - async def initialize(self): - """Initialize the RAG module with content processing capabilities""" - - try: - # Initialize Qdrant client - qdrant_host = getattr(settings, 'QDRANT_HOST', 'localhost') - qdrant_port = getattr(settings, 'QDRANT_PORT', 6333) - qdrant_url = f"http://{qdrant_host}:{qdrant_port}" - self.qdrant_client = QdrantClient(url=qdrant_url) - - # Initialize 
tokenizer - self.tokenizer = tiktoken.get_encoding("cl100k_base") - - # Initialize embedding model - self.embedding_model = await self._initialize_embedding_model() - - # Initialize content processing components - await self._initialize_content_processing() - - # Create default collection if it doesn't exist - await self._ensure_collection_exists(self.default_collection_name) - - self.enabled = True - self.initialized = True - log_module_event("rag", "initialized", { - "vector_db": self.config.get("vector_db", "qdrant"), - "embedding_model": self.embedding_model.get("model_name", self.default_embedding_model), - "chunk_size": self.config.get("chunk_size", 400), - "max_results": self.config.get("max_results", 10), - "supported_file_types": list(self.supported_types.keys()), - "nltk_ready": True, - "spacy_ready": self.nlp_model is not None, - "markitdown_ready": self.markitdown is not None - }) - - except Exception as e: - logger.error(f"Failed to initialize RAG module: {e}") - log_module_event("rag", "initialization_failed", {"error": str(e)}) - self.enabled = False - raise - - def _generate_file_hash(self, content: bytes) -> str: - """Generate SHA-256 hash of file content""" - return hashlib.sha256(content).hexdigest() - - def _detect_mime_type(self, filename: str, content: bytes) -> str: - """Detect MIME type of file""" - # Try to detect from filename - mime_type, _ = mimetypes.guess_type(filename) - if mime_type: - return mime_type - - # Check for JSONL file extension - if filename.lower().endswith('.jsonl'): - return 'application/x-ndjson' - - # Try to detect from content - if content.startswith(b'%PDF'): - return 'application/pdf' - elif content.startswith(b'PK'): - # This could be DOCX, XLSX, or other Office formats - if filename.lower().endswith(('.docx', '.docm')): - return 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' - elif filename.lower().endswith(('.xlsx', '.xlsm')): - return 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' - else: - return 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' - elif content.startswith(b'\xd0\xcf\x11\xe0'): - # Old Office format (DOC, XLS) - if filename.lower().endswith('.xls'): - return 'application/vnd.ms-excel' - else: - return 'application/msword' - elif content.startswith(b' 1 and all(line.strip().startswith('{') for line in lines[:3] if line.strip()): - return 'application/x-ndjson' - except: - pass - return 'application/json' - else: - return 'text/plain' - - def _detect_language(self, text: str) -> Tuple[str, float]: - """Detect language of text (simplified implementation)""" - if len(text) < 50: - return 'unknown', 0.0 - - # Simple heuristic based on common English words - english_words = {'the', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could', 'should', 'may', 'might', 'must', 'can', 'shall'} - - if NLTK_AVAILABLE: - words = word_tokenize(text.lower()) - else: - # Fallback to simple whitespace tokenization - words = text.lower().split() - - english_count = sum(1 for word in words if word in english_words) - confidence = min(english_count / len(words), 1.0) if words else 0.0 - - return 'en' if confidence > 0.1 else 'unknown', confidence - - def _extract_entities(self, text: str) -> List[Dict[str, Any]]: - """Extract named entities from text""" - if not self.nlp_model: - return [] - - try: - doc = self.nlp_model(text[:10000]) # 
Limit text length for performance - entities = [] - - for ent in doc.ents: - entities.append({ - "text": ent.text, - "label": ent.label_, - "start": ent.start_char, - "end": ent.end_char, - "confidence": float(ent._.get("score", 0.0)) if hasattr(ent._, "score") else 0.0 - }) - - return entities - - except Exception as e: - logger.error(f"Error extracting entities: {e}") - return [] - - def _extract_keywords(self, text: str, max_keywords: int = 20) -> List[str]: - """Extract keywords from text""" - try: - if NLTK_AVAILABLE: - words = word_tokenize(text.lower()) - else: - # Fallback to simple whitespace tokenization - words = text.lower().split() - - words = [word for word in words if word.isalpha() and word not in self.stop_words] - - if self.lemmatizer and NLTK_AVAILABLE: - words = [self.lemmatizer.lemmatize(word) for word in words] - - # Simple frequency-based keyword extraction - word_freq = {} - for word in words: - word_freq[word] = word_freq.get(word, 0) + 1 - - # Sort by frequency and return top keywords - keywords = sorted(word_freq.items(), key=lambda x: x[1], reverse=True) - return [word for word, freq in keywords[:max_keywords] if freq > 1] - - except Exception as e: - logger.error(f"Error extracting keywords: {e}") - return [] - - def _clean_text(self, text: str) -> str: - """Clean and normalize text""" - if not text: - return "" - - # Remove excessive whitespace - text = re.sub(r'\s+', ' ', text) - - # Remove control characters except newlines and tabs - text = re.sub(r'[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]', '', text) - - # Normalize quotes - text = re.sub(r'[""''`]', '"', text) - - # Remove excessive punctuation - text = re.sub(r'[.]{3,}', '...', text) - text = re.sub(r'[!]{2,}', '!', text) - text = re.sub(r'[?]{2,}', '?', text) - - return text.strip() - - def _validate_content(self, content: str, file_type: str) -> ContentValidationResult: - """Validate and score content for security and quality""" - issues = [] - security_score = 100.0 - - # Check for potentially malicious content - if ' 1000000: # 1MB limit - issues.append("Content exceeds maximum size limit") - security_score -= 10 - - # Detect language - language, lang_confidence = self._detect_language(content) - - return ContentValidationResult( - is_valid=len(issues) == 0, - issues=issues, - security_score=max(0, security_score), - content_type=file_type, - language_confidence=lang_confidence - ) - - async def cleanup(self): - """Cleanup RAG resources""" - if self.qdrant_client: - self.qdrant_client.close() - self.qdrant_client = None - - if self.embedding_service: - await self.embedding_service.cleanup() - self.embedding_service = None - - # Cleanup content processing resources - self.nlp_model = None - self.lemmatizer = None - self.markitdown = None - self.stop_words.clear() - - self.enabled = False - self.search_cache.clear() - log_module_event("rag", "cleanup", {"success": True}) - - async def _initialize_embedding_model(self): - """Initialize embedding model""" - from app.services.ollama_embedding_service import ollama_embedding_service - - model_name = self.config.get("embedding_model", self.default_embedding_model) - ollama_embedding_service.model_name = model_name - - # Initialize the embedding service - success = await ollama_embedding_service.initialize() - - if success: - self.embedding_service = ollama_embedding_service - logger.info(f"Successfully initialized embedding service with {model_name}") - return { - "model_name": model_name, - "dimension": ollama_embedding_service.dimension or 1024 - } - else: - # 
Fallback to mock implementation - logger.warning("Failed to initialize embedding model, using fallback") - self.embedding_service = None - return { - "model_name": model_name, - "dimension": 1024 # Default dimension matching BGE-M3 embeddings - } - - async def _initialize_content_processing(self): - """Initialize content processing components""" - try: - # Download required NLTK data - await self._download_nltk_data() - - # Initialize NLP components - if NLTK_AVAILABLE: - self.lemmatizer = WordNetLemmatizer() - self.stop_words = set(stopwords.words('english')) - else: - self.lemmatizer = None - self.stop_words = set() - - # Initialize spaCy model - await self._initialize_spacy_model() - - # Initialize MarkItDown - if MARKITDOWN_AVAILABLE: - self.markitdown = MarkItDown() - else: - self.markitdown = None - - except Exception as e: - logger.warning(f"Failed to initialize some content processing components: {e}") - - async def _download_nltk_data(self): - """Download required NLTK data""" - if not NLTK_AVAILABLE: - return - - try: - nltk.download('punkt', quiet=True) - nltk.download('stopwords', quiet=True) - nltk.download('wordnet', quiet=True) - nltk.download('averaged_perceptron_tagger', quiet=True) - nltk.download('omw-1.4', quiet=True) - except Exception as e: - logger.warning(f"Failed to download NLTK data: {e}") - - async def _initialize_spacy_model(self): - """Initialize spaCy model for NLP tasks""" - if not SPACY_AVAILABLE: - self.nlp_model = None - return - - try: - self.nlp_model = spacy.load("en_core_web_sm") - except OSError: - logger.warning("spaCy model 'en_core_web_sm' not found. NLP features will be limited.") - self.nlp_model = None - - async def _get_collections_safely(self) -> List[str]: - """Get list of collections using raw HTTP to avoid Pydantic validation issues""" - try: - import httpx - qdrant_host = getattr(settings, 'QDRANT_HOST', 'localhost') - qdrant_port = getattr(settings, 'QDRANT_PORT', 6333) - qdrant_url = f"http://{qdrant_host}:{qdrant_port}" - - async with httpx.AsyncClient() as client: - response = await client.get(f"{qdrant_url}/collections") - if response.status_code == 200: - data = response.json() - result = data.get("result", {}) - collections = result.get("collections", []) - return [col.get("name", "") for col in collections if col.get("name")] - else: - logger.warning(f"Failed to get collections via HTTP: {response.status_code}") - return [] - except Exception as e: - logger.error(f"Error getting collections safely: {e}") - # Fallback to direct client call with error handling - try: - collections = self.qdrant_client.get_collections() - return [col.name for col in collections.collections] - except Exception as fallback_error: - logger.error(f"Fallback collection fetch also failed: {fallback_error}") - return [] - - async def _get_collection_info_safely(self, collection_name: str) -> Dict[str, Any]: - """Get collection information using raw HTTP to avoid Pydantic validation issues""" - try: - import httpx - qdrant_host = getattr(settings, 'QDRANT_HOST', 'localhost') - qdrant_port = getattr(settings, 'QDRANT_PORT', 6333) - qdrant_url = f"http://{qdrant_host}:{qdrant_port}" - - async with httpx.AsyncClient() as client: - response = await client.get(f"{qdrant_url}/collections/{collection_name}") - if response.status_code == 200: - data = response.json() - result = data.get("result", {}) - - # Extract relevant information safely - collection_info = { - "points_count": result.get("points_count", 0), - "status": result.get("status", "unknown"), - 
"vector_size": 384 # Default fallback - } - - # Try to get vector dimension from config - try: - config = result.get("config", {}) - params = config.get("params", {}) - vectors = params.get("vectors", {}) - - if isinstance(vectors, dict) and "size" in vectors: - collection_info["vector_size"] = vectors["size"] - elif isinstance(vectors, dict): - # Handle named vectors or default vector - if 'default' in vectors: - collection_info["vector_size"] = vectors['default'].get('size', 384) - else: - # Take first vector config if no default - first_vector = next(iter(vectors.values()), {}) - collection_info["vector_size"] = first_vector.get('size', 384) - except Exception: - # Keep default fallback - pass - - return collection_info - else: - logger.warning(f"Failed to get collection info via HTTP: {response.status_code}") - return {"points_count": 0, "status": "error", "vector_size": 384} - except Exception as e: - logger.error(f"Error getting collection info safely: {e}") - return {"points_count": 0, "status": "error", "vector_size": 384} - - async def _ensure_collection_exists(self, collection_name: str = None): - """Ensure the specified collection exists""" - collection_name = collection_name or self.default_collection_name - - try: - # Use safe collection fetching to avoid Pydantic validation errors - collection_names = await self._get_collections_safely() - - if collection_name not in collection_names: - # Create collection with the current embedding dimension - vector_dimension = self.embedding_model.get( - "dimension", - getattr(self.embedding_service, "dimension", 1024) or 1024 - ) - - self.qdrant_client.create_collection( - collection_name=collection_name, - vectors_config=VectorParams( - size=vector_dimension, - distance=Distance.COSINE - ) - ) - self.collection_vector_sizes[collection_name] = vector_dimension - log_module_event("rag", "collection_created", {"collection": collection_name}) - else: - # Cache existing collection vector size for later alignment - try: - info = self.qdrant_client.get_collection(collection_name) - vectors_param = getattr(info.config.params, "vectors", None) if hasattr(info, "config") else None - existing_size = None - if vectors_param is not None and hasattr(vectors_param, "size"): - existing_size = vectors_param.size - elif isinstance(vectors_param, dict): - existing_size = vectors_param.get("size") - - if existing_size: - self.collection_vector_sizes[collection_name] = existing_size - except Exception as inner_error: - logger.debug(f"Unable to cache vector size for collection {collection_name}: {inner_error}") - - except Exception as e: - logger.error(f"Error ensuring collection exists: {e}") - raise - - async def create_collection(self, collection_name: str) -> bool: - """Create a new Qdrant collection""" - try: - await self._ensure_collection_exists(collection_name) - return True - except Exception as e: - logger.error(f"Error creating collection {collection_name}: {e}") - return False - - async def delete_collection(self, collection_name: str) -> bool: - """Delete a Qdrant collection""" - try: - # Use safe collection fetching to avoid Pydantic validation errors - collection_names = await self._get_collections_safely() - - if collection_name in collection_names: - self.qdrant_client.delete_collection(collection_name) - log_module_event("rag", "collection_deleted", {"collection": collection_name}) - return True - else: - logger.warning(f"Collection {collection_name} does not exist") - return False - - except Exception as e: - logger.error(f"Error deleting 
collection {collection_name}: {e}") - return False - - async def _generate_embedding(self, text: str) -> List[float]: - """Generate embedding for text""" - if self.embedding_service: - # Use real embedding service - vector = await self.embedding_service.get_embedding(text) - return vector - else: - # Fallback to deterministic random embedding for consistency - np.random.seed(hash(text) % 2**32) - fallback_dim = self.embedding_model.get("dimension", getattr(self.embedding_service, "dimension", 1024) or 1024) - return np.random.random(fallback_dim).tolist() - - async def _generate_embeddings(self, texts: List[str], is_document: bool = True) -> List[List[float]]: - """Generate embeddings for multiple texts (batch processing)""" - if self.embedding_service: - # Add task-specific prefixes for better E5 model performance - if is_document: - # For document passages, use "passage:" prefix - prefixed_texts = [f"passage: {text}" for text in texts] - else: - # For queries, use "query:" prefix (handled in search method) - prefixed_texts = texts - - # Use real embedding service for batch processing - backend = getattr(self.embedding_service, "backend", "unknown") - start_time = time.time() - logger.info( - "Embedding batch requested", - extra={ - "backend": backend, - "model": getattr(self.embedding_service, "model_name", "unknown"), - "count": len(prefixed_texts), - "scope": "documents" if is_document else "queries" - }, - ) - embeddings = await self.embedding_service.get_embeddings(prefixed_texts) - duration = time.time() - start_time - logger.info( - "Embedding batch finished", - extra={ - "backend": backend, - "model": getattr(self.embedding_service, "model_name", "unknown"), - "count": len(embeddings), - "scope": "documents" if is_document else "queries", - "duration_sec": round(duration, 4) - }, - ) - return embeddings - else: - # Fallback to individual processing - logger.warning( - "Embedding service unavailable, falling back to per-item generation", - extra={ - "count": len(texts), - "scope": "documents" if is_document else "queries" - }, - ) - embeddings = [] - for text in texts: - embedding = await self._generate_embedding(text) - embeddings.append(embedding) - return embeddings - - def _get_collection_vector_size(self, collection_name: Optional[str]) -> int: - """Return the expected vector size for a collection, caching results.""" - default_dim = self.embedding_model.get( - "dimension", - getattr(self.embedding_service, "dimension", 384) or 384 - ) - - if not collection_name: - return default_dim - - if collection_name in self.collection_vector_sizes: - return self.collection_vector_sizes[collection_name] - - try: - info = self.qdrant_client.get_collection(collection_name) - vectors_param = getattr(info.config.params, "vectors", None) if hasattr(info, "config") else None - existing_size = None - if vectors_param is not None and hasattr(vectors_param, "size"): - existing_size = vectors_param.size - elif isinstance(vectors_param, dict): - existing_size = vectors_param.get("size") - - if existing_size: - self.collection_vector_sizes[collection_name] = existing_size - return existing_size - except Exception as e: - logger.debug(f"Unable to determine vector size for {collection_name}: {e}") - - self.collection_vector_sizes[collection_name] = default_dim - return default_dim - - def _align_embedding_dimension(self, vector: List[float], collection_name: Optional[str]) -> List[float]: - """Pad or truncate embeddings to match the target collection dimension.""" - if vector is None: - return vector 
- - target_dim = self._get_collection_vector_size(collection_name) - current_dim = len(vector) - - if current_dim == target_dim: - return vector - if current_dim > target_dim: - return vector[:target_dim] - # Pad with zeros to reach the target dimension - padding = [0.0] * (target_dim - current_dim) - return vector + padding - - def _chunk_text(self, text: str, chunk_size: int = None) -> List[str]: - """Split text into overlapping chunks for better context preservation""" - chunk_size = chunk_size or self.config.get("chunk_size", 300) - chunk_overlap = self.config.get("chunk_overlap", 50) - - # Ensure sane values to avoid infinite loops on very short docs - chunk_size = max(1, chunk_size) - if chunk_overlap >= chunk_size: - chunk_overlap = max(0, chunk_size - 1) - - tokens = self.tokenizer.encode(text) - if not tokens: - return [] - - chunks: List[str] = [] - len_tokens = len(tokens) - start_idx = 0 - step = max(1, chunk_size - chunk_overlap) - - while start_idx < len_tokens: - end_idx = min(start_idx + chunk_size, len_tokens) - chunk_tokens = tokens[start_idx:end_idx] - - if not chunk_tokens: - break - - chunk_text = self.tokenizer.decode(chunk_tokens) - - if chunk_text.strip(): - chunks.append(chunk_text) - - if end_idx >= len_tokens: - break - - start_idx += step - - return chunks - - async def _process_text(self, content: bytes, filename: str) -> str: - """Process plain text files""" - try: - # Try different encodings - for encoding in ['utf-8', 'latin-1', 'cp1252']: - try: - return content.decode(encoding) - except UnicodeDecodeError: - continue - - # Fallback to utf-8 with error handling - return content.decode('utf-8', errors='replace') - - except Exception as e: - logger.error(f"Error processing text file: {e}") - return "" - - async def _process_with_markitdown(self, content: bytes, filename: str) -> str: - """Process documents using MarkItDown (PDF, DOCX, DOC, XLSX, XLS)""" - try: - if not self.markitdown: - raise RuntimeError("MarkItDown not initialized") - - # Create a temporary file path for the content - import tempfile - import os - - # Get file extension from filename - file_ext = Path(filename).suffix.lower() - if not file_ext: - # Try to determine extension from mime type - mime_type = self._detect_mime_type(filename, content) - if mime_type == 'application/pdf': - file_ext = '.pdf' - elif mime_type in ['application/vnd.openxmlformats-officedocument.wordprocessingml.document']: - file_ext = '.docx' - elif mime_type == 'application/msword': - file_ext = '.doc' - elif mime_type == 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': - file_ext = '.xlsx' - elif mime_type == 'application/vnd.ms-excel': - file_ext = '.xls' - else: - file_ext = '.bin' - - # Write content to temporary file - with tempfile.NamedTemporaryFile(delete=False, suffix=file_ext) as temp_file: - temp_file.write(content) - temp_path = temp_file.name - - try: - # Convert document to markdown using MarkItDown in a thread pool to avoid blocking - import concurrent.futures - import asyncio - - logger.info(f"Starting MarkItDown conversion for {filename}") - - def convert_sync(): - """Synchronous conversion function to run in thread pool""" - return self.markitdown.convert(temp_path) - - # Run the synchronous conversion in a thread pool with timeout - loop = asyncio.get_event_loop() - with concurrent.futures.ThreadPoolExecutor() as executor: - try: - result = await asyncio.wait_for( - loop.run_in_executor(executor, convert_sync), - timeout=120.0 # 2 minute timeout for MarkItDown conversion - ) - 
except asyncio.TimeoutError: - logger.error(f"MarkItDown conversion timed out for {filename}") - raise RuntimeError(f"Document conversion timed out after 2 minutes for {filename}") - - if result and hasattr(result, 'text_content'): - converted_text = result.text_content - elif result and isinstance(result, str): - converted_text = result - else: - # Fallback if result format is unexpected - converted_text = str(result) if result else "" - - logger.info(f"Successfully converted {filename} using MarkItDown ({len(converted_text)} characters)") - return converted_text - - finally: - # Clean up temporary file - try: - os.unlink(temp_path) - except OSError: - pass - - except Exception as e: - logger.error(f"Error processing {filename} with MarkItDown: {e}") - # Fallback to basic text extraction attempt - try: - return content.decode('utf-8', errors='replace') - except: - return f"Error processing {filename}: {str(e)}" - - async def _process_docx(self, content: bytes, filename: str) -> str: - """Process DOCX files using python-docx (more reliable than MarkItDown)""" - try: - if not PYTHON_DOCX_AVAILABLE: - logger.warning(f"python-docx not available, falling back to MarkItDown for {filename}") - return await self._process_with_markitdown(content, filename) - - # Create a temporary file for python-docx processing - import tempfile - import os - - logger.info(f"Starting DOCX processing for {filename} using python-docx") - - with tempfile.NamedTemporaryFile(delete=False, suffix='.docx') as temp_file: - temp_file.write(content) - temp_path = temp_file.name - - try: - # Process in a thread pool to avoid blocking - import concurrent.futures - import asyncio - - def extract_docx_text(): - """Extract text from DOCX file synchronously""" - doc = DocxDocument(temp_path) - text_parts = [] - - # Extract paragraphs - for paragraph in doc.paragraphs: - if paragraph.text.strip(): - text_parts.append(paragraph.text.strip()) - - # Extract text from tables - for table in doc.tables: - for row in table.rows: - row_text = [] - for cell in row.cells: - if cell.text.strip(): - row_text.append(cell.text.strip()) - if row_text: - text_parts.append(" | ".join(row_text)) - - return "\n\n".join(text_parts) - - # Run extraction in thread pool with timeout - loop = asyncio.get_event_loop() - with concurrent.futures.ThreadPoolExecutor() as executor: - try: - extracted_text = await asyncio.wait_for( - loop.run_in_executor(executor, extract_docx_text), - timeout=30.0 # 30 second timeout for DOCX processing - ) - except asyncio.TimeoutError: - logger.error(f"DOCX processing timed out for {filename}") - raise RuntimeError(f"DOCX processing timed out after 30 seconds for {filename}") - - logger.info(f"Successfully processed {filename} using python-docx ({len(extracted_text)} characters)") - return extracted_text - - finally: - # Clean up temporary file - try: - os.unlink(temp_path) - except OSError: - pass - - except Exception as e: - logger.error(f"Error processing DOCX file {filename}: {e}") - # Fallback to MarkItDown if python-docx fails - try: - logger.info(f"Falling back to MarkItDown for {filename}") - return await self._process_with_markitdown(content, filename) - except Exception as fallback_error: - logger.error(f"Both python-docx and MarkItDown failed for {filename}: {fallback_error}") - return f"Error processing DOCX {filename}: {str(e)}" - - async def _process_html(self, content: bytes, filename: str) -> str: - """Process HTML files""" - try: - html_content = content.decode('utf-8', errors='replace') - # Simple HTML tag 
removal - text = re.sub(r'<[^>]+>', '', html_content) - # Decode HTML entities - text = text.replace('&amp;', '&').replace('&lt;', '<').replace('&gt;', '>').replace('&quot;', '"').replace('&#39;', "'") - return text - - except Exception as e: - logger.error(f"Error processing HTML file: {e}") - return "" - - async def _process_json(self, content: bytes, filename: str) -> str: - """Process JSON files""" - try: - json_data = json.loads(content.decode('utf-8')) - # Convert JSON to readable text - return json.dumps(json_data, indent=2) - - except Exception as e: - logger.error(f"Error processing JSON file: {e}") - return "" - - async def _process_markdown(self, content: bytes, filename: str) -> str: - """Process Markdown files""" - try: - md_content = content.decode('utf-8', errors='replace') - # Simple markdown processing - remove formatting - text = re.sub(r'#+\s*', '', md_content) # Remove headers - text = re.sub(r'\*\*(.+?)\*\*', r'\1', text) # Bold - text = re.sub(r'\*(.+?)\*', r'\1', text) # Italic - text = re.sub(r'`(.+?)`', r'\1', text) # Code - text = re.sub(r'\[(.+?)\]\(.+?\)', r'\1', text) # Links - return text - - except Exception as e: - logger.error(f"Error processing Markdown file: {e}") - return "" - - async def _process_csv(self, content: bytes, filename: str) -> str: - """Process CSV files""" - try: - csv_content = content.decode('utf-8', errors='replace') - # Convert CSV to readable text - lines = csv_content.split('\n') - processed_lines = [] - - for line in lines[:100]: # Limit to first 100 lines - if line.strip(): - processed_lines.append(line.replace(',', ' | ')) - - return '\n'.join(processed_lines) - - except Exception as e: - logger.error(f"Error processing CSV file: {e}") - return "" - - async def _process_jsonl(self, content: bytes, filename: str) -> str: - """Process JSONL files (newline-delimited JSON) - - Specifically optimized for helpjuice-export.jsonl format: - - Each line contains a JSON object with 'id' and 'payload' - - Payload contains 'question', 'language', and 'answer' fields - - Combines question and answer into searchable content - - Performance optimizations: - - Processes articles in smaller batches to reduce memory usage - - Uses streaming approach for large files - """ - try: - # Use streaming approach for large files - jsonl_content = content.decode('utf-8', errors='replace') - lines = jsonl_content.strip().split('\n') - - processed_articles = [] - batch_size = 50 # Process in batches of 50 articles - - for line_num, line in enumerate(lines, 1): - if not line.strip(): - continue - - try: - # Parse each JSON line - data = json.loads(line) - - # Handle helpjuice export format - if 'payload' in data: - payload = data['payload'] - article_id = data.get('id', f'article_{line_num}') - - # Extract fields - question = payload.get('question', '') - answer = payload.get('answer', '') - language = payload.get('language', 'EN') - - # Combine question and answer for better search - if question or answer: - # Format as Q&A for better context - article_text = f"## {question}\n\n{answer}\n\n" - - # Add language tag if not English - if language != 'EN': - article_text = f"[{language}] {article_text}" - - # Add metadata separator - article_text += f"---\nArticle ID: {article_id}\nLanguage: {language}\n\n" - - processed_articles.append(article_text) - - # Handle generic JSONL format - else: - # Convert the entire JSON object to readable text - json_text = json.dumps(data, indent=2, ensure_ascii=False) - processed_articles.append(json_text + "\n\n") - - except json.JSONDecodeError as
e: - logger.warning(f"Error parsing JSONL line {line_num}: {e}") - continue - except Exception as e: - logger.warning(f"Error processing JSONL line {line_num}: {e}") - continue - - # Combine all articles - combined_text = '\n'.join(processed_articles) - - logger.info(f"Successfully processed {len(processed_articles)} articles from JSONL file {filename}") - return combined_text - - except Exception as e: - logger.error(f"Error processing JSONL file {filename}: {e}") - return "" - - def _generate_document_id(self, content: str, metadata: Dict[str, Any]) -> str: - """Generate unique document ID""" - content_hash = hashlib.sha256(content.encode()).hexdigest()[:16] - metadata_hash = hashlib.sha256(json.dumps(metadata, sort_keys=True).encode()).hexdigest()[:8] - return f"{content_hash}_{metadata_hash}" - - async def process_document(self, file_data: bytes, filename: str, metadata: Dict[str, Any] = None) -> ProcessedDocument: - """Process a document and extract content""" - if not self.enabled: - raise RuntimeError("RAG module not initialized") - - import time - start_time = time.time() - - try: - logger.info(f"Starting document processing pipeline for {filename}") - - # Generate file hash and ID - file_hash = self._generate_file_hash(file_data) - doc_id = f"{file_hash}_{int(time.time())}" - logger.info(f"Generated document ID: {doc_id}") - - # Detect MIME type - mime_type = self._detect_mime_type(filename, file_data) - file_type = mime_type.split('/')[0] - logger.info(f"Detected MIME type: {mime_type}, file type: {file_type}") - - # Check if file type is supported - if mime_type not in self.supported_types: - raise ValueError(f"Unsupported file type: {mime_type}") - - # Extract content using appropriate processor - processor = self.supported_types[mime_type] - logger.info(f"Using processor: {processor.__name__} for {filename}") - extracted_text = await processor(file_data, filename) - logger.info(f"Content extraction completed for {filename}, extracted {len(extracted_text)} characters") - - # Clean the extracted text - logger.info(f"Starting text cleaning for {filename}") - cleaned_text = self._clean_text(extracted_text) - logger.info(f"Text cleaning completed for {filename}, final text length: {len(cleaned_text)}") - - # Validate content - logger.info(f"Starting content validation for {filename}") - validation_result = self._validate_content(cleaned_text, file_type) - logger.info(f"Content validation completed for {filename}") - - if not validation_result.is_valid: - logger.warning(f"Content validation issues: {validation_result.issues}") - - # Extract linguistic features - logger.info(f"Starting linguistic analysis for {filename}") - if NLTK_AVAILABLE and cleaned_text: - logger.info(f"Using NLTK for tokenization of {filename}") - sentences = sent_tokenize(cleaned_text) - words = word_tokenize(cleaned_text) - elif cleaned_text: - logger.info(f"Using fallback tokenization for {filename}") - # Fallback to simple tokenization - sentences = cleaned_text.split('.') - words = cleaned_text.split() - else: - logger.warning(f"No text content for linguistic analysis in {filename}") - sentences = [] - words = [] - - logger.info(f"Tokenization completed for {filename}: {len(sentences)} sentences, {len(words)} words") - - # Detect language - logger.info(f"Starting language detection for {filename}") - language, lang_confidence = self._detect_language(cleaned_text) - logger.info(f"Language detection completed for {filename}: {language} (confidence: {lang_confidence:.2f})") - - # Extract entities and keywords 
- logger.info(f"Starting entity extraction for {filename}") - entities = self._extract_entities(cleaned_text) - logger.info(f"Entity extraction completed for {filename}: found {len(entities)} entities") - - logger.info(f"Starting keyword extraction for {filename}") - keywords = self._extract_keywords(cleaned_text) - logger.info(f"Keyword extraction completed for {filename}: found {len(keywords)} keywords") - - # Calculate processing time - processing_time = time.time() - start_time - - # Create processed document - logger.info(f"Creating ProcessedDocument object for {filename}") - processed_doc = ProcessedDocument( - id=doc_id, - original_filename=filename, - file_type=file_type, - mime_type=mime_type, - content=cleaned_text, - extracted_text=extracted_text, - metadata={ - **(metadata or {}), - "validation": asdict(validation_result), - "file_size": len(file_data), - "processing_stats": { - "processing_time": processing_time, - "processor_used": processor.__name__ - } - }, - word_count=len(words), - sentence_count=len(sentences), - language=language, - entities=entities, - keywords=keywords, - processing_time=processing_time, - processed_at=datetime.utcnow(), - file_hash=file_hash, - file_size=len(file_data) - ) - logger.info(f"ProcessedDocument created for {filename}") - - # Update stats - self.stats["documents_processed"] += 1 - self.stats["total_processing_time"] += processing_time - self.stats["average_processing_time"] = ( - self.stats["total_processing_time"] / self.stats["documents_processed"] - ) - - log_module_event("rag", "document_processed", { - "document_id": doc_id, - "filename": filename, - "file_type": file_type, - "word_count": len(words), - "processing_time": processing_time, - "language": language, - "entities_count": len(entities), - "keywords_count": len(keywords) - }) - - logger.info(f"Document processing completed successfully for {filename} in {processing_time:.2f} seconds") - return processed_doc - - except Exception as e: - self.stats["errors"] += 1 - logger.error(f"Error processing document {filename}: {e}") - log_module_event("rag", "processing_failed", { - "filename": filename, - "error": str(e) - }) - raise - - async def index_document(self, content: str, metadata: Dict[str, Any] = None, collection_name: str = None) -> str: - """Index a document in the vector database (backward compatibility method)""" - if not self.enabled: - raise RuntimeError("RAG module not initialized") - - collection_name = collection_name or self.default_collection_name - metadata = metadata or {} - - try: - # Ensure collection exists - await self._ensure_collection_exists(collection_name) - - # Generate document ID - doc_id = self._generate_document_id(content, metadata) - - # Check if document already exists - if await self._document_exists(doc_id, collection_name): - log_module_event("rag", "document_exists", {"document_id": doc_id, "collection": collection_name}) - return doc_id - - # Chunk the document - chunks = self._chunk_text(content) - - # Generate embeddings for all chunks in batch (more efficient) - embeddings = await self._generate_embeddings(chunks, is_document=True) - - # Create document points - points = [] - for i, (chunk, embedding) in enumerate(zip(chunks, embeddings)): - aligned_embedding = self._align_embedding_dimension(embedding, collection_name) - chunk_id = str(uuid.uuid4()) - - chunk_metadata = { - **metadata, - "document_id": doc_id, - "chunk_index": i, - "chunk_count": len(chunks), - "content": chunk, - "indexed_at": datetime.utcnow().isoformat() - } - - 
points.append(PointStruct( - id=chunk_id, - vector=aligned_embedding, - payload=chunk_metadata - )) - - # Insert points into Qdrant - self.qdrant_client.upsert( - collection_name=collection_name, - points=points - ) - - self.stats["documents_indexed"] += 1 - log_module_event("rag", "document_indexed", { - "document_id": doc_id, - "collection": collection_name, - "chunks": len(chunks), - "metadata": metadata - }) - - return doc_id - - except Exception as e: - logger.error(f"Error indexing document: {e}") - log_module_event("rag", "indexing_failed", {"error": str(e)}) - raise - - async def index_processed_document(self, processed_doc: ProcessedDocument, collection_name: str = None) -> str: - """Index a processed document in the vector database""" - if not self.enabled: - raise RuntimeError("RAG module not initialized") - - collection_name = collection_name or self.default_collection_name - - try: - # Special handling for JSONL files - if processed_doc.file_type == 'jsonl': - # Import the optimized JSONL processor - from app.services.jsonl_processor import JSONLProcessor - jsonl_processor = JSONLProcessor(self) - - # Read the original file content - with open(processed_doc.metadata.get('file_path', ''), 'rb') as f: - file_content = f.read() - - # Process using the optimized JSONL processor - return await jsonl_processor.process_and_index_jsonl( - collection_name=collection_name, - content=file_content, - filename=processed_doc.original_filename, - metadata=processed_doc.metadata - ) - - # Ensure collection exists - await self._ensure_collection_exists(collection_name) - - # Check if document already exists - if await self._document_exists(processed_doc.id, collection_name): - log_module_event("rag", "document_exists", {"document_id": processed_doc.id, "collection": collection_name}) - return processed_doc.id - - # Chunk the document - chunks = self._chunk_text(processed_doc.content) - - # Generate embeddings for all chunks in batch (more efficient) - embeddings = await self._generate_embeddings(chunks, is_document=True) - - # Create document points with enhanced metadata - points = [] - for i, (chunk, embedding) in enumerate(zip(chunks, embeddings)): - aligned_embedding = self._align_embedding_dimension(embedding, collection_name) - chunk_id = str(uuid.uuid4()) - - chunk_metadata = { - **processed_doc.metadata, - "document_id": processed_doc.id, - "original_filename": processed_doc.original_filename, - "file_type": processed_doc.file_type, - "mime_type": processed_doc.mime_type, - "language": processed_doc.language, - "entities": processed_doc.entities, - "keywords": processed_doc.keywords, - "word_count": processed_doc.word_count, - "sentence_count": processed_doc.sentence_count, - "file_hash": processed_doc.file_hash, - "processed_at": processed_doc.processed_at.isoformat(), - "chunk_index": i, - "chunk_count": len(chunks), - "content": chunk, - "indexed_at": datetime.utcnow().isoformat() - } - - points.append(PointStruct( - id=chunk_id, - vector=aligned_embedding, - payload=chunk_metadata - )) - - # Insert points into Qdrant - self.qdrant_client.upsert( - collection_name=collection_name, - points=points - ) - - self.stats["documents_indexed"] += 1 - log_module_event("rag", "processed_document_indexed", { - "document_id": processed_doc.id, - "filename": processed_doc.original_filename, - "collection": collection_name, - "chunks": len(chunks), - "file_type": processed_doc.file_type, - "language": processed_doc.language - }) - - return processed_doc.id - - except Exception as e: - 
logger.error(f"Error indexing processed document: {e}") - log_module_event("rag", "indexing_failed", {"error": str(e)}) - raise - - async def _document_exists(self, document_id: str, collection_name: str = None) -> bool: - """Check if document exists in the collection""" - collection_name = collection_name or self.default_collection_name - - try: - result = self.qdrant_client.search( - collection_name=collection_name, - query_filter=Filter( - must=[FieldCondition(key="document_id", match=MatchValue(value=document_id))] - ), - limit=1 - ) - return len(result) > 0 - except Exception: - return False - - async def _hybrid_search(self, collection_name: str, query: str, query_vector: List[float], - query_filter: Optional[Filter], limit: int, score_threshold: float) -> List[Any]: - """Perform hybrid search combining vector similarity and BM25 scoring""" - - # Preprocess query for BM25 - query_terms = self._preprocess_text_for_bm25(query) - - # Get all documents from the collection (for BM25 scoring) - # Note: In production, you'd want to optimize this with a proper BM25 index - scroll_filter = query_filter or Filter() - all_points = [] - - # Use scroll to get all points - offset = None - batch_size = 100 - while True: - search_result = self.qdrant_client.scroll( - collection_name=collection_name, - scroll_filter=scroll_filter, - limit=batch_size, - offset=offset, - with_payload=True, - with_vectors=False - ) - - points = search_result[0] - all_points.extend(points) - - if len(points) < batch_size: - break - - offset = points[-1].id - - # Calculate BM25 scores for each document - bm25_scores = {} - for point in all_points: - doc_id = point.payload.get("document_id", "") - content = point.payload.get("content", "") - - # Calculate BM25 score - bm25_score = self._calculate_bm25_score(query_terms, content) - bm25_scores[doc_id] = bm25_score - - # Perform vector search - vector_results = self.qdrant_client.search( - collection_name=collection_name, - query_vector=query_vector, - query_filter=query_filter, - limit=limit * 2, # Get more results for re-ranking - score_threshold=score_threshold / 2 # Lower threshold for initial search - ) - - # Combine scores with improved normalization - hybrid_weights = self.config.get("hybrid_weights", {"vector": 0.7, "bm25": 0.3}) - vector_weight = hybrid_weights.get("vector", 0.7) - bm25_weight = hybrid_weights.get("bm25", 0.3) - - # Get score distributions for better normalization - vector_scores = [r.score for r in vector_results] - bm25_scores_list = list(bm25_scores.values()) - - # Calculate statistics for normalization - if vector_scores: - v_max = max(vector_scores) - v_min = min(vector_scores) - v_range = v_max - v_min if v_max != v_min else 1 - else: - v_max, v_min, v_range = 1, 0, 1 - - if bm25_scores_list: - bm25_max = max(bm25_scores_list) - bm25_min = min(bm25_scores_list) - bm25_range = bm25_max - bm25_min if bm25_max != bm25_min else 1 - else: - bm25_max, bm25_min, bm25_range = 1, 0, 1 - - # Create hybrid results with improved scoring - hybrid_results = [] - for result in vector_results: - doc_id = result.payload.get("document_id", "") - vector_score = result.score - bm25_score = bm25_scores.get(doc_id, 0.0) - - # Improved normalization using actual score distributions - vector_norm = (vector_score - v_min) / v_range if v_range > 0 else 0.5 - bm25_norm = (bm25_score - bm25_min) / bm25_range if bm25_range > 0 else 0.5 - - # Apply reciprocal rank fusion for better combination - # This gives more weight to documents that rank highly in both methods - 
rrf_vector = 1.0 / (1.0 + vector_results.index(result) + 1) # +1 to avoid division by zero - rrf_bm25 = 1.0 / (1.0 + sorted(bm25_scores_list, reverse=True).index(bm25_score) + 1) if bm25_score in bm25_scores_list else 0 - - # Calculate hybrid score using both normalized scores and RRF - hybrid_score = (vector_weight * vector_norm + bm25_weight * bm25_norm) * 0.7 + (rrf_vector + rrf_bm25) * 0.3 - - # Create new point with hybrid score - hybrid_point = ScoredPoint( - id=result.id, - payload=result.payload, - score=hybrid_score, - vector=result.vector, - shard_key=None, - order_value=None - ) - hybrid_results.append(hybrid_point) - - # Sort by hybrid score and apply final threshold - hybrid_results.sort(key=lambda x: x.score, reverse=True) - final_results = [r for r in hybrid_results if r.score >= score_threshold][:limit] - - logger.info(f"Hybrid search: {len(vector_results)} vector results, {len(final_results)} final results") - return final_results - - def _preprocess_text_for_bm25(self, text: str) -> List[str]: - """Preprocess text for BM25 scoring""" - if not NLTK_AVAILABLE: - return text.lower().split() - - try: - # Tokenize - tokens = word_tokenize(text.lower()) - - # Remove stopwords and non-alphabetic tokens - stop_words = set(stopwords.words('english')) - filtered_tokens = [ - token for token in tokens - if token.isalpha() and token not in stop_words and len(token) > 2 - ] - - return filtered_tokens - except: - # Fallback to simple splitting - return text.lower().split() - - def _calculate_bm25_score(self, query_terms: List[str], document: str) -> float: - """Calculate BM25 score for a document against query terms""" - if not query_terms: - return 0.0 - - # Preprocess document - doc_terms = self._preprocess_text_for_bm25(document) - if not doc_terms: - return 0.0 - - # Calculate term frequencies - doc_len = len(doc_terms) - avg_doc_len = 300 # Average document length (configurable) - - # BM25 parameters - k1 = 1.2 # Controls term frequency saturation - b = 0.75 # Controls document length normalization - - score = 0.0 - - # Calculate IDF for each query term - for term in set(query_terms): - # Term frequency in document - tf = doc_terms.count(term) - - # Simple IDF (log(N/n) + 1) - # In production, you'd use the actual document frequency - idf = 2.0 # Simplified IDF - - # BM25 formula - numerator = tf * (k1 + 1) - denominator = tf + k1 * (1 - b + b * (doc_len / avg_doc_len)) - - score += idf * (numerator / denominator) - - # Normalize score to 0-1 range - return min(score / 10.0, 1.0) # Simple normalization - - async def search_documents(self, query: str, max_results: int = None, filters: Dict[str, Any] = None, collection_name: str = None, score_threshold: float = None) -> List[SearchResult]: - """Search for relevant documents""" - if not self.enabled: - raise RuntimeError("RAG module not initialized") - - collection_name = collection_name or self.default_collection_name - - # Special handling for collections with different vector dimensions - SPECIAL_COLLECTIONS = { - "bitbox02_faq_local": { - "dimension": 1024, - "model": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2" - }, - "bitbox_local_rag": { - "dimension": 1024, - "model": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2" - } - } - max_results = max_results or self.config.get("max_results", 10) - - # Check cache (include collection name in cache key) - cache_key = f"{collection_name}_{query}_{max_results}_{hash(str(filters))}" - if cache_key in self.search_cache: - self.stats["cache_hits"] += 1 - 
return self.search_cache[cache_key] - - try: - import time - start_time = time.time() - - # Generate query embedding with task-specific prefix for better retrieval - try: - # Check if this is a special collection - if collection_name in SPECIAL_COLLECTIONS: - # Try to import sentence-transformers - import sentence_transformers - from sentence_transformers import SentenceTransformer - model = SentenceTransformer(SPECIAL_COLLECTIONS[collection_name]["model"]) - query_embedding = model.encode([query], normalize_embeddings=True)[0].tolist() - logger.info(f"Using {SPECIAL_COLLECTIONS[collection_name]['dimension']}-dim local model for {collection_name}") - else: - # The E5 model works better with "query:" prefix for search queries - optimized_query = f"query: {query}" - query_embedding = await self._generate_embedding(optimized_query) - except ImportError: - # Fallback to default embedding if sentence-transformers is not available - logger.warning(f"sentence-transformers not available, falling back to default embedding for {collection_name}") - optimized_query = f"query: {query}" - query_embedding = await self._generate_embedding(optimized_query) - - query_embedding = self._align_embedding_dimension(query_embedding, collection_name) - - # Build filter - search_filter = None - if filters: - conditions = [] - for key, value in filters.items(): - conditions.append(FieldCondition(key=key, match=MatchValue(value=value))) - search_filter = Filter(must=conditions) - - # Enhanced debugging for search - logger.info("=== ENHANCED RAG SEARCH DEBUGGING ===") - logger.info(f"Collection: {collection_name}") - logger.info(f"Query: '{query}'") - logger.info(f"Max results requested: {max_results}") - logger.info(f"Query embedding (first 10 values): {query_embedding[:10] if query_embedding else 'None'}") - logger.info(f"Embedding service available: {self.embedding_service is not None}") - - # Check if hybrid search is enabled - enable_hybrid = self.config.get("enable_hybrid", False) - # Use provided score_threshold or fall back to config - search_score_threshold = score_threshold if score_threshold is not None else self.config.get("score_threshold", 0.3) - - if enable_hybrid and NLTK_AVAILABLE: - # Perform hybrid search (vector + BM25) - search_results = await self._hybrid_search( - collection_name=collection_name, - query=query, - query_vector=query_embedding, - query_filter=search_filter, - limit=max_results, - score_threshold=search_score_threshold - ) - else: - # Pure vector search with improved threshold - search_results = self.qdrant_client.search( - collection_name=collection_name, - query_vector=query_embedding, - query_filter=search_filter, - limit=max_results, - score_threshold=search_score_threshold - ) - - logger.info(f"Raw search results count: {len(search_results)}") - - # Process results - results = [] - document_scores = {} - - for i, result in enumerate(search_results): - doc_id = result.payload.get("document_id") - content = result.payload.get("content", "") - score = result.score - - # Generic content extraction for documents without a 'content' field - if not content: - # Build content from all text-based fields in the payload - # This makes the RAG module completely agnostic to document structure - text_fields = [] - for field, value in result.payload.items(): - # Skip system/metadata fields - if field not in ["document_id", "chunk_index", "chunk_count", "indexed_at", "processed_at", - "file_hash", "mime_type", "file_type", "created_at", "__collection_metadata__"]: - # Include any field that 
has a non-empty string value - if value and isinstance(value, str) and len(value.strip()) > 0: - text_fields.append(f"{field}: {value}") - - # Join all text fields to create content - if text_fields: - content = "\n\n".join(text_fields) - - # Log each raw result for debugging - logger.info(f"\n--- Raw Result {i+1} ---") - logger.info(f"Score: {score}") - logger.info(f"Document ID: {doc_id}") - logger.info(f"Content preview (first 200 chars): {content[:200]}") - logger.info(f"Metadata keys: {list(result.payload.keys())}") - - # Aggregate scores by document - if doc_id in document_scores: - document_scores[doc_id]["score"] = max(document_scores[doc_id]["score"], score) - document_scores[doc_id]["content"] += "\n" + content - else: - document_scores[doc_id] = { - "score": score, - "content": content, - "metadata": {k: v for k, v in result.payload.items() if k not in ["content", "document_id"]} - } - - logger.info(f"\nAggregated documents count: {len(document_scores)}") - logger.info("=== END ENHANCED RAG SEARCH DEBUGGING ===") - - # Create SearchResult objects - for doc_id, data in document_scores.items(): - document = Document( - id=doc_id, - content=data["content"], - metadata=data["metadata"] - ) - - search_result = SearchResult( - document=document, - score=data["score"], - relevance_score=min(data["score"] * 100, 100) - ) - - results.append(search_result) - - # Sort by score - results.sort(key=lambda x: x.score, reverse=True) - - # Update stats - search_time = time.time() - start_time - self.stats["searches_performed"] += 1 - self.stats["average_search_time"] = ( - (self.stats["average_search_time"] * (self.stats["searches_performed"] - 1) + search_time) / - self.stats["searches_performed"] - ) - - # Cache results - self.search_cache[cache_key] = results - - log_module_event("rag", "search_completed", { - "query": query, - "collection": collection_name, - "results_count": len(results), - "search_time": search_time - }) - - return results - - except Exception as e: - logger.error(f"Error searching documents in collection {collection_name}: {e}") - log_module_event("rag", "search_failed", {"error": str(e), "collection": collection_name}) - raise - - async def delete_document(self, document_id: str, collection_name: str = None) -> bool: - """Delete a document from the vector database""" - if not self.enabled: - raise RuntimeError("RAG module not initialized") - - collection_name = collection_name or self.default_collection_name - - try: - # Delete all chunks for this document - self.qdrant_client.delete( - collection_name=collection_name, - points_selector=models.FilterSelector( - filter=Filter( - must=[FieldCondition(key="document_id", match=MatchValue(value=document_id))] - ) - ) - ) - - log_module_event("rag", "document_deleted", {"document_id": document_id, "collection": collection_name}) - return True - - except Exception as e: - logger.error(f"Error deleting document from collection {collection_name}: {e}") - log_module_event("rag", "deletion_failed", {"error": str(e), "collection": collection_name}) - return False - - async def get_stats(self) -> Dict[str, Any]: - """Get RAG module statistics""" - stats = self.stats.copy() - - if self.enabled: - try: - # Use raw HTTP call to avoid Pydantic validation issues - import httpx - - # Direct HTTP call to Qdrant API instead of using client to avoid Pydantic issues - qdrant_url = f"http://{settings.QDRANT_HOST}:{settings.QDRANT_PORT}" - - async with httpx.AsyncClient() as client: - response = await 
client.get(f"{qdrant_url}/collections/{self.default_collection_name}") - - if response.status_code == 200: - collection_data = response.json() - - # Safely extract stats from raw JSON - result = collection_data.get("result", {}) - - basic_stats = { - "total_points": result.get("points_count", 0), - "collection_status": result.get("status", "unknown"), - } - - # Try to get vector dimension from config - try: - config = result.get("config", {}) - params = config.get("params", {}) - vectors = params.get("vectors", {}) - - if isinstance(vectors, dict) and "size" in vectors: - basic_stats["vector_dimension"] = vectors["size"] - else: - basic_stats["vector_dimension"] = "unknown" - except Exception as config_error: - logger.debug(f"Could not get vector dimension: {config_error}") - basic_stats["vector_dimension"] = "unknown" - - stats.update(basic_stats) - else: - # Collection doesn't exist or error - stats.update({ - "total_points": 0, - "collection_status": "not_found", - "vector_dimension": "unknown" - }) - - except Exception as e: - logger.debug(f"Could not get Qdrant stats (using fallback): {e}") - # Add basic fallback stats without logging as error since this is not critical - stats.update({ - "total_points": 0, - "collection_status": "unavailable", - "vector_dimension": "unknown" - }) - else: - stats.update({ - "total_points": 0, - "collection_status": "disabled", - "vector_dimension": "unknown" - }) - - return stats - - async def process_request(self, request: Dict[str, Any], context: Dict[str, Any]) -> Dict[str, Any]: - """Process a module request through the interceptor pattern""" - if not self.enabled: - raise RuntimeError("RAG module not initialized") - - action = request.get("action", "search") - - if action == "search": - query = request.get("query") - if not query: - raise ValueError("Query is required for search action") - - max_results = request.get("max_results", self.config.get("max_results", 10)) - filters = request.get("filters", {}) - - results = await self.search_documents(query, max_results, filters) - - return { - "action": "search", - "query": query, - "results": [ - { - "document_id": result.document.id, - "content": result.document.content, - "metadata": result.document.metadata, - "score": result.score, - "relevance_score": result.relevance_score - } - for result in results - ], - "total_results": len(results), - "cache_hit": False # Would be determined by search logic - } - - elif action == "index": - content = request.get("content") - if not content: - raise ValueError("Content is required for index action") - - metadata = request.get("metadata", {}) - document_id = await self.index_document(content, metadata) - - return { - "action": "index", - "document_id": document_id, - "status": "success", - "message": "Document indexed successfully" - } - - elif action == "process": - file_data = request.get("file_data") - filename = request.get("filename") - if not file_data or not filename: - raise ValueError("File data and filename are required for process action") - - # Decode base64 file data if provided as string - if isinstance(file_data, str): - import base64 - file_data = base64.b64decode(file_data) - - metadata = request.get("metadata", {}) - processed_doc = await self.process_document(file_data, filename, metadata) - - return { - "action": "process", - "document_id": processed_doc.id, - "filename": processed_doc.original_filename, - "file_type": processed_doc.file_type, - "mime_type": processed_doc.mime_type, - "word_count": processed_doc.word_count, - 
"sentence_count": processed_doc.sentence_count, - "language": processed_doc.language, - "entities_count": len(processed_doc.entities), - "keywords_count": len(processed_doc.keywords), - "processing_time": processed_doc.processing_time, - "status": "success", - "message": "Document processed successfully" - } - - elif action == "delete": - document_id = request.get("document_id") - if not document_id: - raise ValueError("Document ID is required for delete action") - - success = await self.delete_document(document_id) - - return { - "action": "delete", - "document_id": document_id, - "status": "success" if success else "failed", - "message": "Document deleted successfully" if success else "Failed to delete document" - } - - elif action == "stats": - stats = await self.get_stats() - - return { - "action": "stats", - "statistics": stats - } - - else: - raise ValueError(f"Unsupported action: {action}") - - async def pre_request_interceptor(self, context: Dict[str, Any]) -> Dict[str, Any]: - """Pre-request interceptor for RAG enhancement""" - if not self.enabled: - return context - - request = context.get("request") - if not request: - return context - - # Check if this is a request that could benefit from RAG - if request.url.path.startswith("/api/v1/chat") or request.url.path.startswith("/api/v1/completions"): - # Extract query/prompt from request - request_body = await request.body() if hasattr(request, 'body') else b"" - - if request_body: - try: - data = json.loads(request_body.decode()) - query = data.get("message", data.get("prompt", "")) - - if query: - # Search for relevant documents - search_results = await self.search_documents(query, max_results=3) - - if search_results: - # Add context to request - context["rag_context"] = [ - { - "content": result.document.content, - "metadata": result.document.metadata, - "relevance_score": result.relevance_score - } - for result in search_results - ] - - log_module_event("rag", "context_added", { - "query": query[:100], - "results_count": len(search_results) - }) - - except Exception as e: - logger.error(f"Error processing RAG request: {e}") - - return context - -# Global RAG instance -rag_module = RAGModule() - -# Module interface functions -async def initialize(config: Dict[str, Any]): - """Initialize RAG module""" - await rag_module.initialize(config) - -async def cleanup(): - """Cleanup RAG module""" - await rag_module.cleanup() - -async def pre_request_interceptor(context: Dict[str, Any]) -> Dict[str, Any]: - """Pre-request interceptor""" - return await rag_module.pre_request_interceptor(context) - -# Additional exported functions -async def process_document(file_data: bytes, filename: str, metadata: Dict[str, Any] = None) -> ProcessedDocument: - """Process a document with full content analysis""" - return await rag_module.process_document(file_data, filename, metadata) - -async def index_document(content: str, metadata: Dict[str, Any] = None, collection_name: str = None) -> str: - """Index a document (backward compatibility)""" - return await rag_module.index_document(content, metadata, collection_name) - -async def index_processed_document(processed_doc: ProcessedDocument, collection_name: str = None) -> str: - """Index a processed document""" - return await rag_module.index_processed_document(processed_doc, collection_name) - -async def search_documents(query: str, max_results: int = None, filters: Dict[str, Any] = None, collection_name: str = None, score_threshold: float = None) -> List[SearchResult]: - """Search documents""" - return 
await rag_module.search_documents(query, max_results, filters, collection_name, score_threshold) - -async def delete_document(document_id: str, collection_name: str = None) -> bool: - """Delete a document""" - return await rag_module.delete_document(document_id, collection_name) - -async def create_collection(collection_name: str) -> bool: - """Create a new Qdrant collection""" - return await rag_module.create_collection(collection_name) - -async def delete_collection(collection_name: str) -> bool: - """Delete a Qdrant collection""" - return await rag_module.delete_collection(collection_name) - -async def get_supported_types() -> List[str]: - """Get list of supported file types""" - return list(rag_module.supported_types.keys()) diff --git a/backend/modules/rag/module.yaml b/backend/modules/rag/module.yaml deleted file mode 100644 index cfe8b53..0000000 --- a/backend/modules/rag/module.yaml +++ /dev/null @@ -1,82 +0,0 @@ -name: rag -version: 1.0.0 -description: "Document search, retrieval, and vector storage" -author: "Enclava Team" -category: "ai" - -# Module lifecycle -enabled: true -auto_start: true -dependencies: [] -optional_dependencies: - - cache - -# Module capabilities -provides: - - "document_storage" - - "semantic_search" - - "vector_embeddings" - - "document_processing" - -consumes: - - "qdrant_connection" - - "llm_embeddings" - - "document_parsing" - -# API endpoints -endpoints: - - path: "/rag/collections" - method: "GET" - description: "List document collections" - - - path: "/rag/upload" - method: "POST" - description: "Upload and process documents" - - - path: "/rag/search" - method: "POST" - description: "Semantic search in documents" - - - path: "/rag/collections/{collection_id}/documents" - method: "GET" - description: "List documents in collection" - -# UI Configuration -ui_config: - icon: "search" - color: "#8B5CF6" - category: "AI & ML" - - forms: - - name: "collection_config" - title: "Collection Settings" - fields: ["name", "description", "embedding_model"] - - - name: "search_config" - title: "Search Configuration" - fields: ["top_k", "similarity_threshold", "rerank_enabled"] - -# Permissions -permissions: - - name: "rag.create" - description: "Create document collections" - - - name: "rag.upload" - description: "Upload documents to collections" - - - name: "rag.search" - description: "Search document collections" - - - name: "rag.manage" - description: "Manage all collections (admin)" - -# Health checks -health_checks: - - name: "qdrant_connectivity" - description: "Check Qdrant vector database connection" - - - name: "embeddings_service" - description: "Check LLM embeddings service" - - - name: "document_processing" - description: "Check document parsing capabilities" \ No newline at end of file diff --git a/backend/tests/test_modules.py b/backend/tests/test_modules.py index 3fac591..e58d0e5 100644 --- a/backend/tests/test_modules.py +++ b/backend/tests/test_modules.py @@ -17,19 +17,17 @@ import os import sys from pathlib import Path -# Add both backend and modules directories to path +# Add backend directory to Python path for app package imports backend_path = Path(__file__).parent.parent sys.path.insert(0, str(backend_path)) -sys.path.insert(0, str(backend_path / "modules")) try: - from modules.rag.main import RAGModule - from modules.chatbot.main import ChatbotModule + from app.modules.rag.main import RAGModule + from app.modules.chatbot.main import ChatbotModule from app.services.module_manager import ModuleManager, ModuleConfig except ImportError as e: 
print(f"Import error: {e}") - print("Available modules path:", backend_path / "modules") # Create mock modules for testing if imports fail class MockModule: def __init__(self): @@ -346,4 +344,4 @@ if __name__ == "__main__": print("\nAll basic tests completed successfully! 🎉") # Run the tests - asyncio.run(run_basic_tests()) \ No newline at end of file + asyncio.run(run_basic_tests()) diff --git a/frontend/src/contexts/PluginContext.tsx b/frontend/src/contexts/PluginContext.tsx index b936f18..30165d9 100644 --- a/frontend/src/contexts/PluginContext.tsx +++ b/frontend/src/contexts/PluginContext.tsx @@ -6,6 +6,7 @@ import React, { createContext, useContext, useState, useEffect, useCallback, ReactNode } from 'react'; import { useAuth } from '@/components/providers/auth-provider'; import { apiClient } from '@/lib/api-client'; +import { useToast } from "@/hooks/use-toast"; export interface PluginInfo { id: string; @@ -122,6 +123,7 @@ interface PluginProviderProps { export const PluginProvider: React.FC = ({ children }) => { const { user, isAuthenticated } = useAuth(); + const { toast } = useToast(); const [installedPlugins, setInstalledPlugins] = useState([]); const [availablePlugins, setAvailablePlugins] = useState([]); const [pluginConfigurations, setPluginConfigurations] = useState>({}); @@ -130,6 +132,24 @@ export const PluginProvider: React.FC = ({ children }) => { // Plugin component registry const [pluginComponents, setPluginComponents] = useState>>({}); + + const userPermissions = user?.permissions ?? []; + + const hasPermission = useCallback((required: string) => { + if (!required) { + return true; + } + return userPermissions.some(granted => { + if (granted === "*" || granted === required) { + return true; + } + if (granted.endsWith(":*")) { + const prefix = granted.slice(0, -1); + return required.startsWith(prefix); + } + return false; + }); + }, [userPermissions]); const apiRequest = async (endpoint: string, options: RequestInit = {}) => { if (!isAuthenticated) { @@ -175,16 +195,23 @@ export const PluginProvider: React.FC = ({ children }) => { [plugin.id]: config })); } - } catch (e) { + } catch (configError) { + console.warn(`Failed to load configuration for plugin ${plugin.id}`, configError); } } } catch (err) { - setError(err instanceof Error ? err.message : 'Failed to load installed plugins'); + const message = err instanceof Error ? err.message : 'Failed to load installed plugins'; + setError(message); + toast({ + title: "Plugin load failed", + description: message, + variant: "destructive", + }); } finally { setLoading(false); } - }, [user, isAuthenticated]); + }, [user, isAuthenticated, toast]); const searchAvailablePlugins = useCallback(async (query = '', tags: string[] = [], category = '') => { if (!user || !isAuthenticated) { @@ -205,11 +232,17 @@ export const PluginProvider: React.FC = ({ children }) => { setAvailablePlugins(data.plugins); } catch (err) { - setError(err instanceof Error ? err.message : 'Failed to search plugins'); + const message = err instanceof Error ? err.message : 'Failed to search plugins'; + setError(message); + toast({ + title: "Plugin search failed", + description: message, + variant: "destructive", + }); } finally { setLoading(false); } - }, [user, isAuthenticated]); + }, [user, isAuthenticated, toast]); const installPlugin = useCallback(async (pluginId: string, version: string): Promise => { try { @@ -231,12 +264,18 @@ export const PluginProvider: React.FC = ({ children }) => { return true; } catch (err) { - setError(err instanceof Error ? 
err.message : 'Installation failed'); + const message = err instanceof Error ? err.message : 'Installation failed'; + setError(message); + toast({ + title: "Plugin installation failed", + description: message, + variant: "destructive", + }); return false; } finally { setLoading(false); } - }, [refreshInstalledPlugins, searchAvailablePlugins]); + }, [refreshInstalledPlugins, searchAvailablePlugins, toast]); const uninstallPlugin = async (pluginId: string, keepData = true): Promise => { try { @@ -263,7 +302,13 @@ export const PluginProvider: React.FC = ({ children }) => { return true; } catch (err) { - setError(err instanceof Error ? err.message : 'Uninstallation failed'); + const message = err instanceof Error ? err.message : 'Uninstallation failed'; + setError(message); + toast({ + title: "Plugin uninstall failed", + description: message, + variant: "destructive", + }); return false; } finally { setLoading(false); @@ -281,7 +326,13 @@ export const PluginProvider: React.FC = ({ children }) => { return true; } catch (err) { - setError(err instanceof Error ? err.message : 'Enable failed'); + const message = err instanceof Error ? err.message : 'Enable failed'; + setError(message); + toast({ + title: "Plugin enable failed", + description: message, + variant: "destructive", + }); return false; } }; @@ -297,7 +348,13 @@ export const PluginProvider: React.FC = ({ children }) => { return true; } catch (err) { - setError(err instanceof Error ? err.message : 'Disable failed'); + const message = err instanceof Error ? err.message : 'Disable failed'; + setError(message); + toast({ + title: "Plugin disable failed", + description: message, + variant: "destructive", + }); return false; } }; @@ -316,7 +373,13 @@ export const PluginProvider: React.FC = ({ children }) => { return true; } catch (err) { - setError(err instanceof Error ? err.message : 'Load failed'); + const message = err instanceof Error ? err.message : 'Load failed'; + setError(message); + toast({ + title: "Plugin load failed", + description: message, + variant: "destructive", + }); return false; } }; @@ -338,7 +401,13 @@ export const PluginProvider: React.FC = ({ children }) => { return true; } catch (err) { - setError(err instanceof Error ? err.message : 'Unload failed'); + const message = err instanceof Error ? err.message : 'Unload failed'; + setError(message); + toast({ + title: "Plugin unload failed", + description: message, + variant: "destructive", + }); return false; } }; @@ -348,6 +417,7 @@ export const PluginProvider: React.FC = ({ children }) => { const data = await apiRequest(`/${pluginId}/config`); return data; } catch (err) { + console.warn(`Failed to fetch configuration for plugin ${pluginId}`, err); return null; } }; @@ -371,7 +441,13 @@ export const PluginProvider: React.FC = ({ children }) => { return true; } catch (err) { - setError(err instanceof Error ? err.message : 'Failed to save configuration'); + const message = err instanceof Error ? 
err.message : 'Failed to save configuration'; + setError(message); + toast({ + title: "Save failed", + description: message, + variant: "destructive", + }); return false; } }; @@ -402,6 +478,7 @@ export const PluginProvider: React.FC = ({ children }) => { })); } } catch (chatbotError) { + console.warn(`Failed to populate chatbot options for plugin ${pluginId}`, chatbotError); } // Populate model options for AI settings @@ -426,6 +503,7 @@ export const PluginProvider: React.FC = ({ children }) => { schema.properties.draft_settings.properties.model.options = modelOptions; } } catch (modelError) { + console.warn(`Failed to populate model options for plugin ${pluginId}`, modelError); } } @@ -442,11 +520,19 @@ export const PluginProvider: React.FC = ({ children }) => { })); } } catch (modelError) { + console.warn(`Failed to populate Signal model options for plugin ${pluginId}`, modelError); } } return schema; } catch (err) { + const message = err instanceof Error ? err.message : 'Failed to load plugin schema'; + console.error(`Failed to load schema for plugin ${pluginId}`, err); + toast({ + title: "Schema load failed", + description: message, + variant: "destructive", + }); return null; } }; @@ -478,6 +564,12 @@ export const PluginProvider: React.FC = ({ children }) => { })); } catch (err) { + console.error(`Failed to load plugin components for ${pluginId}`, err); + toast({ + title: "Component load failed", + description: err instanceof Error ? err.message : 'Unable to load plugin components', + variant: "destructive", + }); } }; @@ -500,9 +592,51 @@ export const PluginProvider: React.FC = ({ children }) => { }; const isPluginPageAuthorized = (pluginId: string, pagePath: string): boolean => { - // TODO: Implement authorization logic based on user permissions const plugin = installedPlugins.find(p => p.id === pluginId); - return plugin?.status === 'enabled' && plugin?.loaded; + if (!plugin || plugin.status !== 'enabled' || !plugin.loaded) { + return false; + } + + const manifestPages = plugin.manifest?.spec?.ui_config?.pages ?? plugin.pages ?? []; + const page = manifestPages.find((p: any) => p.path === pagePath); + + const requiresAuth = page?.requiresAuth !== false; + if (requiresAuth && !isAuthenticated) { + return false; + } + + const requiredPermissions: string[] = + page?.required_permissions ?? + page?.requiredPermissions ?? + []; + + if (requiredPermissions.length > 0) { + return requiredPermissions.every(perm => hasPermission(perm)); + } + + if (!requiresAuth) { + return true; + } + + if (user?.role && ['super_admin', 'admin'].includes(user.role)) { + return true; + } + + const modulePrefix = `modules:${pluginId}`; + const hasModuleAccess = userPermissions.some(granted => { + if (granted === 'modules:*') { + return true; + } + if (granted.startsWith(`${modulePrefix}:`)) { + return true; + } + if (granted.endsWith(':*') && granted.startsWith(modulePrefix)) { + return true; + } + return false; + }); + + return hasModuleAccess; }; const getPluginComponent = (pluginId: string, componentName: string): React.ComponentType | null => { @@ -556,4 +690,4 @@ export const PluginProvider: React.FC = ({ children }) => { {children} ); -}; \ No newline at end of file +};