# enclava/backend/tests/e2e/test_chatbot_rag_workflow.py
"""
Complete chatbot workflow tests with RAG integration.
Test the entire pipeline from document upload to chat responses with knowledge retrieval.
"""
import pytest
import asyncio
from typing import Dict, Any, List

from tests.clients.chatbot_api_client import ChatbotAPITestClient
from tests.fixtures.test_data_manager import TestDataManager
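
# These tests use async fixtures and @pytest.mark.asyncio, so they assume the
# pytest-asyncio plugin is installed and enabled for the test run.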


class TestChatbotRAGWorkflow:
    """Test complete chatbot workflow with RAG integration"""

    BASE_URL = "http://localhost:3001"  # Through nginx

    @pytest.fixture
    async def api_client(self):
        """Chatbot API test client"""
        return ChatbotAPITestClient(self.BASE_URL)

    @pytest.fixture
    async def authenticated_client(self, api_client):
        """Pre-authenticated API client"""
        # Register and authenticate a test user
        email = "ragtest@example.com"
        password = "testpass123"
        username = "ragtestuser"

        # Register the user (409 = already exists)
        register_result = await api_client.register_user(email, password, username)
        if register_result["status_code"] not in [201, 409]:
            pytest.fail(f"Failed to register user: {register_result}")

        # Authenticate
        auth_result = await api_client.authenticate(email, password)
        if not auth_result["success"]:
            pytest.fail(f"Failed to authenticate: {auth_result}")

        return api_client

    @pytest.fixture
    def sample_documents(self):
        """Sample documents for RAG testing"""
        return {
            "installation_guide": {
                "filename": "installation_guide.md",
                "content": """
# Enclava Platform Installation Guide

## System Requirements
- Python 3.8 or higher
- Docker and Docker Compose
- PostgreSQL 13+
- Redis 6+
- At least 4GB RAM

## Installation Steps
1. Clone the repository
2. Copy .env.example to .env
3. Run docker-compose up --build
4. Access the application at http://localhost:3000

## Troubleshooting
- If port 3000 is in use, modify docker-compose.yml
- Check Docker daemon is running
- Ensure all required ports are available
""",
                "test_questions": [
                    {
                        "question": "What are the system requirements for Enclava?",
                        "expected_keywords": ["Python 3.8", "Docker", "PostgreSQL", "Redis", "4GB RAM"],
                        "min_keywords": 3,
                    },
                    {
                        "question": "How do I install Enclava?",
                        "expected_keywords": ["clone", "repository", ".env", "docker-compose up", "localhost:3000"],
                        "min_keywords": 3,
                    },
                    {
                        "question": "What should I do if port 3000 is in use?",
                        "expected_keywords": ["modify", "docker-compose.yml", "port"],
                        "min_keywords": 2,
                    },
                ],
            },
            "api_reference": {
                "filename": "api_reference.md",
                "content": """
# Enclava API Reference

## Authentication
All API requests require authentication using Bearer tokens or API keys.

## Endpoints

### GET /api/v1/models
List available AI models
Response: {"data": [{"id": "model-name", "object": "model", ...}]}

### POST /api/v1/chat/completions
Create chat completion
Body: {"model": "model-name", "messages": [...], "temperature": 0.7}
Response: {"choices": [{"message": {"content": "response"}}]}

### POST /api/v1/embeddings
Generate text embeddings
Body: {"model": "embedding-model", "input": "text to embed"}
Response: {"data": [{"embedding": [...]}]}

## Rate Limits
- Free tier: 60 requests per minute
- Pro tier: 600 requests per minute
""",
                "test_questions": [
                    {
                        "question": "How do I authenticate with the Enclava API?",
                        "expected_keywords": ["Bearer token", "API key", "authentication"],
                        "min_keywords": 2,
                    },
                    {
                        "question": "What is the endpoint for chat completions?",
                        "expected_keywords": ["/api/v1/chat/completions", "POST"],
                        "min_keywords": 1,
                    },
                    {
                        "question": "What are the rate limits?",
                        "expected_keywords": ["60 requests", "600 requests", "per minute", "free tier", "pro tier"],
                        "min_keywords": 3,
                    },
                ],
            },
        }

    @pytest.mark.asyncio
    async def test_complete_rag_workflow(self, authenticated_client, sample_documents):
        """Test complete RAG workflow from document upload to chat response"""
        # Test with the installation guide document
        doc_info = sample_documents["installation_guide"]

        result = await authenticated_client.test_rag_workflow(
            collection_name="Installation Guide Collection",
            document_content=doc_info["content"],
            chatbot_name="Installation Assistant",
            test_question=doc_info["test_questions"][0]["question"],
        )

        assert result["success"], f"RAG workflow failed: {result.get('error')}"
        assert result["workflow_complete"], "Workflow did not complete successfully"
        assert result["rag_working"], "RAG functionality is not working"

        # Verify all workflow steps succeeded
        workflow_results = result["results"]
        assert workflow_results["collection_creation"]["success"]
        assert workflow_results["document_upload"]["success"]
        assert workflow_results["document_processing"]["success"]
        assert workflow_results["chatbot_creation"]["success"]
        assert workflow_results["api_key_creation"]["success"]
        assert workflow_results["chat_test"]["success"]

        # Verify RAG sources were provided
        rag_verification = workflow_results["rag_verification"]
        assert rag_verification["has_sources"]
        assert rag_verification["source_count"] > 0
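
    # test_rag_workflow (on ChatbotAPITestClient) chains the whole pipeline the
    # assertions above inspect: create collection -> upload document -> wait
    # for processing -> create chatbot -> create API key -> send a test chat
    # message -> verify that RAG sources came back.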

    @pytest.mark.asyncio
    async def test_rag_knowledge_accuracy(self, authenticated_client, sample_documents):
        """Test RAG system accuracy with known documents and questions"""
        for doc_key, doc_info in sample_documents.items():
            # Create a RAG workflow for this document
            workflow_result = await authenticated_client.test_rag_workflow(
                collection_name=f"Test Collection - {doc_key}",
                document_content=doc_info["content"],
                chatbot_name=f"Test Assistant - {doc_key}",
                test_question=doc_info["test_questions"][0]["question"],  # Use the first question for setup
            )
            if not workflow_result["success"]:
                pytest.fail(f"Failed to set up RAG workflow for {doc_key}: {workflow_result.get('error')}")

            # Extract chatbot info for testing
            chatbot_id = workflow_result["results"]["chatbot_creation"]["data"]["id"]
            api_key = workflow_result["results"]["api_key_creation"]["data"]["key"]

            # Test each question for this document
            for question_data in doc_info["test_questions"]:
                chat_result = await authenticated_client.chat_with_bot(
                    chatbot_id=chatbot_id,
                    message=question_data["question"],
                    api_key=api_key,
                )
                assert chat_result["success"], f"Chat failed for question: {question_data['question']}"

                # Analyze response accuracy: the fraction of expected keywords
                # found in the response must reach min_keywords / total keywords
                response_text = chat_result["data"]["response"].lower()
                keywords_found = sum(
                    1 for keyword in question_data["expected_keywords"]
                    if keyword.lower() in response_text
                )
                accuracy = keywords_found / len(question_data["expected_keywords"])
                min_accuracy = question_data["min_keywords"] / len(question_data["expected_keywords"])
                assert accuracy >= min_accuracy, (
                    f"Accuracy {accuracy:.2f} below minimum {min_accuracy:.2f} "
                    f"for question: {question_data['question']} in {doc_key}"
                )

                # Verify sources were provided
                assert "sources" in chat_result["data"], f"No sources provided for question in {doc_key}"
                assert len(chat_result["data"]["sources"]) > 0, f"Empty sources for question in {doc_key}"
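
    # Worked example of the threshold above: the system-requirements question
    # lists 5 expected keywords with min_keywords=3, so a passing response must
    # contain at least 3 of the 5 (accuracy >= 0.6). The inline check reduces
    # to a small helper like this (illustrative sketch only):
    #
    #     def keyword_accuracy(response_text: str, expected: List[str]) -> float:
    #         text = response_text.lower()
    #         return sum(kw.lower() in text for kw in expected) / len(expected)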

    @pytest.mark.asyncio
    async def test_conversation_memory_with_rag(self, authenticated_client, sample_documents):
        """Test conversation memory functionality with RAG"""
        # Set up a RAG chatbot
        doc_info = sample_documents["api_reference"]
        workflow_result = await authenticated_client.test_rag_workflow(
            collection_name="Memory Test Collection",
            document_content=doc_info["content"],
            chatbot_name="Memory Test Assistant",
            test_question="What is the API reference?",
        )
        assert workflow_result["success"], f"Failed to set up RAG workflow: {workflow_result.get('error')}"

        chatbot_id = workflow_result["results"]["chatbot_creation"]["data"]["id"]
        api_key = workflow_result["results"]["api_key_creation"]["data"]["key"]

        # Test conversation memory
        memory_result = await authenticated_client.test_conversation_memory(chatbot_id, api_key)

        # Verify the conversation was maintained
        assert memory_result["conversation_maintained"], "Conversation ID was not maintained across messages"

        # Verify memory is working (may be challenging with RAG, so we're lenient)
        conversation_results = memory_result["conversation_results"]
        assert len(conversation_results) >= 3, "Not all conversation messages were processed"

        # All messages should have gotten responses
        for result in conversation_results:
            assert "response" in result or "error" in result, "Message did not get a response"

    @pytest.mark.asyncio
    async def test_multi_document_rag(self, authenticated_client, sample_documents):
        """Test RAG with multiple documents in one collection"""
        # Create the collection
        collection_result = await authenticated_client.create_rag_collection(
            name="Multi-Document Collection",
            description="Collection with multiple documents for testing",
        )
        assert collection_result["success"], f"Failed to create collection: {collection_result}"
        collection_id = collection_result["data"]["id"]

        # Upload multiple documents
        uploaded_docs = []
        for doc_key, doc_info in sample_documents.items():
            upload_result = await authenticated_client.upload_document(
                collection_id=collection_id,
                file_content=doc_info["content"],
                filename=doc_info["filename"],
            )
            assert upload_result["success"], f"Failed to upload {doc_key}: {upload_result}"

            # Wait for processing
            doc_id = upload_result["data"]["id"]
            processing_result = await authenticated_client.wait_for_document_processing(doc_id)
            assert processing_result["success"], f"Processing failed for {doc_key}: {processing_result}"
            uploaded_docs.append(doc_key)

        # Create a chatbot with access to all documents
        chatbot_result = await authenticated_client.create_chatbot(
            name="Multi-Doc Assistant",
            use_rag=True,
            rag_collection="Multi-Document Collection",
        )
        assert chatbot_result["success"], f"Failed to create chatbot: {chatbot_result}"
        chatbot_id = chatbot_result["data"]["id"]

        # Create an API key
        api_key_result = await authenticated_client.create_api_key_for_chatbot(chatbot_id)
        assert api_key_result["success"], f"Failed to create API key: {api_key_result}"
        api_key = api_key_result["data"]["key"]

        # Questions that should draw from different documents
        test_questions = [
            "How do I install Enclava?",  # Should use the installation guide
            "What are the API endpoints?",  # Should use the API reference
            "Tell me about both installation and API usage",  # Should use both documents
        ]
        for question in test_questions:
            chat_result = await authenticated_client.chat_with_bot(
                chatbot_id=chatbot_id,
                message=question,
                api_key=api_key,
            )
            assert chat_result["success"], f"Chat failed for multi-doc question: {question}"
            assert "sources" in chat_result["data"], f"No sources for multi-doc question: {question}"
            assert len(chat_result["data"]["sources"]) > 0, f"Empty sources for multi-doc question: {question}"
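
    # For reference, a processing-wait helper like the one called above is
    # typically a timeout-bounded poll loop. This is an illustrative sketch of
    # the idea only, not ChatbotAPITestClient's actual implementation; the
    # get_document call and "processed" status name are assumptions:
    #
    #     async def wait_for_document_processing(self, doc_id, timeout=60):
    #         deadline = time.monotonic() + timeout
    #         while time.monotonic() < deadline:
    #             doc = await self.get_document(doc_id)
    #             if doc["data"]["status"] == "processed":
    #                 return {"success": True, "data": doc["data"]}
    #             await asyncio.sleep(1)
    #         return {"success": False, "error": f"document {doc_id} not processed within {timeout}s"}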

    @pytest.mark.asyncio
    async def test_rag_collection_isolation(self, authenticated_client, sample_documents):
        """Test that RAG collections are properly isolated"""
        # Create two separate collections with different documents
        doc1 = sample_documents["installation_guide"]
        doc2 = sample_documents["api_reference"]

        # Collection 1 holds only the installation guide
        workflow1 = await authenticated_client.test_rag_workflow(
            collection_name="Installation Only Collection",
            document_content=doc1["content"],
            chatbot_name="Installation Only Bot",
            test_question="What is installation?",
        )
        assert workflow1["success"], "Failed to create first RAG workflow"

        # Collection 2 holds only the API reference
        workflow2 = await authenticated_client.test_rag_workflow(
            collection_name="API Only Collection",
            document_content=doc2["content"],
            chatbot_name="API Only Bot",
            test_question="What is API?",
        )
        assert workflow2["success"], "Failed to create second RAG workflow"

        # Extract chatbot info
        bot1_id = workflow1["results"]["chatbot_creation"]["data"]["id"]
        bot1_key = workflow1["results"]["api_key_creation"]["data"]["key"]
        bot2_id = workflow2["results"]["chatbot_creation"]["data"]["id"]
        bot2_key = workflow2["results"]["api_key_creation"]["data"]["key"]

        # Check for cross-contamination. These are soft checks that log rather
        # than fail, since either bot may still give a generic answer from the
        # model's own knowledge.

        # Bot 1 (installation docs only) should not have API rate-limit details
        api_question = "What are the rate limits?"
        result1 = await authenticated_client.chat_with_bot(bot1_id, api_question, api_key=bot1_key)
        if result1["success"]:
            response1 = result1["data"]["response"].lower()
            has_rate_info = "60 requests" in response1 or "600 requests" in response1
            if has_rate_info:
                print(f"Warning: installation-only bot returned API rate-limit details: {response1[:200]}")

        # Bot 2 (API docs only) should not have installation system requirements
        install_question = "What are the system requirements?"
        result2 = await authenticated_client.chat_with_bot(bot2_id, install_question, api_key=bot2_key)
        if result2["success"]:
            response2 = result2["data"]["response"].lower()
            has_install_info = "python 3.8" in response2 or "docker" in response2
            if has_install_info:
                print(f"Warning: API-only bot returned installation details: {response2[:200]}")

    @pytest.mark.asyncio
    async def test_rag_error_handling(self, authenticated_client):
        """Test RAG error handling scenarios"""
        # Point a chatbot at a collection that does not exist
        chatbot_result = await authenticated_client.create_chatbot(
            name="Error Test Bot",
            use_rag=True,
            rag_collection="NonExistentCollection",
        )

        # Creation should either fail or handle the missing collection gracefully
        if chatbot_result["success"]:
            # If creation succeeded, test that chat handles the missing collection gracefully
            chatbot_id = chatbot_result["data"]["id"]
            api_key_result = await authenticated_client.create_api_key_for_chatbot(chatbot_id)
            if api_key_result["success"]:
                api_key = api_key_result["data"]["key"]
                chat_result = await authenticated_client.chat_with_bot(
                    chatbot_id=chatbot_id,
                    message="Tell me about something",
                    api_key=api_key,
                )
                # Should handle this gracefully: either succeed with a fallback
                # or fail cleanly. Don't assert success or failure, just ensure
                # a structured result comes back instead of a crash.
                assert "data" in chat_result or "error" in chat_result

    @pytest.mark.asyncio
    async def test_rag_document_types(self, authenticated_client):
        """Test RAG with different document types and formats"""
        document_types = {
            "markdown": {
                "filename": "test.md",
                "content": "# Markdown Test\n\nThis is **bold** text and *italic* text.\n\n- List item 1\n- List item 2",
            },
            "plain_text": {
                "filename": "test.txt",
                "content": "This is plain text content for testing document processing and retrieval.",
            },
            "json_like": {
                "filename": "config.txt",
                "content": '{"setting": "value", "number": 42, "enabled": true}',
            },
        }

        # Create the collection
        collection_result = await authenticated_client.create_rag_collection(
            name="Document Types Collection",
            description="Testing different document formats",
        )
        assert collection_result["success"], f"Failed to create collection: {collection_result}"
        collection_id = collection_result["data"]["id"]

        # Upload each document type
        for doc_type, doc_info in document_types.items():
            upload_result = await authenticated_client.upload_document(
                collection_id=collection_id,
                file_content=doc_info["content"],
                filename=doc_info["filename"],
            )
            assert upload_result["success"], f"Failed to upload {doc_type}: {upload_result}"

            # Wait for processing
            doc_id = upload_result["data"]["id"]
            processing_result = await authenticated_client.wait_for_document_processing(doc_id, timeout=30)
            assert processing_result["success"], f"Processing failed for {doc_type}: {processing_result}"

        # Create a chatbot to test all document types
        chatbot_result = await authenticated_client.create_chatbot(
            name="Document Types Bot",
            use_rag=True,
            rag_collection="Document Types Collection",
        )
        assert chatbot_result["success"], f"Failed to create chatbot: {chatbot_result}"
        chatbot_id = chatbot_result["data"]["id"]

        api_key_result = await authenticated_client.create_api_key_for_chatbot(chatbot_id)
        assert api_key_result["success"], f"Failed to create API key: {api_key_result}"
        api_key = api_key_result["data"]["key"]

        # Questions targeting the different document types
        test_questions = [
            "What is bold text?",  # Should find the markdown document
            "What is the plain text content?",  # Should find the plain-text document
            "What is the setting value?",  # Should find the JSON-like content
        ]
        for question in test_questions:
            chat_result = await authenticated_client.chat_with_bot(
                chatbot_id=chatbot_id,
                message=question,
                api_key=api_key,
            )
            assert chat_result["success"], f"Chat failed for document type question: {question}"
            # Sources should be present even if answer quality varies
            assert "sources" in chat_result["data"], f"No sources for question: {question}"