# enclava/backend/tests/e2e/test_openai_compatibility.py
"""
OpenAI API compatibility tests.
Ensure 100% compatibility with OpenAI Python client and API specification.
"""
import pytest
import openai
from openai import OpenAI
import asyncio
from typing import List, Dict, Any
from tests.clients.openai_test_client import (
    OpenAITestClient,
    AsyncOpenAITestClient,
    validate_openai_response_format,
)


class TestOpenAICompatibility:
    """Test OpenAI API compatibility using official OpenAI Python client"""

    BASE_URL = "http://localhost:3001/api/v1"  # Through nginx

    @pytest.fixture
    def test_api_key(self):
        """Test API key for OpenAI compatibility testing"""
        return "sk-test-compatibility-key-12345"

    @pytest.fixture
    def openai_client(self, test_api_key):
        """OpenAI client configured for Enclava"""
        return OpenAITestClient(
            base_url=self.BASE_URL,
            api_key=test_api_key
        )

    @pytest.fixture
    def async_openai_client(self, test_api_key):
        """Async OpenAI client for performance testing"""
        return AsyncOpenAITestClient(
            base_url=self.BASE_URL,
            api_key=test_api_key
        )
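
    # NOTE: the tests below index responses as plain dicts (response["choices"], ...).
    # A minimal sketch of what the OpenAITestClient wrapper is assumed to do -- an
    # illustration only, not the actual code in tests/clients/openai_test_client.py:
    #
    #     class OpenAITestClient:
    #         def __init__(self, base_url, api_key):
    #             self.client = OpenAI(base_url=base_url, api_key=api_key)
    #
    #         def create_chat_completion(self, model, messages, **kwargs):
    #             response = self.client.chat.completions.create(
    #                 model=model, messages=messages, **kwargs)
    #             return response.model_dump()  # plain dict in OpenAI wire format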

    def test_list_models(self, openai_client):
        """Test /v1/models endpoint with OpenAI client"""
        models = openai_client.list_models()

        # Verify response structure
        assert isinstance(models, list)
        assert len(models) > 0, "Should have at least one model"

        # Verify each model has required fields
        for model in models:
            errors = validate_openai_response_format(model, "models")
            assert len(errors) == 0, f"Model validation errors: {errors}"
            assert model["object"] == "model"
            assert "id" in model
            assert "created" in model
            assert "owned_by" in model

    def test_chat_completion_basic(self, openai_client):
        """Test basic chat completion with OpenAI client"""
        response = openai_client.create_chat_completion(
            model="test-model",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": "Say hello!"}
            ],
            max_tokens=100,
            temperature=0.7
        )

        # Validate response structure
        errors = validate_openai_response_format(response, "chat_completion")
        assert len(errors) == 0, f"Chat completion validation errors: {errors}"

        # Verify required fields
        assert "id" in response
        assert "object" in response
        assert response["object"] == "chat.completion"
        assert "created" in response
        assert "model" in response
        assert "choices" in response
        assert len(response["choices"]) > 0

        # Verify choice structure
        choice = response["choices"][0]
        assert "index" in choice
        assert "message" in choice
        assert "finish_reason" in choice

        # Verify message structure
        message = choice["message"]
        assert "role" in message
        assert "content" in message
        assert message["role"] == "assistant"
        assert isinstance(message["content"], str)
        assert len(message["content"]) > 0

        # Verify usage tracking
        assert "usage" in response
        usage = response["usage"]
        assert "prompt_tokens" in usage
        assert "completion_tokens" in usage
        assert "total_tokens" in usage
        assert usage["total_tokens"] == usage["prompt_tokens"] + usage["completion_tokens"]

    def test_chat_completion_streaming(self, openai_client):
        """Test streaming chat completion"""
        chunks = openai_client.test_streaming_completion(
            model="test-model",
            messages=[{"role": "user", "content": "Count to 5"}],
            max_tokens=100
        )

        # Should receive multiple chunks
        assert len(chunks) > 1, "Streaming should produce multiple chunks"

        # Verify chunk structure
        for i, chunk in enumerate(chunks):
            assert "id" in chunk
            assert "object" in chunk
            assert chunk["object"] == "chat.completion.chunk"
            assert "created" in chunk
            assert "model" in chunk
            assert "choices" in chunk

            if len(chunk["choices"]) > 0:
                choice = chunk["choices"][0]
                assert "index" in choice
                assert "delta" in choice

                # Last chunk should have finish_reason
                if i == len(chunks) - 1:
                    assert choice.get("finish_reason") is not None
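
    # A sketch of how the streaming helper is assumed to collect chunks -- hypothetical;
    # the real logic lives in OpenAITestClient.test_streaming_completion:
    #
    #     def test_streaming_completion(self, model, messages, **kwargs):
    #         stream = self.client.chat.completions.create(
    #             model=model, messages=messages, stream=True, **kwargs)
    #         return [chunk.model_dump() for chunk in stream]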

    def test_chat_completion_with_functions(self, openai_client):
        """Test chat completion with function calling (if supported)"""
        try:
            functions = [
                {
                    "name": "get_weather",
                    "description": "Get weather information for a location",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "location": {
                                "type": "string",
                                "description": "The city and state"
                            }
                        },
                        "required": ["location"]
                    }
                }
            ]

            response = openai_client.create_chat_completion(
                model="test-model",
                messages=[{"role": "user", "content": "What's the weather in San Francisco?"}],
                functions=functions,
                max_tokens=100
            )

            # If functions are supported, verify structure
            if response.get("choices") and response["choices"][0].get("message"):
                message = response["choices"][0]["message"]
                if "function_call" in message:
                    function_call = message["function_call"]
                    assert "name" in function_call
                    assert "arguments" in function_call
        except openai.BadRequestError:
            # Functions might not be supported, that's okay
            pytest.skip("Function calling not supported")

    def test_embeddings(self, openai_client):
        """Test embeddings endpoint"""
        try:
            response = openai_client.create_embedding(
                model="text-embedding-ada-002",
                input_text="Hello world"
            )

            # Validate response structure
            errors = validate_openai_response_format(response, "embeddings")
            assert len(errors) == 0, f"Embeddings validation errors: {errors}"

            # Verify required fields
            assert "object" in response
            assert response["object"] == "list"
            assert "data" in response
            assert len(response["data"]) > 0
            assert "model" in response
            assert "usage" in response

            # Verify embedding structure
            embedding_obj = response["data"][0]
            assert "object" in embedding_obj
            assert embedding_obj["object"] == "embedding"
            assert "embedding" in embedding_obj
            assert "index" in embedding_obj

            # Verify embedding is list of floats
            embedding = embedding_obj["embedding"]
            assert isinstance(embedding, list)
            assert len(embedding) > 0
            assert all(isinstance(x, (int, float)) for x in embedding)
        except openai.NotFoundError:
            pytest.skip("Embedding model not available")

    def test_completions_legacy(self, openai_client):
        """Test legacy completions endpoint"""
        try:
            response = openai_client.create_completion(
                model="test-model",
                prompt="Say hello",
                max_tokens=50
            )

            # Verify response structure
            assert "id" in response
            assert "object" in response
            assert response["object"] == "text_completion"
            assert "created" in response
            assert "model" in response
            assert "choices" in response

            # Verify choice structure
            choice = response["choices"][0]
            assert "text" in choice
            assert "index" in choice
            assert "finish_reason" in choice
        except openai.NotFoundError:
            pytest.skip("Legacy completions not supported")

    def test_error_handling(self, openai_client):
        """Test OpenAI-compatible error responses"""
        error_tests = openai_client.test_error_handling()

        # Verify error test results
        assert "error_tests" in error_tests
        error_results = error_tests["error_tests"]

        # Should have tested multiple error scenarios
        assert len(error_results) > 0

        # Check for proper error handling
        for test_result in error_results:
            if "error_type" in test_result:
                # Should be proper OpenAI error types
                assert test_result["error_type"] in [
                    "BadRequestError",
                    "AuthenticationError",
                    "RateLimitError",
                    "NotFoundError"
                ]
                # Should have proper HTTP status codes
                assert test_result.get("status_code") >= 400
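
    # The helper's return value is assumed to look roughly like the sketch below (one
    # entry per provoked failure); the exact keys come from
    # OpenAITestClient.test_error_handling, so treat this as illustrative only:
    #
    #     {"error_tests": [
    #         {"error_type": "NotFoundError", "status_code": 404},
    #         {"error_type": "AuthenticationError", "status_code": 401},
    #     ]}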

    def test_parameter_validation(self, openai_client):
        """Test parameter validation"""
        # Test invalid temperature
        try:
            openai_client.create_chat_completion(
                model="test-model",
                messages=[{"role": "user", "content": "test"}],
                temperature=2.5  # Should be between 0 and 2
            )
            # If this succeeds, the API is too permissive but that's okay
        except openai.BadRequestError as e:
            assert e.response.status_code == 400

        # Test invalid max_tokens
        try:
            openai_client.create_chat_completion(
                model="test-model",
                messages=[{"role": "user", "content": "test"}],
                max_tokens=-1  # Should be positive
            )
        except openai.BadRequestError as e:
            assert e.response.status_code == 400

    @pytest.mark.asyncio
    async def test_concurrent_requests(self, async_openai_client):
        """Test concurrent API requests"""
        results = await async_openai_client.test_concurrent_requests(10)

        # Verify results
        assert len(results) == 10

        # Calculate success rate
        successful_requests = sum(1 for r in results if r["success"])
        success_rate = successful_requests / len(results)

        # Should handle concurrent requests reasonably well
        assert success_rate >= 0.5, f"Low success rate for concurrent requests: {success_rate}"

        # Check response times
        response_times = [r["response_time"] for r in results if r["success"]]
        if response_times:
            avg_response_time = sum(response_times) / len(response_times)
            assert avg_response_time < 10.0, f"High average response time: {avg_response_time}s"
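
    # A plausible shape for the concurrency helper, assuming it fans requests out with
    # asyncio.gather and records per-request timing -- illustrative only, not the real code:
    #
    #     async def test_concurrent_requests(self, n):
    #         async def one():
    #             start = time.monotonic()
    #             try:
    #                 await self.client.chat.completions.create(
    #                     model="test-model",
    #                     messages=[{"role": "user", "content": "ping"}],
    #                     max_tokens=5)
    #                 return {"success": True, "response_time": time.monotonic() - start}
    #             except Exception:
    #                 return {"success": False, "response_time": time.monotonic() - start}
    #         return await asyncio.gather(*(one() for _ in range(n)))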

    @pytest.mark.asyncio
    async def test_streaming_performance(self, async_openai_client):
        """Test streaming response performance"""
        stream_results = await async_openai_client.test_streaming_performance()

        if "error" not in stream_results:
            # Verify streaming metrics
            assert stream_results["chunk_count"] > 0
            assert stream_results["total_time"] > 0

            # First chunk should arrive quickly
            if stream_results["first_chunk_time"]:
                assert stream_results["first_chunk_time"] < 5.0, "First chunk took too long"

    def test_model_parameter_compatibility(self, openai_client):
        """Test model parameter compatibility"""
        # Test with different model names
        model_names = ["test-model", "gpt-3.5-turbo", "gpt-4"]

        for model_name in model_names:
            try:
                response = openai_client.create_chat_completion(
                    model=model_name,
                    messages=[{"role": "user", "content": "test"}],
                    max_tokens=10
                )
                # If successful, verify model name is preserved
                assert response["model"] == model_name or "test-model" in response["model"]
            except openai.NotFoundError:
                # Model not available, that's okay
                continue
            except openai.BadRequestError:
                # Model name not accepted, that's okay
                continue

    def test_message_roles_compatibility(self, openai_client):
        """Test different message roles"""
        # Test with system, user, assistant roles
        messages = [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "Hello"},
            {"role": "assistant", "content": "Hi there!"},
            {"role": "user", "content": "How are you?"}
        ]

        try:
            response = openai_client.create_chat_completion(
                model="test-model",
                messages=messages,
                max_tokens=50
            )
            # Should handle conversation context properly
            assert response["choices"][0]["message"]["role"] == "assistant"
        except Exception as e:
            pytest.fail(f"Failed to handle message roles: {e}")

    def test_special_characters_handling(self, openai_client):
        """Test handling of special characters and unicode"""
        special_messages = [
            "Hello 世界! 🌍",
            "Math: ∑(x²) = ∫f(x)dx",
            "Code: print('hello\\nworld')",
            "Quotes: \"He said 'hello'\""
        ]

        for message in special_messages:
            try:
                response = openai_client.create_chat_completion(
                    model="test-model",
                    messages=[{"role": "user", "content": message}],
                    max_tokens=50
                )
                # Should return valid response
                assert len(response["choices"][0]["message"]["content"]) > 0
            except Exception as e:
                pytest.fail(f"Failed to handle special characters in '{message}': {e}")

    def test_openai_client_types(self, test_api_key):
        """Test that responses work with OpenAI client type expectations"""
        client = OpenAI(api_key=test_api_key, base_url=self.BASE_URL)

        try:
            # Test that the client can parse responses correctly
            response = client.chat.completions.create(
                model="test-model",
                messages=[{"role": "user", "content": "test"}],
                max_tokens=10
            )

            # These should not raise AttributeError
            assert hasattr(response, 'id')
            assert hasattr(response, 'choices')
            assert hasattr(response, 'usage')
            assert hasattr(response.choices[0], 'message')
            assert hasattr(response.choices[0].message, 'content')
        except openai.AuthenticationError:
            # Expected if test API key is not set up
            pytest.skip("Test API key not configured")
        except Exception as e:
            pytest.fail(f"OpenAI client type compatibility failed: {e}")