# enclava/backend/tests/e2e/test_openai_compatibility.py
"""
OpenAI API compatibility tests.
Ensure 100% compatibility with OpenAI Python client and API specification.
"""
import pytest
import openai
from openai import OpenAI
import asyncio
from typing import List, Dict, Any
from tests.clients.openai_test_client import (
    OpenAITestClient,
    AsyncOpenAITestClient,
    validate_openai_response_format,
)


class TestOpenAICompatibility:
    """Test OpenAI API compatibility using official OpenAI Python client"""

    BASE_URL = "http://localhost:3001/api/v1"  # Through nginx

    @pytest.fixture
    def test_api_key(self):
        """Test API key for OpenAI compatibility testing"""
        return "sk-test-compatibility-key-12345"

    @pytest.fixture
    def openai_client(self, test_api_key):
        """OpenAI client configured for Enclava"""
        return OpenAITestClient(
            base_url=self.BASE_URL,
            api_key=test_api_key
        )

    @pytest.fixture
    def async_openai_client(self, test_api_key):
        """Async OpenAI client for performance testing"""
        return AsyncOpenAITestClient(
            base_url=self.BASE_URL,
            api_key=test_api_key
        )
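
    # NOTE: the tests below index responses as plain dicts (response["choices"], ...).
    # A minimal sketch of what the OpenAITestClient wrapper is assumed to do -- an
    # illustration only, not the actual code in tests/clients/openai_test_client.py:
    #
    #     class OpenAITestClient:
    #         def __init__(self, base_url, api_key):
    #             self.client = OpenAI(base_url=base_url, api_key=api_key)
    #
    #         def create_chat_completion(self, model, messages, **kwargs):
    #             response = self.client.chat.completions.create(
    #                 model=model, messages=messages, **kwargs)
    #             return response.model_dump()  # plain dict in OpenAI wire format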

    def test_list_models(self, openai_client):
        """Test /v1/models endpoint with OpenAI client"""
        models = openai_client.list_models()

        # Verify response structure
        assert isinstance(models, list)
        assert len(models) > 0, "Should have at least one model"

        # Verify each model has required fields
        for model in models:
            errors = validate_openai_response_format(model, "models")
            assert len(errors) == 0, f"Model validation errors: {errors}"
            assert model["object"] == "model"
            assert "id" in model
            assert "created" in model
            assert "owned_by" in model

    def test_chat_completion_basic(self, openai_client):
        """Test basic chat completion with OpenAI client"""
        response = openai_client.create_chat_completion(
            model="test-model",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": "Say hello!"}
            ],
            max_tokens=100,
            temperature=0.7
        )

        # Validate response structure
        errors = validate_openai_response_format(response, "chat_completion")
        assert len(errors) == 0, f"Chat completion validation errors: {errors}"

        # Verify required fields
        assert "id" in response
        assert "object" in response
        assert response["object"] == "chat.completion"
        assert "created" in response
        assert "model" in response
        assert "choices" in response
        assert len(response["choices"]) > 0

        # Verify choice structure
        choice = response["choices"][0]
        assert "index" in choice
        assert "message" in choice
        assert "finish_reason" in choice

        # Verify message structure
        message = choice["message"]
        assert "role" in message
        assert "content" in message
        assert message["role"] == "assistant"
        assert isinstance(message["content"], str)
        assert len(message["content"]) > 0

        # Verify usage tracking
        assert "usage" in response
        usage = response["usage"]
        assert "prompt_tokens" in usage
        assert "completion_tokens" in usage
        assert "total_tokens" in usage
        assert usage["total_tokens"] == usage["prompt_tokens"] + usage["completion_tokens"]

    def test_chat_completion_streaming(self, openai_client):
        """Test streaming chat completion"""
        chunks = openai_client.test_streaming_completion(
            model="test-model",
            messages=[{"role": "user", "content": "Count to 5"}],
            max_tokens=100
        )

        # Should receive multiple chunks
        assert len(chunks) > 1, "Streaming should produce multiple chunks"

        # Verify chunk structure
        for i, chunk in enumerate(chunks):
            assert "id" in chunk
            assert "object" in chunk
            assert chunk["object"] == "chat.completion.chunk"
            assert "created" in chunk
            assert "model" in chunk
            assert "choices" in chunk

            if len(chunk["choices"]) > 0:
                choice = chunk["choices"][0]
                assert "index" in choice
                assert "delta" in choice

                # Last chunk should have finish_reason
                if i == len(chunks) - 1:
                    assert choice.get("finish_reason") is not None
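
    # A sketch of how the streaming helper is assumed to collect chunks -- hypothetical;
    # the real logic lives in OpenAITestClient.test_streaming_completion:
    #
    #     def test_streaming_completion(self, model, messages, **kwargs):
    #         stream = self.client.chat.completions.create(
    #             model=model, messages=messages, stream=True, **kwargs)
    #         return [chunk.model_dump() for chunk in stream]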

    def test_chat_completion_with_functions(self, openai_client):
        """Test chat completion with function calling (if supported)"""
        try:
            functions = [
                {
                    "name": "get_weather",
                    "description": "Get weather information for a location",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "location": {
                                "type": "string",
                                "description": "The city and state"
                            }
                        },
                        "required": ["location"]
                    }
                }
            ]

            response = openai_client.create_chat_completion(
                model="test-model",
                messages=[{"role": "user", "content": "What's the weather in San Francisco?"}],
                functions=functions,
                max_tokens=100
            )

            # If functions are supported, verify structure
            if response.get("choices") and response["choices"][0].get("message"):
                message = response["choices"][0]["message"]
                if "function_call" in message:
                    function_call = message["function_call"]
                    assert "name" in function_call
                    assert "arguments" in function_call
        except openai.BadRequestError:
            # Functions might not be supported, that's okay
            pytest.skip("Function calling not supported")

    def test_embeddings(self, openai_client):
        """Test embeddings endpoint"""
        try:
            response = openai_client.create_embedding(
                model="text-embedding-ada-002",
                input_text="Hello world"
            )

            # Validate response structure
            errors = validate_openai_response_format(response, "embeddings")
            assert len(errors) == 0, f"Embeddings validation errors: {errors}"

            # Verify required fields
            assert "object" in response
            assert response["object"] == "list"
            assert "data" in response
            assert len(response["data"]) > 0
            assert "model" in response
            assert "usage" in response

            # Verify embedding structure
            embedding_obj = response["data"][0]
            assert "object" in embedding_obj
            assert embedding_obj["object"] == "embedding"
            assert "embedding" in embedding_obj
            assert "index" in embedding_obj

            # Verify embedding is list of floats
            embedding = embedding_obj["embedding"]
            assert isinstance(embedding, list)
            assert len(embedding) > 0
            assert all(isinstance(x, (int, float)) for x in embedding)
        except openai.NotFoundError:
            pytest.skip("Embedding model not available")

    def test_completions_legacy(self, openai_client):
        """Test legacy completions endpoint"""
        try:
            response = openai_client.create_completion(
                model="test-model",
                prompt="Say hello",
                max_tokens=50
            )

            # Verify response structure
            assert "id" in response
            assert "object" in response
            assert response["object"] == "text_completion"
            assert "created" in response
            assert "model" in response
            assert "choices" in response

            # Verify choice structure
            choice = response["choices"][0]
            assert "text" in choice
            assert "index" in choice
            assert "finish_reason" in choice
        except openai.NotFoundError:
            pytest.skip("Legacy completions not supported")

    def test_error_handling(self, openai_client):
        """Test OpenAI-compatible error responses"""
        error_tests = openai_client.test_error_handling()

        # Verify error test results
        assert "error_tests" in error_tests
        error_results = error_tests["error_tests"]

        # Should have tested multiple error scenarios
        assert len(error_results) > 0

        # Check for proper error handling
        for test_result in error_results:
            if "error_type" in test_result:
                # Should be proper OpenAI error types
                assert test_result["error_type"] in [
                    "BadRequestError",
                    "AuthenticationError",
                    "RateLimitError",
                    "NotFoundError"
                ]
                # Should have proper HTTP status codes
                assert test_result.get("status_code") >= 400
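
    # The helper's return value is assumed to look roughly like the sketch below (one
    # entry per provoked failure); the exact keys come from
    # OpenAITestClient.test_error_handling, so treat this as illustrative only:
    #
    #     {"error_tests": [
    #         {"error_type": "NotFoundError", "status_code": 404},
    #         {"error_type": "AuthenticationError", "status_code": 401},
    #     ]}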

    def test_parameter_validation(self, openai_client):
        """Test parameter validation"""
        # Test invalid temperature
        try:
            openai_client.create_chat_completion(
                model="test-model",
                messages=[{"role": "user", "content": "test"}],
                temperature=2.5  # Should be between 0 and 2
            )
            # If this succeeds, the API is too permissive but that's okay
        except openai.BadRequestError as e:
            assert e.response.status_code == 400

        # Test invalid max_tokens
        try:
            openai_client.create_chat_completion(
                model="test-model",
                messages=[{"role": "user", "content": "test"}],
                max_tokens=-1  # Should be positive
            )
        except openai.BadRequestError as e:
            assert e.response.status_code == 400

    @pytest.mark.asyncio
    async def test_concurrent_requests(self, async_openai_client):
        """Test concurrent API requests"""
        results = await async_openai_client.test_concurrent_requests(10)

        # Verify results
        assert len(results) == 10

        # Calculate success rate
        successful_requests = sum(1 for r in results if r["success"])
        success_rate = successful_requests / len(results)

        # Should handle concurrent requests reasonably well
        assert success_rate >= 0.5, f"Low success rate for concurrent requests: {success_rate}"

        # Check response times
        response_times = [r["response_time"] for r in results if r["success"]]
        if response_times:
            avg_response_time = sum(response_times) / len(response_times)
            assert avg_response_time < 10.0, f"High average response time: {avg_response_time}s"
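
    # A plausible shape for the concurrency helper, assuming it fans requests out with
    # asyncio.gather and records per-request timing -- illustrative only, not the real code:
    #
    #     async def test_concurrent_requests(self, n):
    #         async def one():
    #             start = time.monotonic()
    #             try:
    #                 await self.client.chat.completions.create(
    #                     model="test-model",
    #                     messages=[{"role": "user", "content": "ping"}],
    #                     max_tokens=5)
    #                 return {"success": True, "response_time": time.monotonic() - start}
    #             except Exception:
    #                 return {"success": False, "response_time": time.monotonic() - start}
    #         return await asyncio.gather(*(one() for _ in range(n)))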

    @pytest.mark.asyncio
    async def test_streaming_performance(self, async_openai_client):
        """Test streaming response performance"""
        stream_results = await async_openai_client.test_streaming_performance()

        if "error" not in stream_results:
            # Verify streaming metrics
            assert stream_results["chunk_count"] > 0
            assert stream_results["total_time"] > 0

            # First chunk should arrive quickly
            if stream_results["first_chunk_time"]:
                assert stream_results["first_chunk_time"] < 5.0, "First chunk took too long"

    def test_model_parameter_compatibility(self, openai_client):
        """Test model parameter compatibility"""
        # Test with different model names
        model_names = ["test-model", "gpt-3.5-turbo", "gpt-4"]

        for model_name in model_names:
            try:
                response = openai_client.create_chat_completion(
                    model=model_name,
                    messages=[{"role": "user", "content": "test"}],
                    max_tokens=10
                )
                # If successful, verify model name is preserved
                assert response["model"] == model_name or "test-model" in response["model"]
            except openai.NotFoundError:
                # Model not available, that's okay
                continue
            except openai.BadRequestError:
                # Model name not accepted, that's okay
                continue

    def test_message_roles_compatibility(self, openai_client):
        """Test different message roles"""
        # Test with system, user, assistant roles
        messages = [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "Hello"},
            {"role": "assistant", "content": "Hi there!"},
            {"role": "user", "content": "How are you?"}
        ]

        try:
            response = openai_client.create_chat_completion(
                model="test-model",
                messages=messages,
                max_tokens=50
            )
            # Should handle conversation context properly
            assert response["choices"][0]["message"]["role"] == "assistant"
        except Exception as e:
            pytest.fail(f"Failed to handle message roles: {e}")

    def test_special_characters_handling(self, openai_client):
        """Test handling of special characters and unicode"""
        special_messages = [
            "Hello 世界! 🌍",
            "Math: ∑(x²) = ∫f(x)dx",
            "Code: print('hello\\nworld')",
            "Quotes: \"He said 'hello'\""
        ]

        for message in special_messages:
            try:
                response = openai_client.create_chat_completion(
                    model="test-model",
                    messages=[{"role": "user", "content": message}],
                    max_tokens=50
                )
                # Should return valid response
                assert len(response["choices"][0]["message"]["content"]) > 0
            except Exception as e:
                pytest.fail(f"Failed to handle special characters in '{message}': {e}")

    def test_openai_client_types(self, test_api_key):
        """Test that responses work with OpenAI client type expectations"""
        client = OpenAI(api_key=test_api_key, base_url=self.BASE_URL)

        try:
            # Test that the client can parse responses correctly
            response = client.chat.completions.create(
                model="test-model",
                messages=[{"role": "user", "content": "test"}],
                max_tokens=10
            )

            # These should not raise AttributeError
            assert hasattr(response, 'id')
            assert hasattr(response, 'choices')
            assert hasattr(response, 'usage')
            assert hasattr(response.choices[0], 'message')
            assert hasattr(response.choices[0].message, 'content')
        except openai.AuthenticationError:
            # Expected if test API key is not set up
            pytest.skip("Test API key not configured")
        except Exception as e:
            pytest.fail(f"OpenAI client type compatibility failed: {e}")