# enclava/backend/tests/integration/test_llm_service_integration.py
"""
Integration tests for the new LLM service.
Tests end-to-end functionality including provider integration, security, and performance.
"""
import pytest
import asyncio
import time
from httpx import AsyncClient
from unittest.mock import patch, AsyncMock, MagicMock
import json
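
# Note: these tests rely on an async `client` fixture supplied by the suite's conftest.py.
# A minimal sketch of what such a fixture might look like follows; the app import path and
# the ASGI transport wiring are assumptions for illustration, not the project's actual
# conftest:
#
#     import pytest_asyncio
#     from httpx import ASGITransport, AsyncClient
#     from app.main import app  # assumed FastAPI application entry point
#
#     @pytest_asyncio.fixture
#     async def client():
#         async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as c:
#             yield c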


class TestLLMServiceIntegration:
    """Integration tests for the LLM service."""

    @pytest.mark.asyncio
    async def test_full_chat_flow(self, client: AsyncClient):
        """Test complete chat completion flow with security and budget checks."""
        from app.services.llm.models import ChatCompletionResponse, ChatChoice, ChatMessage, Usage

        # Mock successful LLM service response
        mock_response = ChatCompletionResponse(
            id="test-completion-123",
            object="chat.completion",
            created=int(time.time()),
            model="privatemode-llama-3-70b",
            choices=[
                ChatChoice(
                    index=0,
                    message=ChatMessage(
                        role="assistant",
                        content="Hello! I'm a TEE-protected AI assistant. How can I help you today?"
                    ),
                    finish_reason="stop"
                )
            ],
            usage=Usage(
                prompt_tokens=25,
                completion_tokens=15,
                total_tokens=40
            ),
            security_analysis={
                "risk_score": 0.1,
                "threats_detected": [],
                "risk_level": "low",
                "analysis_time_ms": 12.5
            }
        )

        with patch("app.services.llm.service.llm_service.create_chat_completion") as mock_chat, \
             patch("app.services.budget_enforcement.BudgetEnforcementService.check_budget_compliance") as mock_budget:
            mock_chat.return_value = mock_response
            mock_budget.return_value = True  # Budget check passes

            response = await client.post(
                "/api/v1/llm/chat/completions",
                json={
                    "model": "privatemode-llama-3-70b",
                    "messages": [
                        {"role": "system", "content": "You are a helpful assistant."},
                        {"role": "user", "content": "Hello, what are your capabilities?"}
                    ],
                    "temperature": 0.7,
                    "max_tokens": 150
                },
                headers={"Authorization": "Bearer test-api-key"}
            )

            # Verify response structure
            assert response.status_code == 200
            data = response.json()

            # Check standard OpenAI-compatible fields
            assert "id" in data
            assert "object" in data
            assert "created" in data
            assert "model" in data
            assert "choices" in data
            assert "usage" in data

            # Check security integration
            assert "security_analysis" in data
            assert data["security_analysis"]["risk_level"] == "low"

            # Verify content
            assert len(data["choices"]) == 1
            assert data["choices"][0]["message"]["role"] == "assistant"
            assert "TEE-protected" in data["choices"][0]["message"]["content"]

            # Verify usage tracking
            assert data["usage"]["total_tokens"] == 40
            assert data["usage"]["prompt_tokens"] == 25
            assert data["usage"]["completion_tokens"] == 15

    @pytest.mark.asyncio
    async def test_embedding_integration(self, client: AsyncClient):
        """Test embedding generation end-to-end against a mocked provider response."""
        from app.services.llm.models import EmbeddingResponse, EmbeddingData, Usage

        # Create a realistic 1024-dimensional embedding vector
        embedding_vector = [0.1 * i for i in range(1024)]
        mock_response = EmbeddingResponse(
            object="list",
            data=[
                EmbeddingData(
                    object="embedding",
                    embedding=embedding_vector,
                    index=0
                )
            ],
            model="privatemode-embeddings",
            usage=Usage(
                prompt_tokens=8,
                total_tokens=8
            )
        )

        with patch("app.services.llm.service.llm_service.create_embedding") as mock_embedding:
            mock_embedding.return_value = mock_response

            response = await client.post(
                "/api/v1/llm/embeddings",
                json={
                    "model": "privatemode-embeddings",
                    "input": "This is a test document for embedding generation."
                },
                headers={"Authorization": "Bearer test-api-key"}
            )

            assert response.status_code == 200
            data = response.json()

            # Verify embedding structure
            assert "object" in data
            assert "data" in data
            assert "usage" in data
            assert len(data["data"]) == 1
            assert len(data["data"][0]["embedding"]) == 1024
            assert data["data"][0]["index"] == 0

    @pytest.mark.asyncio
    async def test_provider_health_integration(self, client: AsyncClient):
        """Test provider health monitoring integration."""
        mock_status = {
            "privatemode": {
                "provider": "PrivateMode.ai",
                "status": "healthy",
                "latency_ms": 245.8,
                "success_rate": 0.987,
                "last_check": "2025-01-01T12:00:00Z",
                "error_message": None,
                "models_available": [
                    "privatemode-llama-3-70b",
                    "privatemode-claude-3-sonnet",
                    "privatemode-gpt-4o",
                    "privatemode-embeddings"
                ]
            }
        }

        with patch("app.services.llm.service.llm_service.get_provider_status") as mock_provider:
            mock_provider.return_value = mock_status

            response = await client.get(
                "/api/v1/llm/providers/status",
                headers={"Authorization": "Bearer test-api-key"}
            )

            assert response.status_code == 200
            data = response.json()

            # Check response structure
            assert "data" in data
            assert "privatemode" in data["data"]
            provider_data = data["data"]["privatemode"]
            assert provider_data["status"] == "healthy"
            assert provider_data["latency_ms"] < 300  # Reasonable latency
            assert provider_data["success_rate"] > 0.95  # High success rate
            assert len(provider_data["models_available"]) >= 4

    @pytest.mark.asyncio
    async def test_error_handling_and_fallback(self, client: AsyncClient):
        """Test error handling and fallback scenarios."""
        # Test provider unavailable scenario
        with patch("app.services.llm.service.llm_service.create_chat_completion") as mock_chat:
            mock_chat.side_effect = Exception("Provider temporarily unavailable")

            response = await client.post(
                "/api/v1/llm/chat/completions",
                json={
                    "model": "privatemode-llama-3-70b",
                    "messages": [
                        {"role": "user", "content": "Hello"}
                    ]
                },
                headers={"Authorization": "Bearer test-api-key"}
            )

            # Should return an error but not crash
            assert response.status_code in [500, 503]  # Server error or service unavailable

    @pytest.mark.asyncio
    async def test_security_threat_detection(self, client: AsyncClient):
        """Test security threat detection integration."""
        from app.services.llm.models import ChatCompletionResponse, ChatChoice, ChatMessage, Usage

        # Mock response with security threat detected
        mock_response = ChatCompletionResponse(
            id="test-completion-security",
            object="chat.completion",
            created=int(time.time()),
            model="privatemode-llama-3-70b",
            choices=[
                ChatChoice(
                    index=0,
                    message=ChatMessage(
                        role="assistant",
                        content="I cannot help with that request as it violates security policies."
                    ),
                    finish_reason="stop"
                )
            ],
            usage=Usage(
                prompt_tokens=15,
                completion_tokens=12,
                total_tokens=27
            ),
            security_analysis={
                "risk_score": 0.8,
                "threats_detected": ["potential_malicious_code"],
                "risk_level": "high",
                "blocked": True,
                "analysis_time_ms": 45.2
            }
        )

        with patch("app.services.llm.service.llm_service.create_chat_completion") as mock_chat:
            mock_chat.return_value = mock_response

            response = await client.post(
                "/api/v1/llm/chat/completions",
                json={
                    "model": "privatemode-llama-3-70b",
                    "messages": [
                        {"role": "user", "content": "How to create malicious code?"}
                    ]
                },
                headers={"Authorization": "Bearer test-api-key"}
            )

            assert response.status_code == 200  # Request succeeds but content is filtered
            data = response.json()

            # Verify security analysis
            assert "security_analysis" in data
            assert data["security_analysis"]["risk_level"] == "high"
            assert data["security_analysis"]["blocked"] is True
            assert "malicious" in data["security_analysis"]["threats_detected"][0]

    @pytest.mark.asyncio
    async def test_performance_characteristics(self, client: AsyncClient):
        """Test performance characteristics of the LLM service."""
        from app.services.llm.models import ChatCompletionResponse, ChatChoice, ChatMessage, Usage

        # Mock fast response
        mock_response = ChatCompletionResponse(
            id="test-perf",
            object="chat.completion",
            created=int(time.time()),
            model="privatemode-llama-3-70b",
            choices=[
                ChatChoice(
                    index=0,
                    message=ChatMessage(
                        role="assistant",
                        content="Quick response for performance testing."
                    ),
                    finish_reason="stop"
                )
            ],
            usage=Usage(
                prompt_tokens=10,
                completion_tokens=8,
                total_tokens=18
            )
        )

        with patch("app.services.llm.service.llm_service.create_chat_completion") as mock_chat:
            mock_chat.return_value = mock_response

            # Measure response time
            start_time = time.time()
            response = await client.post(
                "/api/v1/llm/chat/completions",
                json={
                    "model": "privatemode-llama-3-70b",
                    "messages": [
                        {"role": "user", "content": "Quick test"}
                    ]
                },
                headers={"Authorization": "Bearer test-api-key"}
            )
            response_time = time.time() - start_time

            assert response.status_code == 200
            # API should respond quickly (mocked, so should be very fast)
            assert response_time < 1.0  # Less than 1 second for mocked response

    @pytest.mark.asyncio
    async def test_model_capabilities_detection(self, client: AsyncClient):
        """Test model capabilities detection and reporting."""
        from app.services.llm.models import Model

        mock_models = [
            Model(
                id="privatemode-llama-3-70b",
                object="model",
                created=1234567890,
                owned_by="PrivateMode.ai",
                provider="PrivateMode.ai",
                capabilities=["tee", "chat", "function_calling"],
                context_window=32768,
                max_output_tokens=4096,
                supports_streaming=True,
                supports_function_calling=True
            ),
            Model(
                id="privatemode-embeddings",
                object="model",
                created=1234567890,
                owned_by="PrivateMode.ai",
                provider="PrivateMode.ai",
                capabilities=["tee", "embeddings"],
                context_window=512,
                supports_streaming=False,
                supports_function_calling=False
            )
        ]

        with patch("app.services.llm.service.llm_service.get_models") as mock_models_call:
            mock_models_call.return_value = mock_models

            response = await client.get(
                "/api/v1/llm/models",
                headers={"Authorization": "Bearer test-api-key"}
            )

            assert response.status_code == 200
            data = response.json()

            # Verify model capabilities
            assert len(data["data"]) == 2

            # Check chat model capabilities
            chat_model = next(m for m in data["data"] if m["id"] == "privatemode-llama-3-70b")
            assert "tee" in chat_model["capabilities"]
            assert "chat" in chat_model["capabilities"]
            assert chat_model["supports_streaming"] is True
            assert chat_model["supports_function_calling"] is True
            assert chat_model["context_window"] == 32768

            # Check embedding model capabilities
            embed_model = next(m for m in data["data"] if m["id"] == "privatemode-embeddings")
            assert "tee" in embed_model["capabilities"]
            assert "embeddings" in embed_model["capabilities"]
            assert embed_model["supports_streaming"] is False
            assert embed_model["context_window"] == 512

    @pytest.mark.asyncio
    async def test_concurrent_requests(self, client: AsyncClient):
        """Test handling of concurrent requests."""
        from app.services.llm.models import ChatCompletionResponse, ChatChoice, ChatMessage, Usage

        mock_response = ChatCompletionResponse(
            id="test-concurrent",
            object="chat.completion",
            created=int(time.time()),
            model="privatemode-llama-3-70b",
            choices=[
                ChatChoice(
                    index=0,
                    message=ChatMessage(
                        role="assistant",
                        content="Concurrent response"
                    ),
                    finish_reason="stop"
                )
            ],
            usage=Usage(
                prompt_tokens=5,
                completion_tokens=3,
                total_tokens=8
            )
        )

        with patch("app.services.llm.service.llm_service.create_chat_completion") as mock_chat:
            mock_chat.return_value = mock_response

            # Create multiple concurrent requests
            tasks = []
            for i in range(5):
                task = client.post(
                    "/api/v1/llm/chat/completions",
                    json={
                        "model": "privatemode-llama-3-70b",
                        "messages": [
                            {"role": "user", "content": f"Concurrent test {i}"}
                        ]
                    },
                    headers={"Authorization": "Bearer test-api-key"}
                )
                tasks.append(task)

            # Execute all requests concurrently
            responses = await asyncio.gather(*tasks)

            # Verify all requests succeeded
            for response in responses:
                assert response.status_code == 200
                data = response.json()
                assert "choices" in data
                assert data["choices"][0]["message"]["content"] == "Concurrent response"

    @pytest.mark.asyncio
    async def test_budget_enforcement_integration(self, client: AsyncClient):
        """Test budget enforcement integration with the LLM service."""
        # Test budget exceeded scenario
        with patch("app.services.budget_enforcement.BudgetEnforcementService.check_budget_compliance") as mock_budget:
            mock_budget.side_effect = Exception("Monthly budget limit exceeded")

            response = await client.post(
                "/api/v1/llm/chat/completions",
                json={
                    "model": "privatemode-llama-3-70b",
                    "messages": [
                        {"role": "user", "content": "Test budget enforcement"}
                    ]
                },
                headers={"Authorization": "Bearer test-api-key"}
            )

            assert response.status_code == 402  # Payment required

        # Test budget warning scenario
        from app.services.llm.models import ChatCompletionResponse, ChatChoice, ChatMessage, Usage

        mock_response = ChatCompletionResponse(
            id="test-budget-warning",
            object="chat.completion",
            created=int(time.time()),
            model="privatemode-llama-3-70b",
            choices=[
                ChatChoice(
                    index=0,
                    message=ChatMessage(
                        role="assistant",
                        content="Response with budget warning"
                    ),
                    finish_reason="stop"
                )
            ],
            usage=Usage(
                prompt_tokens=10,
                completion_tokens=8,
                total_tokens=18
            ),
            budget_warnings=["Approaching monthly budget limit (85% used)"]
        )

        with patch("app.services.llm.service.llm_service.create_chat_completion") as mock_chat, \
             patch("app.services.budget_enforcement.BudgetEnforcementService.check_budget_compliance") as mock_budget:
            mock_chat.return_value = mock_response
            mock_budget.return_value = True  # Budget check passes but with warning

            response = await client.post(
                "/api/v1/llm/chat/completions",
                json={
                    "model": "privatemode-llama-3-70b",
                    "messages": [
                        {"role": "user", "content": "Test budget warning"}
                    ]
                },
                headers={"Authorization": "Bearer test-api-key"}
            )

            assert response.status_code == 200
            data = response.json()
            assert "budget_warnings" in data
            assert len(data["budget_warnings"]) > 0
            assert "85%" in data["budget_warnings"][0]