Mirror of https://github.com/aljazceru/enclava.git, synced 2025-12-17 07:24:34 +01:00
removing LiteLLM and going directly for PrivateMode
backend/tests/integration/test_llm_service_integration.py (new file, 496 lines)
@@ -0,0 +1,496 @@
"""
Integration tests for the new LLM service.
Tests end-to-end functionality including provider integration, security, and performance.
"""
import asyncio
import time
from unittest.mock import patch

import pytest
from httpx import AsyncClient


class TestLLMServiceIntegration:
    """Integration tests for the LLM service."""
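
    # The `client` fixture is expected to come from the suite's conftest.py,
    # e.g. an httpx AsyncClient wired to the FastAPI app (an assumption; the
    # fixture definition is not part of this diff).
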
    @pytest.mark.asyncio
    async def test_full_chat_flow(self, client: AsyncClient):
        """Test complete chat completion flow with security and budget checks."""
        from app.services.llm.models import ChatCompletionResponse, ChatChoice, ChatMessage, Usage

        # Mock successful LLM service response
        mock_response = ChatCompletionResponse(
            id="test-completion-123",
            object="chat.completion",
            created=int(time.time()),
            model="privatemode-llama-3-70b",
            choices=[
                ChatChoice(
                    index=0,
                    message=ChatMessage(
                        role="assistant",
                        content="Hello! I'm a TEE-protected AI assistant. How can I help you today?"
                    ),
                    finish_reason="stop"
                )
            ],
            usage=Usage(
                prompt_tokens=25,
                completion_tokens=15,
                total_tokens=40
            ),
            security_analysis={
                "risk_score": 0.1,
                "threats_detected": [],
                "risk_level": "low",
                "analysis_time_ms": 12.5
            }
        )
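
        # Both the LLM call and the budget check are patched, so this test
        # exercises only the HTTP layer: routing, auth, and serialization.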
with patch("app.services.llm.service.llm_service.create_chat_completion") as mock_chat, \
|
||||
patch("app.services.budget_enforcement.BudgetEnforcementService.check_budget_compliance") as mock_budget:
|
||||
|
||||
mock_chat.return_value = mock_response
|
||||
mock_budget.return_value = True # Budget check passes
|
||||
|
||||
response = await client.post(
|
||||
"/api/v1/llm/chat/completions",
|
||||
json={
|
||||
"model": "privatemode-llama-3-70b",
|
||||
"messages": [
|
||||
{"role": "system", "content": "You are a helpful assistant."},
|
||||
{"role": "user", "content": "Hello, what are your capabilities?"}
|
||||
],
|
||||
"temperature": 0.7,
|
||||
"max_tokens": 150
|
||||
},
|
||||
headers={"Authorization": "Bearer test-api-key"}
|
||||
)
|
||||
|
||||
# Verify response structure
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
|
||||
# Check standard OpenAI-compatible fields
|
||||
assert "id" in data
|
||||
assert "object" in data
|
||||
assert "created" in data
|
||||
assert "model" in data
|
||||
assert "choices" in data
|
||||
assert "usage" in data
|
||||
|
||||
# Check security integration
|
||||
assert "security_analysis" in data
|
||||
assert data["security_analysis"]["risk_level"] == "low"
|
||||
|
||||
# Verify content
|
||||
assert len(data["choices"]) == 1
|
||||
assert data["choices"][0]["message"]["role"] == "assistant"
|
||||
assert "TEE-protected" in data["choices"][0]["message"]["content"]
|
||||
|
||||
# Verify usage tracking
|
||||
assert data["usage"]["total_tokens"] == 40
|
||||
assert data["usage"]["prompt_tokens"] == 25
|
||||
assert data["usage"]["completion_tokens"] == 15
|
||||
|
||||
    @pytest.mark.asyncio
    async def test_embedding_integration(self, client: AsyncClient):
        """Test embedding generation through the embeddings endpoint."""
        from app.services.llm.models import EmbeddingResponse, EmbeddingData, Usage

        # Create realistic 1024-dimensional embedding
        embedding_vector = [0.1 * i for i in range(1024)]

        mock_response = EmbeddingResponse(
            object="list",
            data=[
                EmbeddingData(
                    object="embedding",
                    embedding=embedding_vector,
                    index=0
                )
            ],
            model="privatemode-embeddings",
            usage=Usage(
                prompt_tokens=8,
                total_tokens=8
            )
        )
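
        # Only the provider call is patched; request validation and response
        # shaping still run through the real endpoint code.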
with patch("app.services.llm.service.llm_service.create_embedding") as mock_embedding:
|
||||
mock_embedding.return_value = mock_response
|
||||
|
||||
response = await client.post(
|
||||
"/api/v1/llm/embeddings",
|
||||
json={
|
||||
"model": "privatemode-embeddings",
|
||||
"input": "This is a test document for embedding generation."
|
||||
},
|
||||
headers={"Authorization": "Bearer test-api-key"}
|
||||
)
|
||||
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
|
||||
# Verify embedding structure
|
||||
assert "object" in data
|
||||
assert "data" in data
|
||||
assert "usage" in data
|
||||
assert len(data["data"]) == 1
|
||||
assert len(data["data"][0]["embedding"]) == 1024
|
||||
assert data["data"][0]["index"] == 0
|
||||
|
||||
    @pytest.mark.asyncio
    async def test_provider_health_integration(self, client: AsyncClient):
        """Test provider health monitoring integration."""
        mock_status = {
            "privatemode": {
                "provider": "PrivateMode.ai",
                "status": "healthy",
                "latency_ms": 245.8,
                "success_rate": 0.987,
                "last_check": "2025-01-01T12:00:00Z",
                "error_message": None,
                "models_available": [
                    "privatemode-llama-3-70b",
                    "privatemode-claude-3-sonnet",
                    "privatemode-gpt-4o",
                    "privatemode-embeddings"
                ]
            }
        }
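
        # The endpoint is expected to wrap the provider map above under a
        # top-level "data" key, which the assertions below rely on.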
with patch("app.services.llm.service.llm_service.get_provider_status") as mock_provider:
|
||||
mock_provider.return_value = mock_status
|
||||
|
||||
response = await client.get(
|
||||
"/api/v1/llm/providers/status",
|
||||
headers={"Authorization": "Bearer test-api-key"}
|
||||
)
|
||||
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
|
||||
# Check response structure
|
||||
assert "data" in data
|
||||
assert "privatemode" in data["data"]
|
||||
|
||||
provider_data = data["data"]["privatemode"]
|
||||
assert provider_data["status"] == "healthy"
|
||||
assert provider_data["latency_ms"] < 300 # Reasonable latency
|
||||
assert provider_data["success_rate"] > 0.95 # High success rate
|
||||
assert len(provider_data["models_available"]) >= 4
|
||||
|
||||
    @pytest.mark.asyncio
    async def test_error_handling_and_fallback(self, client: AsyncClient):
        """Test error handling and fallback scenarios."""
        # Test provider unavailable scenario
        with patch("app.services.llm.service.llm_service.create_chat_completion") as mock_chat:
            mock_chat.side_effect = Exception("Provider temporarily unavailable")
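
            # side_effect makes the awaited service call raise inside the
            # endpoint, which should surface as a controlled 5xx response.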
            response = await client.post(
                "/api/v1/llm/chat/completions",
                json={
                    "model": "privatemode-llama-3-70b",
                    "messages": [
                        {"role": "user", "content": "Hello"}
                    ]
                },
                headers={"Authorization": "Bearer test-api-key"}
            )

            # Should return error but not crash
            assert response.status_code in [500, 503]  # Server error or service unavailable
    @pytest.mark.asyncio
    async def test_security_threat_detection(self, client: AsyncClient):
        """Test security threat detection integration."""
        from app.services.llm.models import ChatCompletionResponse, ChatChoice, ChatMessage, Usage

        # Mock response with security threat detected
        mock_response = ChatCompletionResponse(
            id="test-completion-security",
            object="chat.completion",
            created=int(time.time()),
            model="privatemode-llama-3-70b",
            choices=[
                ChatChoice(
                    index=0,
                    message=ChatMessage(
                        role="assistant",
                        content="I cannot help with that request as it violates security policies."
                    ),
                    finish_reason="stop"
                )
            ],
            usage=Usage(
                prompt_tokens=15,
                completion_tokens=12,
                total_tokens=27
            ),
            security_analysis={
                "risk_score": 0.8,
                "threats_detected": ["potential_malicious_code"],
                "risk_level": "high",
                "blocked": True,
                "analysis_time_ms": 45.2
            }
        )
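
        # Blocking is signalled in the security_analysis payload rather than
        # the HTTP status: the request itself still completes with 200.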
with patch("app.services.llm.service.llm_service.create_chat_completion") as mock_chat:
|
||||
mock_chat.return_value = mock_response
|
||||
|
||||
response = await client.post(
|
||||
"/api/v1/llm/chat/completions",
|
||||
json={
|
||||
"model": "privatemode-llama-3-70b",
|
||||
"messages": [
|
||||
{"role": "user", "content": "How to create malicious code?"}
|
||||
]
|
||||
},
|
||||
headers={"Authorization": "Bearer test-api-key"}
|
||||
)
|
||||
|
||||
assert response.status_code == 200 # Request succeeds but content is filtered
|
||||
data = response.json()
|
||||
|
||||
# Verify security analysis
|
||||
assert "security_analysis" in data
|
||||
assert data["security_analysis"]["risk_level"] == "high"
|
||||
assert data["security_analysis"]["blocked"] is True
|
||||
assert "malicious" in data["security_analysis"]["threats_detected"][0]
|
||||
|
||||
    @pytest.mark.asyncio
    async def test_performance_characteristics(self, client: AsyncClient):
        """Test performance characteristics of the LLM service."""
        from app.services.llm.models import ChatCompletionResponse, ChatChoice, ChatMessage, Usage

        # Mock fast response
        mock_response = ChatCompletionResponse(
            id="test-perf",
            object="chat.completion",
            created=int(time.time()),
            model="privatemode-llama-3-70b",
            choices=[
                ChatChoice(
                    index=0,
                    message=ChatMessage(
                        role="assistant",
                        content="Quick response for performance testing."
                    ),
                    finish_reason="stop"
                )
            ],
            usage=Usage(
                prompt_tokens=10,
                completion_tokens=8,
                total_tokens=18
            )
        )

        with patch("app.services.llm.service.llm_service.create_chat_completion") as mock_chat:
            mock_chat.return_value = mock_response
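
            # With the provider mocked, this timing bounds only framework
            # overhead (routing, validation, serialization), not model latency.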
            # Measure response time
            start_time = time.time()

            response = await client.post(
                "/api/v1/llm/chat/completions",
                json={
                    "model": "privatemode-llama-3-70b",
                    "messages": [
                        {"role": "user", "content": "Quick test"}
                    ]
                },
                headers={"Authorization": "Bearer test-api-key"}
            )

            response_time = time.time() - start_time

            assert response.status_code == 200
            # API should respond quickly (mocked, so should be very fast)
            assert response_time < 1.0  # Less than 1 second for mocked response
    @pytest.mark.asyncio
    async def test_model_capabilities_detection(self, client: AsyncClient):
        """Test model capabilities detection and reporting."""
        from app.services.llm.models import Model
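
        # One chat model and one embedding model, so both capability shapes
        # (streaming/function calling vs. embeddings-only) are covered.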
        mock_models = [
            Model(
                id="privatemode-llama-3-70b",
                object="model",
                created=1234567890,
                owned_by="PrivateMode.ai",
                provider="PrivateMode.ai",
                capabilities=["tee", "chat", "function_calling"],
                context_window=32768,
                max_output_tokens=4096,
                supports_streaming=True,
                supports_function_calling=True
            ),
            Model(
                id="privatemode-embeddings",
                object="model",
                created=1234567890,
                owned_by="PrivateMode.ai",
                provider="PrivateMode.ai",
                capabilities=["tee", "embeddings"],
                context_window=512,
                supports_streaming=False,
                supports_function_calling=False
            )
        ]

        with patch("app.services.llm.service.llm_service.get_models") as mock_models_call:
            mock_models_call.return_value = mock_models

            response = await client.get(
                "/api/v1/llm/models",
                headers={"Authorization": "Bearer test-api-key"}
            )

            assert response.status_code == 200
            data = response.json()

            # Verify model capabilities
            assert len(data["data"]) == 2

            # Check chat model capabilities
            chat_model = next(m for m in data["data"] if m["id"] == "privatemode-llama-3-70b")
            assert "tee" in chat_model["capabilities"]
            assert "chat" in chat_model["capabilities"]
            assert chat_model["supports_streaming"] is True
            assert chat_model["supports_function_calling"] is True
            assert chat_model["context_window"] == 32768

            # Check embedding model capabilities
            embed_model = next(m for m in data["data"] if m["id"] == "privatemode-embeddings")
            assert "tee" in embed_model["capabilities"]
            assert "embeddings" in embed_model["capabilities"]
            assert embed_model["supports_streaming"] is False
            assert embed_model["context_window"] == 512
    @pytest.mark.asyncio
    async def test_concurrent_requests(self, client: AsyncClient):
        """Test handling of concurrent requests."""
        from app.services.llm.models import ChatCompletionResponse, ChatChoice, ChatMessage, Usage

        mock_response = ChatCompletionResponse(
            id="test-concurrent",
            object="chat.completion",
            created=int(time.time()),
            model="privatemode-llama-3-70b",
            choices=[
                ChatChoice(
                    index=0,
                    message=ChatMessage(
                        role="assistant",
                        content="Concurrent response"
                    ),
                    finish_reason="stop"
                )
            ],
            usage=Usage(
                prompt_tokens=5,
                completion_tokens=3,
                total_tokens=8
            )
        )

        with patch("app.services.llm.service.llm_service.create_chat_completion") as mock_chat:
            mock_chat.return_value = mock_response

            # Create multiple concurrent requests
            tasks = []
            for i in range(5):
                task = client.post(
                    "/api/v1/llm/chat/completions",
                    json={
                        "model": "privatemode-llama-3-70b",
                        "messages": [
                            {"role": "user", "content": f"Concurrent test {i}"}
                        ]
                    },
                    headers={"Authorization": "Bearer test-api-key"}
                )
                tasks.append(task)
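
            # client.post(...) returns an un-awaited coroutine, so nothing has
            # been sent yet; gather() below runs all five requests at once.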
            # Execute all requests concurrently
            responses = await asyncio.gather(*tasks)

            # Verify all requests succeeded
            for response in responses:
                assert response.status_code == 200
                data = response.json()
                assert "choices" in data
                assert data["choices"][0]["message"]["content"] == "Concurrent response"
    @pytest.mark.asyncio
    async def test_budget_enforcement_integration(self, client: AsyncClient):
        """Test budget enforcement integration with LLM service."""
        # Test budget exceeded scenario
        with patch("app.services.budget_enforcement.BudgetEnforcementService.check_budget_compliance") as mock_budget:
            mock_budget.side_effect = Exception("Monthly budget limit exceeded")

            response = await client.post(
                "/api/v1/llm/chat/completions",
                json={
                    "model": "privatemode-llama-3-70b",
                    "messages": [
                        {"role": "user", "content": "Test budget enforcement"}
                    ]
                },
                headers={"Authorization": "Bearer test-api-key"}
            )
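
            # The endpoint is expected to map budget-limit failures to
            # HTTP 402 Payment Required.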
            assert response.status_code == 402  # Payment required

        # Test budget warning scenario
        from app.services.llm.models import ChatCompletionResponse, ChatChoice, ChatMessage, Usage

        mock_response = ChatCompletionResponse(
            id="test-budget-warning",
            object="chat.completion",
            created=int(time.time()),
            model="privatemode-llama-3-70b",
            choices=[
                ChatChoice(
                    index=0,
                    message=ChatMessage(
                        role="assistant",
                        content="Response with budget warning"
                    ),
                    finish_reason="stop"
                )
            ],
            usage=Usage(
                prompt_tokens=10,
                completion_tokens=8,
                total_tokens=18
            ),
            budget_warnings=["Approaching monthly budget limit (85% used)"]
        )

        with patch("app.services.llm.service.llm_service.create_chat_completion") as mock_chat, \
             patch("app.services.budget_enforcement.BudgetEnforcementService.check_budget_compliance") as mock_budget:

            mock_chat.return_value = mock_response
            mock_budget.return_value = True  # Budget check passes but with warning

            response = await client.post(
                "/api/v1/llm/chat/completions",
                json={
                    "model": "privatemode-llama-3-70b",
                    "messages": [
                        {"role": "user", "content": "Test budget warning"}
                    ]
                },
                headers={"Authorization": "Bearer test-api-key"}
            )

            assert response.status_code == 200
            data = response.json()
            assert "budget_warnings" in data
            assert len(data["budget_warnings"]) > 0
            assert "85%" in data["budget_warnings"][0]