#!/usr/bin/env python3
"""
Ollama Integration Test

Tests that Ollama models work properly through the LiteLLM proxy.
"""

import asyncio
import aiohttp
import json
import time

# Ollama models from litellm_config.yaml
OLLAMA_MODELS = [
    "ollama-llama-3.1-nemotron",
    "ollama-mistral-nemo",
    "ollama-gemini-2.0-flash",
    "ollama-qwen3-235b",
    "ollama-deepseek-r1",
    "ollama-mistral-small",
    "ollama-gemini-2.5-pro"
]
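
# For orientation: each alias above is assumed to map to an Ollama-backed entry in
# litellm_config.yaml, roughly of the following shape (illustrative sketch only --
# the underlying model names and api_base are assumptions, not the actual config):
#
#   model_list:
#     - model_name: ollama-mistral-nemo
#       litellm_params:
#         model: ollama/mistral-nemo
#         api_base: http://localhost:11434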


async def test_ollama_integration():
    async with aiohttp.ClientSession() as session:
        try:
            # Register and log in a test user
            timestamp = int(time.time())
            user_data = {
                "email": f"ollamatest{timestamp}@example.com",
                "password": "TestPassword123!",
                "username": f"ollamatest{timestamp}"
            }

            print("🚀 Starting Ollama Integration Test")
            print("=" * 50)

            # Register user
            async with session.post("http://localhost:58000/api/v1/auth/register", json=user_data) as response:
                if response.status != 201:
                    error_data = await response.json()
                    print(f"❌ User registration failed: {error_data}")
                    return
                print("✅ User registered successfully")

            # Login
            login_data = {"email": user_data["email"], "password": user_data["password"]}
            async with session.post("http://localhost:58000/api/v1/auth/login", json=login_data) as response:
                if response.status != 200:
                    error_data = await response.json()
                    print(f"❌ Login failed: {error_data}")
                    return

                login_result = await response.json()
                token = login_result['access_token']
                headers = {'Authorization': f'Bearer {token}'}
                print("✅ Login successful")

            # Test 1: Check that the Ollama models are listed
            print("\n📋 Testing model availability...")
            async with session.get("http://localhost:58000/api/v1/llm/models", headers=headers) as response:
                if response.status == 200:
                    models_data = await response.json()
                    available_models = [model.get('id', '') for model in models_data.get('data', [])]

                    ollama_available = [model for model in OLLAMA_MODELS if model in available_models]
                    print(f"✅ Total models available: {len(available_models)}")
                    print(f"✅ Ollama models available: {len(ollama_available)}")

                    if not ollama_available:
                        print("❌ No Ollama models found in model list")
                        return

                    for model in ollama_available:
                        print(f"   • {model}")
                else:
                    error_data = await response.json()
                    print(f"❌ Failed to get models: {error_data}")
                    return

            # Test 2: Chat completions with each available Ollama model
            print("\n💬 Testing chat completions...")
            successful_models = []
            failed_models = []

            test_messages = [
                {"role": "user", "content": "Say 'Hello from Ollama!' and nothing else."}
            ]
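
            # The proxy is expected to return OpenAI-compatible chat completion
            # payloads, roughly (abridged sketch, not an exact schema):
            #   {"choices": [{"message": {"role": "assistant", "content": "..."}}], ...}
            # The parsing below relies only on choices[0].message.content.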

            for model in ollama_available[:3]:  # Test first 3 models to avoid timeout
                print(f"\n🤖 Testing model: {model}")

                chat_data = {
                    "model": model,
                    "messages": test_messages,
                    "max_tokens": 50,
                    "temperature": 0.1
                }

                try:
                    async with session.post(
                        "http://localhost:58000/api/v1/llm/chat/completions",
                        json=chat_data,
                        headers=headers,
                        timeout=aiohttp.ClientTimeout(total=30)
                    ) as chat_response:
                        if chat_response.status == 200:
                            chat_result = await chat_response.json()
                            message = chat_result.get("choices", [{}])[0].get("message", {}).get("content", "")
                            print(f"   ✅ Response: {message.strip()[:100]}...")
                            successful_models.append(model)
                        else:
                            error_data = await chat_response.json()
                            print(f"   ❌ Failed (HTTP {chat_response.status}): {error_data.get('detail', 'Unknown error')}")
                            failed_models.append(model)

                except asyncio.TimeoutError:
                    print("   ⏰ Timeout - model may be loading or unavailable")
                    failed_models.append(model)
                except Exception as e:
                    print(f"   ❌ Error: {str(e)}")
                    failed_models.append(model)

                # Small delay between requests
                await asyncio.sleep(1)

            # Test 3: Streaming response (if supported)
            print("\n🌊 Testing streaming response...")
            if successful_models:
                test_model = successful_models[0]
                stream_data = {
                    "model": test_model,
                    "messages": [{"role": "user", "content": "Count from 1 to 3, one number per line."}],
                    "max_tokens": 20,
                    "stream": True
                }
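
                # When streaming is supported, the proxy is expected to return
                # OpenAI-style Server-Sent Events: a text body of "data: {json chunk}"
                # lines terminated by "data: [DONE]". The check below only looks for
                # the "data:" prefix rather than parsing individual chunks.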

                try:
                    async with session.post(
                        "http://localhost:58000/api/v1/llm/chat/completions",
                        json=stream_data,
                        headers=headers,
                        timeout=aiohttp.ClientTimeout(total=20)
                    ) as stream_response:
                        if stream_response.status == 200:
                            content = await stream_response.text()
                            if "data:" in content:
                                print(f"   ✅ Streaming response received (partial): {content[:100]}...")
                            else:
                                print(f"   ℹ️ Non-streaming response: {content[:100]}...")
                        else:
                            error_data = await stream_response.json()
                            print(f"   ❌ Streaming failed: {error_data}")
                except Exception as e:
                    print(f"   ❌ Streaming error: {str(e)}")

            # Test 4: Chat completions with different sampling parameters
            print("\n⚙️ Testing model parameters...")
            if successful_models:
                test_model = successful_models[0]
                param_tests = [
                    {"temperature": 0.0, "max_tokens": 10},
                    {"temperature": 0.8, "max_tokens": 30},
                ]
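
                # Note: this sweep only verifies that each parameter combination
                # returns HTTP 200; it does not compare outputs (e.g. that
                # temperature 0.0 is more deterministic than 0.8).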

                for i, params in enumerate(param_tests):
                    chat_data = {
                        "model": test_model,
                        "messages": [{"role": "user", "content": f"Test {i+1}: Say hello briefly."}],
                        **params
                    }

                    try:
                        async with session.post(
                            "http://localhost:58000/api/v1/llm/chat/completions",
                            json=chat_data,
                            headers=headers,
                            timeout=aiohttp.ClientTimeout(total=15)
                        ) as response:
                            if response.status == 200:
                                result = await response.json()
                                message = result.get("choices", [{}])[0].get("message", {}).get("content", "")
                                print(f"   ✅ Params {params}: {message.strip()[:50]}...")
                            else:
                                print(f"   ❌ Params test failed: HTTP {response.status}")
                    except Exception as e:
                        print(f"   ❌ Parameters test error: {str(e)}")

            # Summary
            print("\n📊 Test Summary")
            print("=" * 50)
            print(f"✅ Successful models: {len(successful_models)}")
            for model in successful_models:
                print(f"   • {model}")

            if failed_models:
                print(f"❌ Failed models: {len(failed_models)}")
                for model in failed_models:
                    print(f"   • {model}")

            print(f"\n{'🎉 Ollama integration working!' if successful_models else '⚠️ Ollama integration has issues'}")

        except Exception as e:
            print(f"❌ Test error: {e}")


if __name__ == "__main__":
    asyncio.run(test_ollama_integration())