mirror of
https://github.com/aljazceru/enclava.git
synced 2025-12-17 15:34:36 +01:00
clean commit
This commit is contained in:
131
backend/tests/performance/quick_perf_test.py
Normal file
131
backend/tests/performance/quick_perf_test.py
Normal file
@@ -0,0 +1,131 @@
|
||||
#!/usr/bin/env python3
"""
Quick performance comparison test

Sends one small chat-completion request through the platform API and one
directly to LiteLLM, then reports the latency overhead the platform adds.
"""

import json
import os
import time

import requests


# Configuration.
# SECURITY NOTE(review): these credentials were hard-coded in source. They
# now default to the previous values for backward compatibility, but should
# be supplied via environment variables instead of being committed.
API_KEY = os.environ.get("PERF_TEST_API_KEY", "ce_mMJNyEznKHJRvvNyyuwuQotuWJ2BvdD8")
LITELLM_KEY = os.environ.get("PERF_TEST_LITELLM_KEY", "shifra-master-key")

# Prompt and model used for both requests so the comparison is apples-to-apples.
TEST_PROMPT = "What is 2+2? Answer briefly."
MODEL = "ollama-deepseek-r1"
||||
def test_platform_api():
    """Time one chat completion sent through the platform API.

    Returns:
        tuple: ``(response_time_seconds, total_tokens)`` on HTTP 200,
        otherwise ``(None, None)``. Network failures (connection refused,
        timeout) are caught and also yield ``(None, None)`` instead of
        crashing the whole benchmark script.
    """
    print("🔄 Testing Platform API...")

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {API_KEY}",
    }
    payload = {
        "model": MODEL,
        "messages": [{"role": "user", "content": TEST_PROMPT}],
        "max_tokens": 30,
    }

    # perf_counter is monotonic and intended for interval timing; time.time
    # is wall-clock and can jump if the system clock is adjusted mid-request.
    start_time = time.perf_counter()
    try:
        response = requests.post(
            "http://localhost:58000/api/v1/llm/chat/completions",
            headers=headers,
            json=payload,
            timeout=30,
        )
    except requests.RequestException as exc:
        # Previously an unreachable endpoint raised and killed the script;
        # report it the same way as an HTTP failure so main() can continue.
        print(f"❌ Platform API failed: {exc}")
        return None, None
    response_time = time.perf_counter() - start_time

    if response.status_code == 200:
        data = response.json()
        tokens = data.get('usage', {}).get('total_tokens', 0)
        content = data.get('choices', [{}])[0].get('message', {}).get('content', 'No response')
        print(f"✅ Platform API: {response_time:.3f}s ({tokens} tokens)")
        print(f" Response: {content[:100]}...")
        return response_time, tokens
    else:
        print(f"❌ Platform API failed: {response.status_code} - {response.text}")
        return None, None
||||
def test_litellm_direct():
    """Time one chat completion sent directly to LiteLLM (no platform layer).

    Returns:
        tuple: ``(response_time_seconds, total_tokens)`` on HTTP 200,
        otherwise ``(None, None)``. Network failures (connection refused,
        timeout) are caught and also yield ``(None, None)`` instead of
        crashing the whole benchmark script.
    """
    print("🔄 Testing LiteLLM Direct...")

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {LITELLM_KEY}",
    }
    payload = {
        "model": MODEL,
        "messages": [{"role": "user", "content": TEST_PROMPT}],
        "max_tokens": 30,
    }

    # perf_counter is monotonic and intended for interval timing; time.time
    # is wall-clock and can jump if the system clock is adjusted mid-request.
    start_time = time.perf_counter()
    try:
        response = requests.post(
            "http://localhost:54000/chat/completions",
            headers=headers,
            json=payload,
            timeout=30,
        )
    except requests.RequestException as exc:
        # Previously an unreachable endpoint raised and killed the script;
        # report it the same way as an HTTP failure so main() can continue.
        print(f"❌ LiteLLM Direct failed: {exc}")
        return None, None
    response_time = time.perf_counter() - start_time

    if response.status_code == 200:
        data = response.json()
        tokens = data.get('usage', {}).get('total_tokens', 0)
        content = data.get('choices', [{}])[0].get('message', {}).get('content', 'No response')
        print(f"✅ LiteLLM Direct: {response_time:.3f}s ({tokens} tokens)")
        print(f" Response: {content[:100]}...")
        return response_time, tokens
    else:
        print(f"❌ LiteLLM Direct failed: {response.status_code} - {response.text}")
        return None, None
|
||||
def main():
    """Run both benchmarks and print a side-by-side overhead comparison."""
    print("=" * 60)
    print("QUICK PERFORMANCE COMPARISON")
    print("=" * 60)
    print(f"Prompt: {TEST_PROMPT}")
    print(f"Model: {MODEL}")
    print()

    # Test platform API
    platform_time, platform_tokens = test_platform_api()
    print()

    # Test LiteLLM direct
    litellm_time, litellm_tokens = test_litellm_direct()
    print()

    # Compare only when both calls succeeded. Explicit None checks (instead
    # of truthiness) avoid skipping the comparison for a legitimate 0.0s
    # measurement and guard the division below.
    if platform_time is not None and litellm_time is not None:
        overhead = platform_time - litellm_time
        overhead_percent = (overhead / litellm_time) * 100

        print("=" * 60)
        print("COMPARISON RESULTS")
        print("=" * 60)
        print(f"Platform API: {platform_time:.3f}s")
        print(f"LiteLLM Direct: {litellm_time:.3f}s")
        print(f"Platform Overhead: {overhead:.3f}s ({overhead_percent:+.1f}%)")
        print()

        # Qualitative verdict buckets for the relative overhead.
        if overhead_percent < 10:
            print("🚀 EXCELLENT: Platform adds minimal overhead!")
        elif overhead_percent < 25:
            print("⚡ GOOD: Platform adds reasonable overhead")
        elif overhead_percent < 50:
            print("⚠️ MODERATE: Platform adds noticeable overhead")
        else:
            print("🐌 HIGH: Platform adds significant overhead")

        print()
        print("Platform overhead includes:")
        print(" • API key authentication & validation")
        print(" • Budget enforcement & usage tracking")
        print(" • Request logging & analytics")
        print(" • Rate limiting checks")
        print(" • Database operations")

    print("=" * 60)


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user