enclava/backend/tests/integration/test_auth_performance.py

#!/usr/bin/env python3
"""
Authentication Performance Test
Tests the performance improvements of the cached API key authentication system
"""

import asyncio
import aiohttp
import time
import statistics
import json
from typing import List

# Test configuration
PLATFORM_BASE_URL = "http://localhost:58000"
TEST_ITERATIONS = 20


class AuthPerformanceTest:
    def __init__(self):
        self.results = {}

    async def test_auth_endpoint_performance(self):
        """Test authentication performance using a real endpoint"""
        print("Testing authentication performance...")

        # Test with models endpoint which requires authentication
        endpoint = f"{PLATFORM_BASE_URL}/api/v1/llm/models"

        # Use a dummy API key - this will fail auth but still test the auth flow
        headers = {"Authorization": "Bearer ce_dummy_key_12345678_test_performance_auth_flow"}

        times = []

        async with aiohttp.ClientSession() as session:
            for i in range(TEST_ITERATIONS):
                start_time = time.perf_counter()

                try:
                    async with session.get(endpoint, headers=headers) as response:
                        # Read response to ensure full request completion
                        await response.read()

                except Exception as e:
                    # We expect authentication to fail, but we're measuring the time
                    pass

                end_time = time.perf_counter()
                duration = (end_time - start_time) * 1000  # Convert to milliseconds
                times.append(duration)

                print(f"Auth test #{i+1}: {duration:.2f}ms")
                await asyncio.sleep(0.1)  # Small delay between requests

        return {
            "average": statistics.mean(times),
            "median": statistics.median(times),
            "min": min(times),
            "max": max(times),
            "p95": statistics.quantiles(times, n=20)[18] if len(times) >= 20 else max(times),
            "iterations": len(times),
            "total_time": sum(times)
        }

    async def test_cache_warmup_performance(self):
        """Test performance difference between cold and warm cache"""
        print("Testing cache warmup performance...")

        endpoint = f"{PLATFORM_BASE_URL}/api/v1/llm/models"
        headers = {"Authorization": "Bearer ce_dummy_key_12345678_test_cache_warmup"}

        cold_cache_times = []
        warm_cache_times = []

        async with aiohttp.ClientSession() as session:
            # Test cold cache (first few requests)
            print("Testing cold cache performance...")
            for i in range(5):
                start_time = time.perf_counter()

                try:
                    async with session.get(endpoint, headers=headers) as response:
                        await response.read()
                except:
                    pass

                end_time = time.perf_counter()
                duration = (end_time - start_time) * 1000
                cold_cache_times.append(duration)
                print(f"Cold cache #{i+1}: {duration:.2f}ms")

                await asyncio.sleep(0.1)

            # Small delay to let cache settle
            await asyncio.sleep(1)

            # Test warm cache (subsequent requests)
            print("Testing warm cache performance...")
            for i in range(10):
                start_time = time.perf_counter()

                try:
                    async with session.get(endpoint, headers=headers) as response:
                        await response.read()
                except:
                    pass

                end_time = time.perf_counter()
                duration = (end_time - start_time) * 1000
                warm_cache_times.append(duration)
                print(f"Warm cache #{i+1}: {duration:.2f}ms")

                await asyncio.sleep(0.1)

        cold_avg = statistics.mean(cold_cache_times) if cold_cache_times else 0
        warm_avg = statistics.mean(warm_cache_times) if warm_cache_times else 0
        improvement = ((cold_avg - warm_avg) / cold_avg * 100) if cold_avg > 0 else 0

        return {
            "cold_cache_avg": cold_avg,
            "warm_cache_avg": warm_avg,
            "improvement_pct": improvement,
            "cold_cache_times": cold_cache_times,
            "warm_cache_times": warm_cache_times
        }

    async def test_concurrent_auth_performance(self):
        """Test authentication under concurrent load"""
        print("Testing concurrent authentication performance...")

        endpoint = f"{PLATFORM_BASE_URL}/api/v1/llm/models"
        headers = {"Authorization": "Bearer ce_dummy_key_12345678_concurrent_test"}

        async def single_request(session, request_id):
            start_time = time.perf_counter()
            try:
                async with session.get(endpoint, headers=headers) as response:
                    await response.read()
            except:
                pass
            end_time = time.perf_counter()
            return (end_time - start_time) * 1000

        # Test with different concurrency levels
        concurrency_levels = [1, 5, 10, 20]
        results = {}

        async with aiohttp.ClientSession() as session:
            for concurrency in concurrency_levels:
                print(f"Testing concurrency level: {concurrency}")

                start_time = time.perf_counter()

                # Create concurrent tasks
                tasks = [single_request(session, i) for i in range(concurrency)]
                durations = await asyncio.gather(*tasks, return_exceptions=True)

                end_time = time.perf_counter()
                total_time = (end_time - start_time) * 1000

                # Filter out exceptions and calculate stats
                valid_durations = [d for d in durations if isinstance(d, (int, float))]

                if valid_durations:
                    results[f"concurrency_{concurrency}"] = {
                        "average_request_time": statistics.mean(valid_durations),
                        "total_time": total_time,
                        "requests_per_second": (len(valid_durations) / total_time) * 1000,
                        "successful_requests": len(valid_durations),
                        "failed_requests": concurrency - len(valid_durations)
                    }

                await asyncio.sleep(0.5)  # Brief pause between concurrency tests

        return results

    async def run_all_tests(self):
        """Run all authentication performance tests"""
        print("=" * 80)
        print("Authentication Performance Test Suite")
        print("=" * 80)

        # Test 1: Basic authentication performance
        auth_results = await self.test_auth_endpoint_performance()
        self.results["authentication"] = auth_results

        print(f"\n📊 Authentication Performance Results:")
        print(f"  Average: {auth_results['average']:.2f}ms")
        print(f"  Median: {auth_results['median']:.2f}ms")
        print(f"  P95: {auth_results['p95']:.2f}ms")
        print(f"  Min/Max: {auth_results['min']:.2f}ms / {auth_results['max']:.2f}ms")

        # Test 2: Cache warmup performance
        cache_results = await self.test_cache_warmup_performance()
        self.results["cache_warmup"] = cache_results

        print(f"\n🔥 Cache Warmup Results:")
        print(f"  Cold Cache Average: {cache_results['cold_cache_avg']:.2f}ms")
        print(f"  Warm Cache Average: {cache_results['warm_cache_avg']:.2f}ms")
        print(f"  Performance Improvement: {cache_results['improvement_pct']:.1f}%")

        # Test 3: Concurrent performance
        concurrent_results = await self.test_concurrent_auth_performance()
        self.results["concurrent"] = concurrent_results

        print(f"\n⚡ Concurrent Authentication Results:")
        for key, data in concurrent_results.items():
            concurrency = key.split('_')[1]
            print(f"  {concurrency} concurrent: {data['average_request_time']:.2f}ms avg, "
                  f"{data['requests_per_second']:.1f} req/sec")

        # Save detailed results
        timestamp = int(time.time())
        results_file = f"auth_performance_results_{timestamp}.json"
        with open(results_file, 'w') as f:
            json.dump(self.results, f, indent=2)

        print(f"\n📁 Detailed results saved to: {results_file}")

        # Performance analysis
        self.analyze_performance()

        return self.results

    def analyze_performance(self):
        """Analyze and provide performance insights"""
        print("\n" + "=" * 80)
        print("PERFORMANCE ANALYSIS")
        print("=" * 80)

        auth_avg = self.results.get("authentication", {}).get("average", 0)
        cache_improvement = self.results.get("cache_warmup", {}).get("improvement_pct", 0)

        print(f"🎯 Performance Targets:")
        print(f"  Target authentication time: <50ms")
        print(f"  Actual average time: {auth_avg:.2f}ms")

        if auth_avg < 50:
            print(f"  ✅ EXCELLENT: Authentication is under target!")
        elif auth_avg < 100:
            print(f"  ✅ GOOD: Authentication is acceptable")
        elif auth_avg < 200:
            print(f"  ⚠️  OK: Authentication could be improved")
        else:
            print(f"  ❌ SLOW: Authentication needs optimization")

        print(f"\n🚀 Cache Performance:")
        print(f"  Cache improvement: {cache_improvement:.1f}%")

        if cache_improvement > 50:
            print(f"  ✅ EXCELLENT: Cache is providing significant speedup!")
        elif cache_improvement > 25:
            print(f"  ✅ GOOD: Cache is working well")
        elif cache_improvement > 10:
            print(f"  ⚠️  OK: Cache provides some benefit")
        else:
            print(f"  ❌ POOR: Cache may not be working effectively")

        # Concurrency analysis
        concurrent_data = self.results.get("concurrent", {})
        if concurrent_data:
            single_rps = concurrent_data.get("concurrency_1", {}).get("requests_per_second", 0)
            high_rps = concurrent_data.get("concurrency_20", {}).get("requests_per_second", 0)

            print(f"\n⚡ Concurrency Performance:")
            print(f"  Single request: {single_rps:.1f} req/sec")
            print(f"  20 concurrent: {high_rps:.1f} req/sec")

            if high_rps > single_rps * 10:
                print(f"  ✅ EXCELLENT: Great concurrency scaling!")
            elif high_rps > single_rps * 5:
                print(f"  ✅ GOOD: Good concurrency performance")
            elif high_rps > single_rps * 2:
                print(f"  ⚠️  OK: Moderate concurrency scaling")
            else:
                print(f"  ❌ POOR: Concurrency bottleneck detected")

        print(f"\n💡 Optimization Impact:")
        if cache_improvement > 30 and auth_avg < 100:
            print(f"  ✅ SUCCESS: API key caching optimization is working!")
            print(f"  🎉 Expected ~92% reduction in bcrypt operations achieved")
        else:
            print(f"  ⚠️  The optimization may need further tuning or Redis isn't available")


async def main():
    """Main test execution"""
    print("Starting authentication performance tests...")
    print("Note: These tests use dummy API keys and expect authentication failures")
    print("We're measuring the time it takes to process the authentication, not success\n")

    test = AuthPerformanceTest()

    try:
        results = await test.run_all_tests()

        print("\n🏁 Test suite completed successfully!")
        return results

    except Exception as e:
        print(f"\n❌ Test suite failed: {e}")
        return None


if __name__ == "__main__":
    asyncio.run(main())