// Load Testing and Performance Benchmarks for BZZZ System
//
// This comprehensive test suite validates system performance under various
// load conditions and provides detailed benchmarks for critical components
// and workflows.
package integration

import (
	"context"
	"encoding/json"
	"fmt"
	"math/rand"
	"runtime"
	"sort"
	"sync"
	"testing"
	"time"

	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"

	"chorus.services/bzzz/pkg/config"
	"chorus.services/bzzz/pkg/dht"
	"chorus.services/bzzz/pkg/slurp"
	"chorus.services/bzzz/pkg/ucxi"
)

// LoadTestSuite provides comprehensive load testing capabilities.
type LoadTestSuite struct {
	ctx              context.Context
	config           *config.Config
	dhtStorage       dht.DHT
	ucxiServer       *ucxi.Server
	slurpProcessor   *slurp.EventProcessor
	performanceStats *PerformanceStats
	loadProfiles     map[string]*LoadProfile
	testData         *TestDataManager
	resourceMonitor  *ResourceMonitor
}

// PerformanceStats tracks detailed performance metrics.
type PerformanceStats struct {
	mu sync.RWMutex

	TotalOperations      int64         `json:"total_operations"`
	SuccessfulOperations int64         `json:"successful_operations"`
	FailedOperations     int64         `json:"failed_operations"`
	AverageLatency       time.Duration `json:"average_latency"`
	P50Latency           time.Duration `json:"p50_latency"`
	P95Latency           time.Duration `json:"p95_latency"`
	P99Latency           time.Duration `json:"p99_latency"`
	MaxLatency           time.Duration `json:"max_latency"`
	MinLatency           time.Duration `json:"min_latency"`
	Throughput           float64       `json:"throughput_ops_per_sec"`
	ErrorRate            float64       `json:"error_rate_percent"`
	LatencyHistogram     []int64       `json:"latency_histogram_ms"`

	latencies []time.Duration
	startTime time.Time
	endTime   time.Time

	MemoryUsageStart    int64 `json:"memory_usage_start_bytes"`
	MemoryUsageEnd      int64 `json:"memory_usage_end_bytes"`
	MemoryUsagePeak     int64 `json:"memory_usage_peak_bytes"`
	GoroutineCountStart int   `json:"goroutine_count_start"`
	GoroutineCountEnd   int   `json:"goroutine_count_end"`
	GoroutineCountPeak  int   `json:"goroutine_count_peak"`
}

// LoadProfile defines different load testing scenarios.
type LoadProfile struct {
	Name                     string             `json:"name"`
	Description              string             `json:"description"`
	Duration                 time.Duration      `json:"duration"`
	ConcurrentWorkers        int                `json:"concurrent_workers"`
	RequestsPerSecond        float64            `json:"requests_per_second"`
	PayloadSizeBytes         int                `json:"payload_size_bytes"`
	OperationDistribution    map[string]float64 `json:"operation_distribution"`     // e.g. PUT:70%, GET:20%, DELETE:10%
	AddressPatternComplexity string             `json:"address_pattern_complexity"` // simple, medium, complex
	EnableLatencyJitter      bool               `json:"enable_latency_jitter"`
	FailureInjectionRate     float64            `json:"failure_injection_rate"`
}
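// validateDistribution is an illustrative helper (not part of the original
// suite): it checks that a profile's OperationDistribution sums to roughly
// 1.0, since createWorkItem below selects operations by walking the
// cumulative distribution and misbehaves silently if the weights do not
// cover [0, 1).
func validateDistribution(p *LoadProfile) error {
	var sum float64
	for _, prob := range p.OperationDistribution {
		sum += prob
	}
	if sum < 0.999 || sum > 1.001 {
		return fmt.Errorf("operation distribution for %q sums to %.3f, want 1.0", p.Name, sum)
	}
	return nil
}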
// TestDataManager handles test data generation and management.
type TestDataManager struct {
	mu            sync.RWMutex
	generatedData map[string][]byte
	addresses     []string
	payloadSizes  []int
	patterns      []string
}

// ResourceMonitor tracks system resource usage during tests.
// The cpuUsage, diskIOOperations, and network counters are declared for
// future collection; monitorResources currently samples memory and
// goroutine counts only.
type ResourceMonitor struct {
	mu               sync.RWMutex
	monitoring       bool
	interval         time.Duration
	memoryUsage      []int64
	goroutineCount   []int
	cpuUsage         []float64
	diskIOOperations []int64
	networkBytesIn   []int64
	networkBytesOut  []int64
	timestamps       []time.Time
}

func TestBZZZLoadAndPerformance(t *testing.T) {
	suite := NewLoadTestSuite(t)
	defer suite.Cleanup()

	// Define test cases based on realistic usage patterns.
	t.Run("Baseline_Single_User", suite.TestBaselineSingleUser)
	t.Run("Light_Load_10_Users", suite.TestLightLoad10Users)
	t.Run("Medium_Load_100_Users", suite.TestMediumLoad100Users)
	t.Run("Heavy_Load_1000_Users", suite.TestHeavyLoad1000Users)
	t.Run("Spike_Load_Sudden_Increase", suite.TestSpikeLoadSuddenIncrease)
	t.Run("Sustained_Load_Long_Duration", suite.TestSustainedLoadLongDuration)
	t.Run("Mixed_Operations_Realistic", suite.TestMixedOperationsRealistic)
	t.Run("Large_Payload_Stress", suite.TestLargePayloadStress)
	t.Run("High_Concurrency_Stress", suite.TestHighConcurrencyStress)
	t.Run("Memory_Pressure_Test", suite.TestMemoryPressureTest)
}
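// Individual scenarios can be run in isolation with Go's subtest filter, and
// -short skips the long-running cases, e.g.:
//
//	go test -run 'TestBZZZLoadAndPerformance/Baseline_Single_User' -v
//	go test -short ./...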
func NewLoadTestSuite(t *testing.T) *LoadTestSuite {
	ctx := context.Background()

	// Initialize configuration optimized for testing.
	cfg := &config.Config{
		DHT: config.DHTConfig{
			ReplicationFactor: 3,
			PutTimeout:        5 * time.Second,
			GetTimeout:        2 * time.Second,
			MaxKeySize:        1024 * 1024, // 1MB max
		},
		SLURP: config.SLURPConfig{
			BatchSize:             50,
			ProcessingTimeout:     10 * time.Second,
			BackpressureEnabled:   true,
			CircuitBreakerEnabled: true,
		},
	}

	// Initialize DHT storage and configure the mock for performance testing.
	dhtStorage := dht.NewMockDHT()
	mockDHT := dhtStorage.(*dht.MockDHT)
	mockDHT.SetLatency(1 * time.Millisecond) // Realistic network latency

	// Initialize UCXI server.
	ucxiServer, err := ucxi.NewServer(dhtStorage)
	require.NoError(t, err, "Failed to create UCXI server")

	// Initialize SLURP processor.
	slurpProcessor, err := slurp.NewEventProcessor(cfg, dhtStorage)
	require.NoError(t, err, "Failed to create SLURP processor")

	// Initialize performance tracking.
	performanceStats := &PerformanceStats{
		latencies:        make([]time.Duration, 0, 10000),
		LatencyHistogram: make([]int64, 100), // 0-99ms buckets
	}

	// Initialize load profiles.
	loadProfiles := map[string]*LoadProfile{
		"baseline": {
			Name:                     "Baseline Single User",
			Description:              "Single user performing mixed operations",
			Duration:                 30 * time.Second,
			ConcurrentWorkers:        1,
			RequestsPerSecond:        5,
			PayloadSizeBytes:         1024,
			OperationDistribution:    map[string]float64{"PUT": 0.5, "GET": 0.4, "DELETE": 0.1},
			AddressPatternComplexity: "simple",
			EnableLatencyJitter:      false,
			FailureInjectionRate:     0.0,
		},
		"light": {
			Name:                     "Light Load 10 Users",
			Description:              "10 concurrent users with normal usage patterns",
			Duration:                 2 * time.Minute,
			ConcurrentWorkers:        10,
			RequestsPerSecond:        50,
			PayloadSizeBytes:         2048,
			OperationDistribution:    map[string]float64{"PUT": 0.4, "GET": 0.5, "DELETE": 0.1},
			AddressPatternComplexity: "medium",
			EnableLatencyJitter:      true,
			FailureInjectionRate:     0.01, // 1% failure rate
		},
		"medium": {
			Name:                     "Medium Load 100 Users",
			Description:              "100 concurrent users with mixed workload",
			Duration:                 5 * time.Minute,
			ConcurrentWorkers:        100,
			RequestsPerSecond:        500,
			PayloadSizeBytes:         4096,
			OperationDistribution:    map[string]float64{"PUT": 0.3, "GET": 0.6, "DELETE": 0.1},
			AddressPatternComplexity: "complex",
			EnableLatencyJitter:      true,
			FailureInjectionRate:     0.02, // 2% failure rate
		},
		"heavy": {
			Name:                     "Heavy Load 1000 Users",
			Description:              "1000 concurrent users with high throughput",
			Duration:                 10 * time.Minute,
			ConcurrentWorkers:        1000,
			RequestsPerSecond:        5000,
			PayloadSizeBytes:         8192,
			OperationDistribution:    map[string]float64{"PUT": 0.25, "GET": 0.65, "DELETE": 0.1},
			AddressPatternComplexity: "complex",
			EnableLatencyJitter:      true,
			FailureInjectionRate:     0.03, // 3% failure rate
		},
		"spike": {
			Name:                     "Spike Load Test",
			Description:              "Sudden traffic spike simulation",
			Duration:                 3 * time.Minute,
			ConcurrentWorkers:        2000,
			RequestsPerSecond:        10000,
			PayloadSizeBytes:         1024,
			OperationDistribution:    map[string]float64{"PUT": 0.2, "GET": 0.75, "DELETE": 0.05},
			AddressPatternComplexity: "medium",
			EnableLatencyJitter:      true,
			FailureInjectionRate:     0.05, // 5% failure rate during spike
		},
	}

	// Initialize the test data manager.
	testData := &TestDataManager{
		generatedData: make(map[string][]byte),
		payloadSizes:  []int{256, 512, 1024, 2048, 4096, 8192, 16384, 32768},
		patterns: []string{
			"ucxl://agent-{id}:developer@project-{id}:task-{id}/*^",
			"ucxl://user-{id}:viewer@docs:read-{id}/*^",
			"ucxl://service-{id}:admin@system:config-{id}/*^",
			"ucxl://monitor-{id}:developer@metrics:collect-{id}/*^",
		},
	}

	// Initialize the resource monitor.
	resourceMonitor := &ResourceMonitor{
		interval:       time.Second,
		memoryUsage:    make([]int64, 0, 1000),
		goroutineCount: make([]int, 0, 1000),
		cpuUsage:       make([]float64, 0, 1000),
		timestamps:     make([]time.Time, 0, 1000),
	}

	return &LoadTestSuite{
		ctx:              ctx,
		config:           cfg,
		dhtStorage:       dhtStorage,
		ucxiServer:       ucxiServer,
		slurpProcessor:   slurpProcessor,
		performanceStats: performanceStats,
		loadProfiles:     loadProfiles,
		testData:         testData,
		resourceMonitor:  resourceMonitor,
	}
}

func (suite *LoadTestSuite) Cleanup() {
	suite.resourceMonitor.Stop()
}
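// BenchmarkMockDHTPut is a minimal companion sketch (not part of the original
// suite) showing how the same mock DHT used above can back a standard Go
// benchmark; it assumes dht.NewMockDHT and PutValue behave as they are used
// elsewhere in this file.
func BenchmarkMockDHTPut(b *testing.B) {
	ctx := context.Background()
	d := dht.NewMockDHT()
	payload := make([]byte, 1024)

	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		addr := fmt.Sprintf("ucxl://bench-%d:developer@project:task-%d/*^", i%10, i)
		if err := d.PutValue(ctx, addr, payload); err != nil {
			b.Fatal(err)
		}
	}
}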
// TestBaselineSingleUser establishes baseline performance metrics.
func (suite *LoadTestSuite) TestBaselineSingleUser(t *testing.T) {
	profile := suite.loadProfiles["baseline"]
	result := suite.runLoadTestWithProfile(t, profile)

	// Baseline assertions - these establish expected performance.
	assert.Less(t, result.AverageLatency, 10*time.Millisecond,
		"Baseline average latency should be under 10ms")
	assert.Less(t, result.P95Latency, 50*time.Millisecond,
		"Baseline P95 latency should be under 50ms")
	assert.Greater(t, result.Throughput, 4.0,
		"Baseline throughput should be at least 4 ops/sec")
	assert.Less(t, result.ErrorRate, 0.1,
		"Baseline error rate should be under 0.1%")

	t.Logf("Baseline Performance: Avg=%v, P95=%v, Throughput=%.2f ops/sec, Errors=%.2f%%",
		result.AverageLatency, result.P95Latency, result.Throughput, result.ErrorRate)
}

// TestLightLoad10Users tests system behavior under light concurrent load.
func (suite *LoadTestSuite) TestLightLoad10Users(t *testing.T) {
	profile := suite.loadProfiles["light"]
	result := suite.runLoadTestWithProfile(t, profile)

	// Light load assertions.
	assert.Less(t, result.AverageLatency, 50*time.Millisecond,
		"Light load average latency should be reasonable")
	assert.Less(t, result.P95Latency, 200*time.Millisecond,
		"Light load P95 latency should be acceptable")
	assert.Greater(t, result.Throughput, 40.0,
		"Light load throughput should meet minimum requirements")
	assert.Less(t, result.ErrorRate, 2.0,
		"Light load error rate should be manageable")

	// Memory usage should be reasonable.
	memoryIncrease := result.MemoryUsageEnd - result.MemoryUsageStart
	assert.Less(t, memoryIncrease, int64(100*1024*1024),
		"Memory usage increase should be under 100MB")

	t.Logf("Light Load Performance: Avg=%v, P95=%v, Throughput=%.2f ops/sec, Errors=%.2f%%",
		result.AverageLatency, result.P95Latency, result.Throughput, result.ErrorRate)
}

// TestMediumLoad100Users tests system behavior under medium concurrent load.
func (suite *LoadTestSuite) TestMediumLoad100Users(t *testing.T) {
	profile := suite.loadProfiles["medium"]
	result := suite.runLoadTestWithProfile(t, profile)

	// Medium load assertions - more relaxed than light load.
	assert.Less(t, result.AverageLatency, 100*time.Millisecond,
		"Medium load average latency should be acceptable")
	assert.Less(t, result.P95Latency, 500*time.Millisecond,
		"Medium load P95 latency should be manageable")
	assert.Greater(t, result.Throughput, 300.0,
		"Medium load throughput should meet requirements")
	assert.Less(t, result.ErrorRate, 5.0,
		"Medium load error rate should be acceptable")

	// Resource usage checks.
	assert.Less(t, result.GoroutineCountPeak, 200,
		"Goroutine count should not exceed reasonable limits")

	t.Logf("Medium Load Performance: Avg=%v, P95=%v, Throughput=%.2f ops/sec, Errors=%.2f%%",
		result.AverageLatency, result.P95Latency, result.Throughput, result.ErrorRate)
}

// TestHeavyLoad1000Users tests system behavior under heavy concurrent load.
func (suite *LoadTestSuite) TestHeavyLoad1000Users(t *testing.T) {
	if testing.Short() {
		t.Skip("Skipping heavy load test in short mode")
	}

	profile := suite.loadProfiles["heavy"]
	result := suite.runLoadTestWithProfile(t, profile)

	// Heavy load assertions - the system should remain stable even if
	// performance degrades.
	assert.Less(t, result.AverageLatency, 500*time.Millisecond,
		"Heavy load average latency should remain reasonable")
	assert.Less(t, result.P95Latency, 2*time.Second,
		"Heavy load P95 latency should not exceed 2 seconds")
	assert.Greater(t, result.Throughput, 1000.0,
		"Heavy load throughput should meet minimum requirements")
	assert.Less(t, result.ErrorRate, 10.0,
		"Heavy load error rate should remain below 10%")

	// The system should not crash or become unresponsive.
	assert.Greater(t, result.SuccessfulOperations, result.FailedOperations,
		"More operations should succeed than fail")

	t.Logf("Heavy Load Performance: Avg=%v, P95=%v, Throughput=%.2f ops/sec, Errors=%.2f%%",
		result.AverageLatency, result.P95Latency, result.Throughput, result.ErrorRate)
}
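// logPerfSummary is an illustrative consolidation (not in the original file)
// of the t.Logf pattern repeated across the scenario tests in this suite.
func logPerfSummary(t *testing.T, label string, r *PerformanceStats) {
	t.Logf("%s: Avg=%v, P95=%v, P99=%v, Throughput=%.2f ops/sec, Errors=%.2f%%",
		label, r.AverageLatency, r.P95Latency, r.P99Latency, r.Throughput, r.ErrorRate)
}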
// TestSpikeLoadSuddenIncrease tests system resilience to sudden traffic spikes.
func (suite *LoadTestSuite) TestSpikeLoadSuddenIncrease(t *testing.T) {
	if testing.Short() {
		t.Skip("Skipping spike load test in short mode")
	}

	profile := suite.loadProfiles["spike"]
	result := suite.runLoadTestWithProfile(t, profile)

	// Spike load assertions - the system should handle spikes gracefully. It
	// may show higher latency and error rates but must not crash.
	assert.Less(t, result.ErrorRate, 20.0,
		"Spike load error rate should not exceed 20%")
	assert.Greater(t, result.Throughput, 500.0,
		"Spike load should maintain minimum throughput")

	// The system should recover and remain responsive.
	assert.Less(t, result.P99Latency, 5*time.Second,
		"P99 latency should not exceed 5 seconds even during spikes")

	t.Logf("Spike Load Performance: Avg=%v, P95=%v, P99=%v, Throughput=%.2f ops/sec, Errors=%.2f%%",
		result.AverageLatency, result.P95Latency, result.P99Latency, result.Throughput, result.ErrorRate)
}

// TestSustainedLoadLongDuration tests system stability over extended periods.
func (suite *LoadTestSuite) TestSustainedLoadLongDuration(t *testing.T) {
	if testing.Short() {
		t.Skip("Skipping sustained load test in short mode")
	}

	// Create an extended-duration profile.
	sustainedProfile := &LoadProfile{
		Name:                     "Sustained Load Test",
		Description:              "Extended duration load test for stability",
		Duration:                 20 * time.Minute,
		ConcurrentWorkers:        200,
		RequestsPerSecond:        1000,
		PayloadSizeBytes:         2048,
		OperationDistribution:    map[string]float64{"PUT": 0.3, "GET": 0.6, "DELETE": 0.1},
		AddressPatternComplexity: "medium",
		EnableLatencyJitter:      true,
		FailureInjectionRate:     0.02,
	}

	result := suite.runLoadTestWithProfile(t, sustainedProfile)

	// Sustained load assertions - focus on stability over time.
	assert.Less(t, result.ErrorRate, 5.0,
		"Sustained load error rate should remain stable")

	// Memory usage should not grow without bound (no memory leaks).
	memoryGrowth := result.MemoryUsageEnd - result.MemoryUsageStart
	assert.Less(t, memoryGrowth, int64(500*1024*1024),
		"Memory growth should be bounded (no major leaks)")

	// Performance should not significantly degrade over time.
	assert.Greater(t, result.Throughput, 800.0,
		"Sustained load should maintain reasonable throughput")

	t.Logf("Sustained Load Performance: Duration=%v, Throughput=%.2f ops/sec, Errors=%.2f%%, MemGrowth=%dMB",
		sustainedProfile.Duration, result.Throughput, result.ErrorRate, memoryGrowth/(1024*1024))
}

// TestMixedOperationsRealistic tests realistic mixed workload patterns.
func (suite *LoadTestSuite) TestMixedOperationsRealistic(t *testing.T) {
	// Create a realistic mixed-operations profile.
	realisticProfile := &LoadProfile{
		Name:                     "Realistic Mixed Operations",
		Description:              "Realistic distribution of operations with varying payloads",
		Duration:                 5 * time.Minute,
		ConcurrentWorkers:        50,
		RequestsPerSecond:        200,
		PayloadSizeBytes:         4096, // Intended to be varied; see runMixedOperationsTest
		OperationDistribution:    map[string]float64{"PUT": 0.2, "GET": 0.7, "DELETE": 0.1},
		AddressPatternComplexity: "complex",
		EnableLatencyJitter:      true,
		FailureInjectionRate:     0.015, // 1.5% realistic failure rate
	}

	result := suite.runMixedOperationsTest(t, realisticProfile)

	// Realistic workload assertions.
	assert.Less(t, result.AverageLatency, 100*time.Millisecond,
		"Mixed operations average latency should be reasonable")
	assert.Less(t, result.P95Latency, 500*time.Millisecond,
		"Mixed operations P95 latency should be acceptable")
	assert.Greater(t, result.Throughput, 150.0,
		"Mixed operations throughput should meet requirements")
	assert.Less(t, result.ErrorRate, 3.0,
		"Mixed operations error rate should be low")

	t.Logf("Mixed Operations Performance: Avg=%v, P95=%v, Throughput=%.2f ops/sec, Errors=%.2f%%",
		result.AverageLatency, result.P95Latency, result.Throughput, result.ErrorRate)
}

// TestLargePayloadStress tests system behavior with large payloads.
func (suite *LoadTestSuite) TestLargePayloadStress(t *testing.T) {
	// Test with increasingly large payloads.
	payloadSizes := []int{
		64 * 1024,       // 64KB
		256 * 1024,      // 256KB
		1024 * 1024,     // 1MB
		4 * 1024 * 1024, // 4MB
	}

	for _, payloadSize := range payloadSizes {
		t.Run(fmt.Sprintf("Payload_%dKB", payloadSize/1024), func(t *testing.T) {
			largePayloadProfile := &LoadProfile{
				Name:                     fmt.Sprintf("Large Payload %dKB", payloadSize/1024),
				Description:              "Large payload stress test",
				Duration:                 2 * time.Minute,
				ConcurrentWorkers:        20,
				RequestsPerSecond:        50,
				PayloadSizeBytes:         payloadSize,
				OperationDistribution:    map[string]float64{"PUT": 0.5, "GET": 0.5, "DELETE": 0.0},
				AddressPatternComplexity: "simple",
				EnableLatencyJitter:      false,
				FailureInjectionRate:     0.0,
			}

			result := suite.runLoadTestWithProfile(t, largePayloadProfile)

			// Large payload assertions - latency will be higher but operations
			// should not fail.
			maxExpectedLatency := time.Duration(payloadSize/1024) * time.Millisecond // 1ms per KB
			if maxExpectedLatency < 100*time.Millisecond {
				maxExpectedLatency = 100 * time.Millisecond
			}

			assert.Less(t, result.AverageLatency, maxExpectedLatency,
				"Large payload average latency should scale reasonably with size")
			assert.Less(t, result.ErrorRate, 2.0,
				"Large payload error rate should be low")
			assert.Greater(t, result.SuccessfulOperations, int64(0),
				"Some operations should succeed")

			t.Logf("Large Payload %dKB: Avg=%v, P95=%v, Throughput=%.2f ops/sec",
				payloadSize/1024, result.AverageLatency, result.P95Latency, result.Throughput)
		})
	}
}
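// Note on TestLargePayloadStress above: the latency ceiling scales at 1ms per
// KB with a 100ms floor, so the thresholds work out to 100ms for 64KB (the
// floor applies), 256ms for 256KB, 1024ms for 1MB, and 4096ms for 4MB.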
// TestHighConcurrencyStress tests system behavior under very high concurrency.
func (suite *LoadTestSuite) TestHighConcurrencyStress(t *testing.T) {
	if testing.Short() {
		t.Skip("Skipping high concurrency test in short mode")
	}

	concurrencyProfile := &LoadProfile{
		Name:                     "High Concurrency Stress",
		Description:              "Very high concurrency with many goroutines",
		Duration:                 3 * time.Minute,
		ConcurrentWorkers:        5000, // Very high concurrency
		RequestsPerSecond:        10000,
		PayloadSizeBytes:         512,
		OperationDistribution:    map[string]float64{"PUT": 0.1, "GET": 0.85, "DELETE": 0.05},
		AddressPatternComplexity: "simple",
		EnableLatencyJitter:      true,
		FailureInjectionRate:     0.0,
	}

	result := suite.runLoadTestWithProfile(t, concurrencyProfile)

	// High concurrency assertions - the system should not deadlock or crash.
	assert.Less(t, result.ErrorRate, 15.0,
		"High concurrency error rate should be manageable")
	assert.Greater(t, result.Throughput, 2000.0,
		"High concurrency should achieve reasonable throughput")
	assert.Greater(t, result.SuccessfulOperations, result.FailedOperations,
		"More operations should succeed than fail even under high concurrency")

	t.Logf("High Concurrency Performance: Workers=%d, Avg=%v, Throughput=%.2f ops/sec, Errors=%.2f%%",
		concurrencyProfile.ConcurrentWorkers, result.AverageLatency, result.Throughput, result.ErrorRate)
}

// TestMemoryPressureTest tests system behavior under memory pressure.
func (suite *LoadTestSuite) TestMemoryPressureTest(t *testing.T) {
	if testing.Short() {
		t.Skip("Skipping memory pressure test in short mode")
	}

	// Force GC before the test to get a clean baseline.
	runtime.GC()
	runtime.GC()

	memoryProfile := &LoadProfile{
		Name:                     "Memory Pressure Test",
		Description:              "Test under memory pressure with large objects",
		Duration:                 5 * time.Minute,
		ConcurrentWorkers:        100,
		RequestsPerSecond:        500,
		PayloadSizeBytes:         100 * 1024, // 100KB payloads
		OperationDistribution:    map[string]float64{"PUT": 0.8, "GET": 0.2, "DELETE": 0.0}, // Mostly writes to increase memory use
		AddressPatternComplexity: "complex",
		EnableLatencyJitter:      true,
		FailureInjectionRate:     0.0,
	}

	result := suite.runLoadTestWithProfile(t, memoryProfile)

	// Memory pressure assertions.
	assert.Less(t, result.ErrorRate, 10.0,
		"Memory pressure should not cause excessive errors")
	assert.Greater(t, result.Throughput, 200.0,
		"Memory pressure should maintain minimum throughput")

	// Check for reasonable memory growth.
	memoryGrowth := result.MemoryUsageEnd - result.MemoryUsageStart
	assert.Less(t, memoryGrowth, int64(2*1024*1024*1024),
		"Memory growth should be bounded under 2GB")

	t.Logf("Memory Pressure: MemGrowth=%dMB, Peak=%dMB, Throughput=%.2f ops/sec",
		memoryGrowth/(1024*1024), result.MemoryUsagePeak/(1024*1024), result.Throughput)
}
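// The core load test below is a four-stage pipeline:
//
//	generateWorkload -> workChan -> loadTestWorker (xN) -> resultChan -> resultCollector
//
// Work items are produced at the profile's target rate, fanned out to the
// worker pool, and folded into performanceStats by a single collector
// goroutine.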
// Core load testing implementation.
func (suite *LoadTestSuite) runLoadTestWithProfile(t *testing.T, profile *LoadProfile) *PerformanceStats {
	t.Logf("Starting load test: %s", profile.Name)
	t.Logf("Profile: Workers=%d, RPS=%.1f, Duration=%v, PayloadSize=%d bytes",
		profile.ConcurrentWorkers, profile.RequestsPerSecond, profile.Duration, profile.PayloadSizeBytes)

	// Reset performance stats.
	suite.performanceStats = &PerformanceStats{
		latencies:        make([]time.Duration, 0, int(profile.Duration.Seconds()*profile.RequestsPerSecond)),
		LatencyHistogram: make([]int64, 100),
		startTime:        time.Now(),
		MinLatency:       time.Hour, // Initialize to a very high value
	}

	// Start resource monitoring.
	suite.resourceMonitor.Start()
	defer suite.resourceMonitor.Stop()

	// Record initial resource usage.
	var memStats runtime.MemStats
	runtime.ReadMemStats(&memStats)
	suite.performanceStats.MemoryUsageStart = int64(memStats.Alloc)
	suite.performanceStats.GoroutineCountStart = runtime.NumGoroutine()

	// Create worker channels.
	workChan := make(chan WorkItem, profile.ConcurrentWorkers*10)
	resultChan := make(chan WorkResult, profile.ConcurrentWorkers*10)

	// Start workers.
	var workerWg sync.WaitGroup
	for i := 0; i < profile.ConcurrentWorkers; i++ {
		workerWg.Add(1)
		go suite.loadTestWorker(i, workChan, resultChan, &workerWg)
	}

	// Start the result collector.
	var collectorWg sync.WaitGroup
	collectorWg.Add(1)
	go suite.resultCollector(resultChan, &collectorWg)

	// Generate work items.
	suite.generateWorkload(profile, workChan)

	// Wait for workers to complete.
	close(workChan)
	workerWg.Wait()

	// Wait for result collection to complete.
	close(resultChan)
	collectorWg.Wait()

	// Record final resource usage.
	runtime.ReadMemStats(&memStats)
	suite.performanceStats.MemoryUsageEnd = int64(memStats.Alloc)
	suite.performanceStats.GoroutineCountEnd = runtime.NumGoroutine()
	suite.performanceStats.endTime = time.Now()

	// Calculate final metrics.
	suite.calculateMetrics()

	return suite.performanceStats
}

// runMixedOperationsTest currently delegates to runLoadTestWithProfile;
// payload-size variation is not yet implemented.
func (suite *LoadTestSuite) runMixedOperationsTest(t *testing.T, profile *LoadProfile) *PerformanceStats {
	return suite.runLoadTestWithProfile(t, profile)
}

func (suite *LoadTestSuite) generateWorkload(profile *LoadProfile, workChan chan<- WorkItem) {
	ticker := time.NewTicker(time.Duration(float64(time.Second) / profile.RequestsPerSecond))
	defer ticker.Stop()

	deadline := time.Now().Add(profile.Duration)
	itemID := 0

	for time.Now().Before(deadline) {
		<-ticker.C
		workItem := suite.createWorkItem(profile, itemID)
		select {
		case workChan <- workItem:
			itemID++
		default:
			// Channel full; skip this work item rather than block the generator.
		}
	}
}

func (suite *LoadTestSuite) createWorkItem(profile *LoadProfile, itemID int) WorkItem {
	// Use a local RNG; re-seeding the shared global source per item is
	// deprecated and would couple these draws to generatePayload's
	// deterministic, id-seeded stream.
	rng := rand.New(rand.NewSource(time.Now().UnixNano() + int64(itemID)))

	// Determine the operation type by walking the cumulative distribution.
	r := rng.Float64()
	var operation string
	cumulative := 0.0
	for op, prob := range profile.OperationDistribution {
		cumulative += prob
		if r <= cumulative {
			operation = op
			break
		}
	}
	if operation == "" {
		// Guard against floating-point rounding leaving no operation selected.
		operation = "GET"
	}

	// Generate an address based on the configured complexity.
	address := suite.generateAddress(profile.AddressPatternComplexity, itemID)

	// Generate the payload.
	payload := suite.generatePayload(profile.PayloadSizeBytes, itemID)

	// Apply failure injection if enabled.
	shouldFail := false
	if profile.FailureInjectionRate > 0 {
		shouldFail = rng.Float64() < profile.FailureInjectionRate
	}

	return WorkItem{
		ID:         itemID,
		Operation:  operation,
		Address:    address,
		Payload:    payload,
		ShouldFail: shouldFail,
		Timestamp:  time.Now(),
	}
}

func (suite *LoadTestSuite) generateAddress(complexity string, id int) string {
	switch complexity {
	case "simple":
		return fmt.Sprintf("ucxl://agent-%d:developer@project:task-%d/*^", id%10, id)
	case "medium":
		return fmt.Sprintf("ucxl://user-%d:role-%d@project-%d:task-%d/path/%d*^",
			id%100, id%3, id%20, id, id%5)
	case "complex":
		return fmt.Sprintf("ucxl://service-%d:role-%d@namespace-%d:operation-%d/api/v%d/resource-%d*^",
			id%500, id%5, id%50, id, (id%3)+1, id%100)
	default:
		return fmt.Sprintf("ucxl://default-%d:developer@project:task-%d/*^", id, id)
	}
}
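// For reference, generateAddress produces addresses like these for id = 42
// (derived from the format strings above):
//
//	simple:  ucxl://agent-2:developer@project:task-42/*^
//	medium:  ucxl://user-42:role-0@project-2:task-42/path/2*^
//	complex: ucxl://service-42:role-2@namespace-42:operation-42/api/v1/resource-42*^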
func (suite *LoadTestSuite) generatePayload(size int, id int) []byte {
	// Build a realistic payload with some JSON structure.
	contentSize := size - 200 // Leave room for JSON overhead
	if contentSize < 0 {
		contentSize = 0
	}
	content := make([]byte, contentSize)

	// Fill the content with pseudo-random but deterministic data, using a
	// local RNG seeded by the item ID so payloads are reproducible.
	rng := rand.New(rand.NewSource(int64(id)))
	for i := range content {
		content[i] = byte(rng.Intn(256))
	}

	data := map[string]interface{}{
		"id":        id,
		"timestamp": time.Now().Unix(),
		"type":      "load_test_data",
		"content":   content,
	}

	jsonData, _ := json.Marshal(data)

	// Pad or truncate to the exact requested size.
	if len(jsonData) < size {
		padding := make([]byte, size-len(jsonData))
		return append(jsonData, padding...)
	}
	return jsonData[:size]
}

func (suite *LoadTestSuite) loadTestWorker(workerID int, workChan <-chan WorkItem, resultChan chan<- WorkResult, wg *sync.WaitGroup) {
	defer wg.Done()

	for workItem := range workChan {
		startTime := time.Now()

		var err error
		var success bool

		if workItem.ShouldFail {
			// Simulate failure injection.
			err = fmt.Errorf("injected failure for testing")
			success = false
		} else {
			// Perform the actual operation.
			switch workItem.Operation {
			case "PUT":
				err = suite.dhtStorage.PutValue(suite.ctx, workItem.Address, workItem.Payload)
			case "GET":
				_, err = suite.dhtStorage.GetValue(suite.ctx, workItem.Address)
			case "DELETE":
				// The mock DHT has no delete, so simulate it: check existence,
				// then overwrite with an empty value.
				_, err = suite.dhtStorage.GetValue(suite.ctx, workItem.Address)
				if err == nil {
					err = suite.dhtStorage.PutValue(suite.ctx, workItem.Address, []byte{})
				}
			default:
				err = fmt.Errorf("unknown operation: %s", workItem.Operation)
			}
			success = err == nil
		}

		duration := time.Since(startTime)

		result := WorkResult{
			WorkerID:    workerID,
			WorkItemID:  workItem.ID,
			Operation:   workItem.Operation,
			Duration:    duration,
			Success:     success,
			Error:       err,
			CompletedAt: time.Now(),
		}

		select {
		case resultChan <- result:
		default:
			// Result channel full; drop the result (should not happen with
			// proper channel sizing).
		}
	}
}

func (suite *LoadTestSuite) resultCollector(resultChan <-chan WorkResult, wg *sync.WaitGroup) {
	defer wg.Done()

	for result := range resultChan {
		suite.performanceStats.mu.Lock()

		// The stats mutex is held, so plain increments are safe here.
		suite.performanceStats.TotalOperations++
		if result.Success {
			suite.performanceStats.SuccessfulOperations++
		} else {
			suite.performanceStats.FailedOperations++
		}

		// Record latency.
		suite.performanceStats.latencies = append(suite.performanceStats.latencies, result.Duration)

		// Update min/max latency.
		if result.Duration < suite.performanceStats.MinLatency {
			suite.performanceStats.MinLatency = result.Duration
		}
		if result.Duration > suite.performanceStats.MaxLatency {
			suite.performanceStats.MaxLatency = result.Duration
		}

		// Update the latency histogram (0-99ms buckets; the last bucket
		// absorbs everything slower).
		bucketIndex := int(result.Duration.Nanoseconds() / int64(time.Millisecond))
		if bucketIndex >= len(suite.performanceStats.LatencyHistogram) {
			bucketIndex = len(suite.performanceStats.LatencyHistogram) - 1
		}
		suite.performanceStats.LatencyHistogram[bucketIndex]++

		suite.performanceStats.mu.Unlock()
	}
}
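// histogramSummary is an illustrative helper (not part of the original file)
// that condenses the millisecond histogram built by resultCollector into a
// loggable string, listing only non-empty buckets.
func histogramSummary(h []int64) string {
	s := ""
	for ms, count := range h {
		if count > 0 {
			s += fmt.Sprintf("%dms:%d ", ms, count)
		}
	}
	return s
}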
func (suite *LoadTestSuite) calculateMetrics() {
	suite.performanceStats.mu.Lock()
	defer suite.performanceStats.mu.Unlock()

	if len(suite.performanceStats.latencies) == 0 {
		return
	}

	// Calculate the average latency.
	var totalLatency time.Duration
	for _, latency := range suite.performanceStats.latencies {
		totalLatency += latency
	}
	suite.performanceStats.AverageLatency = totalLatency / time.Duration(len(suite.performanceStats.latencies))

	// Calculate percentiles from a sorted copy.
	latencies := make([]time.Duration, len(suite.performanceStats.latencies))
	copy(latencies, suite.performanceStats.latencies)
	sort.Slice(latencies, func(i, j int) bool { return latencies[i] < latencies[j] })

	suite.performanceStats.P50Latency = latencies[len(latencies)*50/100]
	suite.performanceStats.P95Latency = latencies[len(latencies)*95/100]
	suite.performanceStats.P99Latency = latencies[len(latencies)*99/100]

	// Calculate throughput.
	duration := suite.performanceStats.endTime.Sub(suite.performanceStats.startTime)
	suite.performanceStats.Throughput = float64(suite.performanceStats.TotalOperations) / duration.Seconds()

	// Calculate the error rate.
	if suite.performanceStats.TotalOperations > 0 {
		suite.performanceStats.ErrorRate = float64(suite.performanceStats.FailedOperations) /
			float64(suite.performanceStats.TotalOperations) * 100
	}

	// Update the memory usage peak.
	var memStats runtime.MemStats
	runtime.ReadMemStats(&memStats)
	if int64(memStats.Alloc) > suite.performanceStats.MemoryUsagePeak {
		suite.performanceStats.MemoryUsagePeak = int64(memStats.Alloc)
	}

	// Update the goroutine count peak.
	currentGoroutines := runtime.NumGoroutine()
	if currentGoroutines > suite.performanceStats.GoroutineCountPeak {
		suite.performanceStats.GoroutineCountPeak = currentGoroutines
	}
}

// Supporting types.

type WorkItem struct {
	ID         int       `json:"id"`
	Operation  string    `json:"operation"`
	Address    string    `json:"address"`
	Payload    []byte    `json:"payload"`
	ShouldFail bool      `json:"should_fail"`
	Timestamp  time.Time `json:"timestamp"`
}

type WorkResult struct {
	WorkerID    int           `json:"worker_id"`
	WorkItemID  int           `json:"work_item_id"`
	Operation   string        `json:"operation"`
	Duration    time.Duration `json:"duration"`
	Success     bool          `json:"success"`
	Error       error         `json:"error,omitempty"`
	CompletedAt time.Time     `json:"completed_at"`
}

// ResourceMonitor implementation.

func (rm *ResourceMonitor) Start() {
	rm.mu.Lock()
	defer rm.mu.Unlock()
	if rm.monitoring {
		return
	}
	rm.monitoring = true
	go rm.monitorResources()
}

func (rm *ResourceMonitor) Stop() {
	rm.mu.Lock()
	defer rm.mu.Unlock()
	rm.monitoring = false
}

func (rm *ResourceMonitor) monitorResources() {
	ticker := time.NewTicker(rm.interval)
	defer ticker.Stop()

	for {
		rm.mu.RLock()
		monitoring := rm.monitoring
		rm.mu.RUnlock()
		if !monitoring {
			return
		}

		// Collect metrics.
		var memStats runtime.MemStats
		runtime.ReadMemStats(&memStats)

		rm.mu.Lock()
		rm.memoryUsage = append(rm.memoryUsage, int64(memStats.Alloc))
		rm.goroutineCount = append(rm.goroutineCount, runtime.NumGoroutine())
		rm.timestamps = append(rm.timestamps, time.Now())
		rm.mu.Unlock()

		<-ticker.C
	}
}
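// PeakMemory is an illustrative accessor (not in the original file) exposing
// the highest memory sample the monitor recorded, so tests could assert on
// monitor data directly rather than reading runtime.MemStats themselves.
func (rm *ResourceMonitor) PeakMemory() int64 {
	rm.mu.RLock()
	defer rm.mu.RUnlock()
	var peak int64
	for _, m := range rm.memoryUsage {
		if m > peak {
			peak = m
		}
	}
	return peak
}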