Files
CHORUS/pkg/slurp/intelligence/performance_monitor.go
anthonyrawlins 543ab216f9 Complete BZZZ functionality port to CHORUS
🎭 CHORUS now contains full BZZZ functionality adapted for containers

Core systems ported:
- P2P networking (libp2p with DHT and PubSub)
- Task coordination (COOEE protocol)
- HMMM collaborative reasoning
- SHHH encryption and security
- SLURP admin election system
- UCXL content addressing
- UCXI server integration
- Hypercore logging system
- Health monitoring and graceful shutdown
- License validation with KACHING

Container adaptations:
- Environment variable configuration (no YAML files)
- Container-optimized logging to stdout/stderr
- Auto-generated agent IDs for container deployments
- Docker-first architecture

All proven BZZZ P2P protocols, AI integration, and collaboration
features are now available in containerized form.

Next: Build and test container deployment.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-09-02 20:02:37 +10:00

1066 lines
32 KiB
Go

package intelligence
import (
	"context"
	"fmt"
	"math"
	"runtime"
	"sort"
	"sync"
	"sync/atomic"
	"time"
)
// PerformanceMonitor provides comprehensive performance monitoring and benchmarking.
// It aggregates operation and component metrics, owns the named benchmark
// suites, and delegates profiling and alerting to its Profiler and AlertManager.
type PerformanceMonitor struct {
// mu is held by methods such as RecordOperation, RecordComponentMetrics,
// GetMetrics, AddBenchmark, AddReporter, AddCollector and updateSystemMetrics.
mu sync.RWMutex
config *MonitorConfig
metrics *PerformanceMetrics
// benchmarks maps suite name to suite; entries are created by AddBenchmark.
benchmarks map[string]*BenchmarkSuite
profiler *Profiler
alertManager *AlertManager
reporters []PerformanceReporter
collectors []MetricCollector
// isRunning is 1 between a successful Start and the matching Stop; both
// flip it with an atomic compare-and-swap.
isRunning int32
// stopChan is closed by Stop to end the background goroutines.
stopChan chan struct{}
// collectInterval is the ticker period used by collectMetrics; it is
// copied from MonitorConfig.CollectionInterval at construction time.
collectInterval time.Duration
}
// MonitorConfig defines monitoring configuration.
// NewPerformanceMonitor substitutes a fully populated default when it is
// given a nil *MonitorConfig.
type MonitorConfig struct {
EnableCPUProfiling bool `json:"enable_cpu_profiling"`
EnableMemoryProfiling bool `json:"enable_memory_profiling"`
// EnableGCStats makes updateSystemMetrics refresh PerformanceMetrics.GCStats.
EnableGCStats bool `json:"enable_gc_stats"`
// CollectionInterval is the period of the metric collection ticker.
CollectionInterval time.Duration `json:"collection_interval"`
RetentionPeriod time.Duration `json:"retention_period"`
// AlertThresholds, when non-nil, makes Start launch the alert-checking goroutine.
AlertThresholds *AlertThresholds `json:"alert_thresholds"`
ReportingEnabled bool `json:"reporting_enabled"`
// BenchmarkingEnabled gates RunBenchmark, which returns an error when it is false.
BenchmarkingEnabled bool `json:"benchmarking_enabled"`
// MaxMetricHistory bounds the length of PerformanceMetrics.ResponseTimeHistory.
MaxMetricHistory int `json:"max_metric_history"`
}
// AlertThresholds defines alert thresholds.
// checkAlerts, as shown in this file, compares against MemoryUsageMB,
// ErrorRatePercent and ResponseTimeMS; the remaining fields are not evaluated there.
type AlertThresholds struct {
CPUUsagePercent float64 `json:"cpu_usage_percent"`
// MemoryUsageMB is compared with the allocated bytes converted to whole MiB.
MemoryUsageMB int64 `json:"memory_usage_mb"`
AnalysisTimeMS int64 `json:"analysis_time_ms"`
// ErrorRatePercent is compared with failed/total operations expressed as a percentage.
ErrorRatePercent float64 `json:"error_rate_percent"`
QueueSizeLimit int `json:"queue_size_limit"`
// ResponseTimeMS is compared with the average response time in milliseconds.
ResponseTimeMS int64 `json:"response_time_ms"`
}
// PerformanceMetrics contains comprehensive performance metrics.
// The PerformanceMonitor methods shown in this file guard it with the
// monitor's own mutex rather than with the mu field declared here.
type PerformanceMetrics struct {
mu sync.RWMutex
// StartTime is set by NewPerformanceMetrics and again by PerformanceMonitor.Start.
StartTime time.Time `json:"start_time"`
// Uptime is filled in by GetMetrics on the value it returns.
Uptime time.Duration `json:"uptime"`
// The three operation counters are incremented by RecordOperation.
TotalOperations int64 `json:"total_operations"`
SuccessfulOperations int64 `json:"successful_operations"`
FailedOperations int64 `json:"failed_operations"`
// The response-time statistics are recomputed from ResponseTimeHistory
// on every RecordOperation call.
AverageResponseTime time.Duration `json:"average_response_time"`
P95ResponseTime time.Duration `json:"p95_response_time"`
P99ResponseTime time.Duration `json:"p99_response_time"`
CPUUsage float64 `json:"cpu_usage"`
// MemoryUsage and GCStats are replaced by updateSystemMetrics on each collection tick.
MemoryUsage *MemoryUsage `json:"memory_usage"`
GCStats *GCStats `json:"gc_stats"`
// ComponentMetrics is keyed by component name, OperationMetrics by operation name.
ComponentMetrics map[string]*ComponentMetrics `json:"component_metrics"`
OperationMetrics map[string]*OperationMetrics `json:"operation_metrics"`
ResponseTimeHistory []time.Duration `json:"response_time_history"`
// LastUpdated is filled in by GetMetrics on the value it returns.
LastUpdated time.Time `json:"last_updated"`
}
// MemoryUsage contains memory usage statistics.
// updateSystemMetrics fills every field from the runtime.MemStats field
// named in the comment beside it.
type MemoryUsage struct {
AllocBytes uint64 `json:"alloc_bytes"` // MemStats.Alloc
TotalAllocBytes uint64 `json:"total_alloc_bytes"` // MemStats.TotalAlloc
SysBytes uint64 `json:"sys_bytes"` // MemStats.Sys
NumGC uint32 `json:"num_gc"` // MemStats.NumGC
HeapAllocBytes uint64 `json:"heap_alloc_bytes"` // MemStats.HeapAlloc
HeapSysBytes uint64 `json:"heap_sys_bytes"` // MemStats.HeapSys
StackInUse uint64 `json:"stack_in_use"` // MemStats.StackInuse
StackSys uint64 `json:"stack_sys"` // MemStats.StackSys
}
// GCStats contains garbage collection statistics.
// updateSystemMetrics fills it from runtime.MemStats when
// MonitorConfig.EnableGCStats is set.
type GCStats struct {
NumGC uint32 `json:"num_gc"`
// PauseTotal is MemStats.PauseTotalNs converted to a Duration.
PauseTotal time.Duration `json:"pause_total"`
// PauseNs is not assigned by updateSystemMetrics as shown in this file.
PauseNs []uint64 `json:"pause_ns"`
// LastGC is MemStats.LastGC (nanoseconds since the Unix epoch) as a time.Time.
LastGC time.Time `json:"last_gc"`
NextGC uint64 `json:"next_gc"`
GCCPUFraction float64 `json:"gc_cpu_fraction"`
}
// ComponentMetrics contains metrics for a specific component.
// Instances are created and updated by RecordComponentMetrics.
type ComponentMetrics struct {
ComponentName string `json:"component_name"`
TotalCalls int64 `json:"total_calls"`
SuccessfulCalls int64 `json:"successful_calls"`
FailedCalls int64 `json:"failed_calls"`
// AverageExecutionTime is a running mean updated on every recorded call.
AverageExecutionTime time.Duration `json:"average_execution_time"`
MinExecutionTime time.Duration `json:"min_execution_time"`
MaxExecutionTime time.Duration `json:"max_execution_time"`
// ErrorRate is FailedCalls divided by TotalCalls, i.e. a fraction rather than a percentage.
ErrorRate float64 `json:"error_rate"`
LastExecutionTime time.Time `json:"last_execution_time"`
// CustomMetrics accumulates the caller-supplied key/value pairs; a later
// value for the same key overwrites the earlier one.
CustomMetrics map[string]interface{} `json:"custom_metrics"`
}
// OperationMetrics contains metrics for specific operations.
// Instances are created by RecordOperation; the latency statistics are
// derived from LatencyHistory by updateLatencyPercentiles.
type OperationMetrics struct {
OperationName string `json:"operation_name"`
TotalExecutions int64 `json:"total_executions"`
AverageLatency time.Duration `json:"average_latency"`
P50Latency time.Duration `json:"p50_latency"`
P95Latency time.Duration `json:"p95_latency"`
P99Latency time.Duration `json:"p99_latency"`
// ThroughputPerSecond is one second divided by AverageLatency.
ThroughputPerSecond float64 `json:"throughput_per_second"`
ErrorCount int64 `json:"error_count"`
// LatencyHistory keeps at most the 100 most recent samples (see RecordOperation).
LatencyHistory []time.Duration `json:"latency_history"`
LastExecution time.Time `json:"last_execution"`
}
// BenchmarkSuite contains a suite of benchmarks.
// Suites are created on demand by AddBenchmark and executed by RunBenchmark.
type BenchmarkSuite struct {
SuiteName string `json:"suite_name"`
// Benchmarks is keyed by Benchmark.Name.
Benchmarks map[string]*Benchmark `json:"benchmarks"`
// Results and LastRun are set by RunBenchmark after a completed run.
Results *BenchmarkResults `json:"results"`
Config *BenchmarkConfig `json:"config"`
LastRun time.Time `json:"last_run"`
// IsRunning is true while RunBenchmark is executing this suite.
IsRunning bool `json:"is_running"`
}
// Benchmark defines a specific benchmark test.
type Benchmark struct {
Name string `json:"name"`
Description string `json:"description"`
// TestFunction is called once per iteration by executeBenchmark; a non-nil
// error aborts the benchmark.
TestFunction func(b *BenchmarkContext) error `json:"-"`
// Setup, when non-nil, runs once before the iterations; Teardown, when
// non-nil, runs once when executeBenchmark returns after a successful Setup.
Setup func() error `json:"-"`
Teardown func() error `json:"-"`
// Iterations is the number of TestFunction calls executeBenchmark performs.
Iterations int `json:"iterations"`
Duration time.Duration `json:"duration"`
// Parameters is handed to every iteration through BenchmarkContext.Parameters.
Parameters map[string]interface{} `json:"parameters"`
Tags []string `json:"tags"`
}
// BenchmarkContext provides context for benchmark execution.
// executeBenchmark builds a fresh value for every iteration.
type BenchmarkContext struct {
Name string `json:"name"`
// Iteration is the zero-based index of the current iteration.
Iteration int `json:"iteration"`
StartTime time.Time `json:"start_time"`
// Parameters is the owning Benchmark's Parameters map (shared, not copied).
Parameters map[string]interface{} `json:"parameters"`
// Metrics starts as an empty map for each iteration.
Metrics map[string]interface{} `json:"metrics"`
}
// BenchmarkConfig configures benchmark execution.
// AddBenchmark creates new suites with DefaultIterations 1000, MaxDuration
// one minute, WarmupIterations 100 and Parallel false.
// NOTE(review): the execution code shown in this file does not read these
// fields — confirm whether another part of the package consumes them.
type BenchmarkConfig struct {
DefaultIterations int `json:"default_iterations"`
MaxDuration time.Duration `json:"max_duration"`
WarmupIterations int `json:"warmup_iterations"`
Parallel bool `json:"parallel"`
CPUProfiling bool `json:"cpu_profiling"`
MemoryProfiling bool `json:"memory_profiling"`
}
// BenchmarkResults contains benchmark execution results.
// It is assembled by RunBenchmark for one run of one suite.
type BenchmarkResults struct {
SuiteName string `json:"suite_name"`
// TotalBenchmarks is the number of benchmarks in the suite when the run started.
TotalBenchmarks int `json:"total_benchmarks"`
PassedBenchmarks int `json:"passed_benchmarks"`
FailedBenchmarks int `json:"failed_benchmarks"`
// TotalDuration is the wall-clock time of the whole run.
TotalDuration time.Duration `json:"total_duration"`
// Results is keyed by benchmark name; failed benchmarks get an entry with
// Success false and ErrorMessage set.
Results map[string]*BenchmarkResult `json:"results"`
Summary *BenchmarkSummary `json:"summary"`
ExecutedAt time.Time `json:"executed_at"`
}
// BenchmarkResult contains results for a single benchmark.
// executeBenchmark produces it; RunBenchmark substitutes a minimal value
// (Name, Success false, ErrorMessage) when execution returns an error.
type BenchmarkResult struct {
Name string `json:"name"`
Iterations int `json:"iterations"`
// TotalDuration is the wall-clock time across all iterations.
TotalDuration time.Duration `json:"total_duration"`
AverageLatency time.Duration `json:"average_latency"`
MinLatency time.Duration `json:"min_latency"`
MaxLatency time.Duration `json:"max_latency"`
StandardDeviation time.Duration `json:"standard_deviation"`
// OperationsPerSecond is one second divided by AverageLatency.
OperationsPerSecond float64 `json:"operations_per_second"`
// MemoryAllocated and MemoryAllocations are the deltas of
// runtime.MemStats.TotalAlloc and Mallocs measured around the iterations.
MemoryAllocated int64 `json:"memory_allocated"`
MemoryAllocations int64 `json:"memory_allocations"`
Success bool `json:"success"`
ErrorMessage string `json:"error_message"`
// Percentiles is keyed by percentile; executeBenchmark stores 50, 95 and 99.
Percentiles map[int]time.Duration `json:"percentiles"`
CustomMetrics map[string]interface{} `json:"custom_metrics"`
}
// BenchmarkSummary provides summary statistics.
// generateBenchmarkSummary derives it from the successful results of a run.
type BenchmarkSummary struct {
// FastestBenchmark and SlowestBenchmark name the successful benchmarks with
// the lowest and highest AverageLatency.
FastestBenchmark string `json:"fastest_benchmark"`
SlowestBenchmark string `json:"slowest_benchmark"`
// AverageLatency is the summed TotalDuration of successful benchmarks
// divided by TotalOperations.
AverageLatency time.Duration `json:"average_latency"`
// TotalOperations is the summed Iterations of successful benchmarks.
TotalOperations int64 `json:"total_operations"`
// OverallThroughput is TotalOperations per second of the run's TotalDuration.
OverallThroughput float64 `json:"overall_throughput"`
// PerformanceGrade is a letter from "A" to "F" based on the pass rate.
PerformanceGrade string `json:"performance_grade"`
Recommendations []string `json:"recommendations"`
}
// Profiler provides performance profiling capabilities.
// As shown in this file it only tracks start/end times of named profiles;
// cpuProfile and memoryProfile are not assigned by the methods visible here.
type Profiler struct {
// enabled is set to true by StartProfiling.
enabled bool
cpuProfile *CPUProfile
memoryProfile *MemoryProfile
// profiles holds the in-progress profiles keyed by profile type.
profiles map[string]*Profile
// mu guards profiles and enabled in StartProfiling and StopProfiling.
mu sync.RWMutex
}
// Profile represents a performance profile.
// Profiler.StartProfiling creates it with Name and Type both set to the
// requested profile type; Profiler.StopProfiling fills EndTime and Duration.
type Profile struct {
Name string `json:"name"`
Type string `json:"type"`
StartTime time.Time `json:"start_time"`
EndTime time.Time `json:"end_time"`
// Duration is EndTime minus StartTime.
Duration time.Duration `json:"duration"`
// Data starts as an empty map.
Data map[string]interface{} `json:"data"`
FilePath string `json:"file_path"`
}
// CPUProfile contains CPU profiling data.
// NOTE(review): no code shown in this file populates this type — confirm
// where (or whether) it is filled in.
type CPUProfile struct {
StartTime time.Time `json:"start_time"`
EndTime time.Time `json:"end_time"`
SampleRate int `json:"sample_rate"`
TotalSamples int64 `json:"total_samples"`
ProfileData []byte `json:"profile_data"`
HotFunctions []string `json:"hot_functions"`
}
// MemoryProfile contains memory profiling data.
// NOTE(review): no code shown in this file populates this type — confirm
// where (or whether) it is filled in.
type MemoryProfile struct {
StartTime time.Time `json:"start_time"`
EndTime time.Time `json:"end_time"`
HeapProfile []byte `json:"heap_profile"`
AllocProfile []byte `json:"alloc_profile"`
TopAllocations []string `json:"top_allocations"`
MemoryLeaks []MemoryLeak `json:"memory_leaks"`
}
// MemoryLeak represents a potential memory leak.
// It appears as the element type of MemoryProfile.MemoryLeaks; no detection
// logic is visible in this file.
type MemoryLeak struct {
Function string `json:"function"`
Size int64 `json:"size"`
Count int64 `json:"count"`
DetectedAt time.Time `json:"detected_at"`
Severity string `json:"severity"`
}
// AlertManager manages performance alerts.
// It stores every alert passed to CreateAlert and fans it out to the
// registered handlers.
type AlertManager struct {
// mu guards alerts (and is held while handlers are iterated in CreateAlert).
mu sync.RWMutex
thresholds *AlertThresholds
// alerts grows by one entry per CreateAlert call; nothing shown in this
// file removes entries.
alerts []*Alert
handlers []AlertHandler
// enabled is set to true by NewAlertManager.
enabled bool
}
// Alert represents a performance alert.
// Values are created by AlertManager.CreateAlert.
type Alert struct {
// ID has the form "alert_<unix-nanoseconds>".
ID string `json:"id"`
Level string `json:"level"` // info, warning, critical
Title string `json:"title"`
Description string `json:"description"`
// Metric names the measurement that triggered the alert, e.g. "memory_usage".
Metric string `json:"metric"`
// Value and Threshold carry the observed value and the limit it exceeded.
Value interface{} `json:"value"`
Threshold interface{} `json:"threshold"`
CreatedAt time.Time `json:"created_at"`
// ResolvedAt is left nil by CreateAlert.
ResolvedAt *time.Time `json:"resolved_at,omitempty"`
// Context starts as an empty map.
Context map[string]interface{} `json:"context"`
}
// AlertHandler interface for handling alerts.
// AlertManager.CreateAlert calls HandleAlert in a separate goroutine for
// every registered handler whose IsEnabled reports true.
type AlertHandler interface {
HandleAlert(alert *Alert) error
GetName() string
IsEnabled() bool
}
// PerformanceReporter interface for reporting performance data.
// After a benchmark run, reportBenchmarkResults calls ReportBenchmarks in a
// separate goroutine for every reporter whose IsEnabled reports true, and
// uses GetName when printing a failure. ReportMetrics is not called by the
// code shown in this file.
type PerformanceReporter interface {
ReportMetrics(metrics *PerformanceMetrics) error
ReportBenchmarks(results *BenchmarkResults) error
GetName() string
IsEnabled() bool
}
// MetricCollector interface for collecting custom metrics.
// collectCustomMetrics calls CollectMetrics on every collection tick and
// stores the result as custom metrics of the component named by GetName.
// GetInterval is not consulted by the code shown in this file.
type MetricCollector interface {
CollectMetrics() (map[string]interface{}, error)
GetName() string
GetInterval() time.Duration
}
// NewPerformanceMonitor creates a new performance monitor.
//
// A nil config is replaced by defaults: all profiling, GC stats, reporting
// and benchmarking enabled, a one-second collection interval, 24 hours of
// retention, 1000 history entries and a standard set of alert thresholds.
// The returned monitor already has the built-in collectors registered but
// is not running until Start is called.
func NewPerformanceMonitor(config *MonitorConfig) *PerformanceMonitor {
	if config == nil {
		defaultThresholds := &AlertThresholds{
			CPUUsagePercent:  80.0,
			MemoryUsageMB:    500,
			AnalysisTimeMS:   5000,
			ErrorRatePercent: 5.0,
			QueueSizeLimit:   1000,
			ResponseTimeMS:   1000,
		}
		config = &MonitorConfig{
			EnableCPUProfiling:    true,
			EnableMemoryProfiling: true,
			EnableGCStats:         true,
			CollectionInterval:    time.Second,
			RetentionPeriod:       24 * time.Hour,
			ReportingEnabled:      true,
			BenchmarkingEnabled:   true,
			MaxMetricHistory:      1000,
			AlertThresholds:       defaultThresholds,
		}
	}

	pm := new(PerformanceMonitor)
	pm.config = config
	pm.metrics = NewPerformanceMetrics()
	pm.benchmarks = make(map[string]*BenchmarkSuite)
	pm.profiler = NewProfiler()
	pm.alertManager = NewAlertManager(config.AlertThresholds)
	pm.reporters = []PerformanceReporter{}
	pm.collectors = []MetricCollector{}
	pm.stopChan = make(chan struct{})
	pm.collectInterval = config.CollectionInterval

	// Register the collectors that ship with the monitor.
	pm.initializeCollectors()
	return pm
}
// Start begins performance monitoring.
//
// It launches the metric collection goroutine and, when alert thresholds
// are configured, the alert monitoring goroutine. Both end when ctx is
// cancelled or Stop is called. Start returns an error if the monitor is
// already running.
//
// NOTE: Stop closes the stop channel and nothing in this file recreates it,
// so a monitor is effectively single-use: goroutines launched by a second
// Start exit immediately, and a second Stop would close an already closed
// channel.
func (pm *PerformanceMonitor) Start(ctx context.Context) error {
	if !atomic.CompareAndSwapInt32(&pm.isRunning, 0, 1) {
		return fmt.Errorf("performance monitor is already running")
	}

	// GetMetrics reads StartTime under pm.mu, so the write takes the lock too.
	pm.mu.Lock()
	pm.metrics.StartTime = time.Now()
	pm.mu.Unlock()

	// Start metric collection goroutine.
	go pm.collectMetrics(ctx)

	// Start alert monitoring only when there are thresholds to check against.
	if pm.config.AlertThresholds != nil {
		go pm.monitorAlerts(ctx)
	}
	return nil
}
// Stop stops performance monitoring by closing the stop channel that the
// background goroutines select on. It returns an error when the monitor is
// not currently running.
func (pm *PerformanceMonitor) Stop() error {
	wasRunning := atomic.CompareAndSwapInt32(&pm.isRunning, 1, 0)
	if wasRunning {
		close(pm.stopChan)
		return nil
	}
	return fmt.Errorf("performance monitor is not running")
}
// RecordOperation records metrics for an operation.
//
// It bumps the global total/success/failure counters, updates the
// per-operation entry for operationName (creating it on first use), appends
// duration to that operation's latency history (last 100 samples) and
// refreshes both the per-operation and the global latency statistics.
func (pm *PerformanceMonitor) RecordOperation(operationName string, duration time.Duration, success bool) {
	pm.mu.Lock()
	defer pm.mu.Unlock()

	m := pm.metrics

	// Global counters: the total always, plus exactly one outcome counter.
	atomic.AddInt64(&m.TotalOperations, 1)
	outcome := &m.SuccessfulOperations
	if !success {
		outcome = &m.FailedOperations
	}
	atomic.AddInt64(outcome, 1)

	// Per-operation entry, created lazily.
	if m.OperationMetrics == nil {
		m.OperationMetrics = make(map[string]*OperationMetrics)
	}
	op, known := m.OperationMetrics[operationName]
	if !known {
		op = &OperationMetrics{
			OperationName:  operationName,
			LatencyHistory: make([]time.Duration, 0),
		}
		m.OperationMetrics[operationName] = op
	}

	op.TotalExecutions++
	op.LastExecution = time.Now()
	if !success {
		op.ErrorCount++
	}

	// Keep only the last 100 samples.
	const maxSamples = 100
	op.LatencyHistory = append(op.LatencyHistory, duration)
	if len(op.LatencyHistory) > maxSamples {
		op.LatencyHistory = op.LatencyHistory[1:]
	}

	pm.updateLatencyPercentiles(op)
	pm.updateAverageResponseTime(duration)
}
// RecordComponentMetrics records metrics for a specific component.
//
// The component's entry is created on first use with executionTime as both
// its minimum and maximum. Each call updates the call counters, the running
// average, the min/max execution time and the error rate (failed calls over
// total calls), then merges customMetrics into the stored custom metrics,
// overwriting existing keys.
func (pm *PerformanceMonitor) RecordComponentMetrics(componentName string, executionTime time.Duration, success bool, customMetrics map[string]interface{}) {
	pm.mu.Lock()
	defer pm.mu.Unlock()

	if pm.metrics.ComponentMetrics == nil {
		pm.metrics.ComponentMetrics = make(map[string]*ComponentMetrics)
	}

	cm, known := pm.metrics.ComponentMetrics[componentName]
	if !known {
		cm = &ComponentMetrics{
			ComponentName:    componentName,
			MinExecutionTime: executionTime,
			MaxExecutionTime: executionTime,
			CustomMetrics:    make(map[string]interface{}),
		}
		pm.metrics.ComponentMetrics[componentName] = cm
	}

	cm.TotalCalls++
	cm.LastExecutionTime = time.Now()
	if !success {
		cm.FailedCalls++
	} else {
		cm.SuccessfulCalls++
	}

	// Running mean: rebuild the previous sum from the old average, add the
	// new sample and divide by the new call count.
	calls := time.Duration(cm.TotalCalls)
	previousSum := (calls - 1) * cm.AverageExecutionTime
	cm.AverageExecutionTime = (previousSum + executionTime) / calls

	if executionTime < cm.MinExecutionTime {
		cm.MinExecutionTime = executionTime
	}
	if executionTime > cm.MaxExecutionTime {
		cm.MaxExecutionTime = executionTime
	}

	cm.ErrorRate = float64(cm.FailedCalls) / float64(cm.TotalCalls)

	for k, v := range customMetrics {
		cm.CustomMetrics[k] = v
	}
}
// GetMetrics returns a snapshot of the current performance metrics.
//
// The snapshot is a real deep copy: maps, slices and nested structs are
// duplicated, so the caller may read or iterate it while the monitor keeps
// recording. (Values stored in ComponentMetrics.CustomMetrics are copied by
// assignment only.) Uptime and LastUpdated are computed at call time. The
// snapshot carries its own zero-valued mutex; the live one is never copied.
func (pm *PerformanceMonitor) GetMetrics() *PerformanceMetrics {
	pm.mu.RLock()
	defer pm.mu.RUnlock()

	src := pm.metrics
	snapshot := &PerformanceMetrics{
		StartTime:            src.StartTime,
		Uptime:               time.Since(src.StartTime),
		TotalOperations:      src.TotalOperations,
		SuccessfulOperations: src.SuccessfulOperations,
		FailedOperations:     src.FailedOperations,
		AverageResponseTime:  src.AverageResponseTime,
		P95ResponseTime:      src.P95ResponseTime,
		P99ResponseTime:      src.P99ResponseTime,
		CPUUsage:             src.CPUUsage,
		LastUpdated:          time.Now(),
	}

	if src.MemoryUsage != nil {
		mem := *src.MemoryUsage
		snapshot.MemoryUsage = &mem
	}

	if src.GCStats != nil {
		gc := *src.GCStats
		if src.GCStats.PauseNs != nil {
			gc.PauseNs = make([]uint64, len(src.GCStats.PauseNs))
			copy(gc.PauseNs, src.GCStats.PauseNs)
		}
		snapshot.GCStats = &gc
	}

	// Nil maps and slices stay nil so the JSON encoding is unchanged.
	if src.ComponentMetrics != nil {
		snapshot.ComponentMetrics = make(map[string]*ComponentMetrics, len(src.ComponentMetrics))
		for name, cm := range src.ComponentMetrics {
			if cm == nil {
				snapshot.ComponentMetrics[name] = nil
				continue
			}
			dup := *cm
			if cm.CustomMetrics != nil {
				dup.CustomMetrics = make(map[string]interface{}, len(cm.CustomMetrics))
				for k, v := range cm.CustomMetrics {
					dup.CustomMetrics[k] = v
				}
			}
			snapshot.ComponentMetrics[name] = &dup
		}
	}

	if src.OperationMetrics != nil {
		snapshot.OperationMetrics = make(map[string]*OperationMetrics, len(src.OperationMetrics))
		for name, om := range src.OperationMetrics {
			if om == nil {
				snapshot.OperationMetrics[name] = nil
				continue
			}
			dup := *om
			if om.LatencyHistory != nil {
				dup.LatencyHistory = make([]time.Duration, len(om.LatencyHistory))
				copy(dup.LatencyHistory, om.LatencyHistory)
			}
			snapshot.OperationMetrics[name] = &dup
		}
	}

	if src.ResponseTimeHistory != nil {
		snapshot.ResponseTimeHistory = make([]time.Duration, len(src.ResponseTimeHistory))
		copy(snapshot.ResponseTimeHistory, src.ResponseTimeHistory)
	}

	return snapshot
}
// RunBenchmark executes a benchmark suite.
//
// It returns an error when benchmarking is disabled, when the suite does
// not exist, or when ctx is cancelled between benchmarks. A benchmark that
// fails is recorded in the results (Success false, ErrorMessage set) and
// does not abort the run. On completion the results are stored on the
// suite, summarised and handed to the configured reporters.
//
// The suite lookup and every write to suite state happen under pm.mu, the
// lock AddBenchmark uses. The benchmarks themselves run without the lock
// against a snapshot taken up front, so a benchmark body may call back into
// the monitor. Benchmarks run in name order to keep runs reproducible.
func (pm *PerformanceMonitor) RunBenchmark(ctx context.Context, suiteName string) (*BenchmarkResults, error) {
	if !pm.config.BenchmarkingEnabled {
		return nil, fmt.Errorf("benchmarking is disabled")
	}

	pm.mu.Lock()
	suite, exists := pm.benchmarks[suiteName]
	if !exists {
		pm.mu.Unlock()
		return nil, fmt.Errorf("benchmark suite '%s' not found", suiteName)
	}
	names := make([]string, 0, len(suite.Benchmarks))
	for name := range suite.Benchmarks {
		names = append(names, name)
	}
	sort.Strings(names)
	pending := make([]*Benchmark, len(names))
	for i, name := range names {
		pending[i] = suite.Benchmarks[name]
	}
	suite.IsRunning = true
	pm.mu.Unlock()

	defer func() {
		pm.mu.Lock()
		suite.IsRunning = false
		pm.mu.Unlock()
	}()

	results := &BenchmarkResults{
		SuiteName:       suiteName,
		Results:         make(map[string]*BenchmarkResult, len(names)),
		ExecutedAt:      time.Now(),
		TotalBenchmarks: len(names),
	}

	startTime := time.Now()

	// Execute each benchmark, checking for cancellation in between.
	for i, name := range names {
		select {
		case <-ctx.Done():
			return nil, ctx.Err()
		default:
		}

		result, err := pm.executeBenchmark(ctx, pending[i])
		if err != nil {
			result = &BenchmarkResult{
				Name:         name,
				Success:      false,
				ErrorMessage: err.Error(),
			}
			results.FailedBenchmarks++
		} else {
			results.PassedBenchmarks++
		}
		results.Results[name] = result
	}

	results.TotalDuration = time.Since(startTime)
	results.Summary = pm.generateBenchmarkSummary(results)

	pm.mu.Lock()
	suite.Results = results
	suite.LastRun = time.Now()
	pm.mu.Unlock()

	// Report results if reporters are configured. The lock must not be held
	// here: reporting may itself need to read monitor state.
	pm.reportBenchmarkResults(results)

	return results, nil
}
// AddBenchmark adds a benchmark to a suite, creating the suite with default
// settings (1000 iterations, one-minute cap, 100 warm-up iterations,
// sequential execution) when suiteName is new. A benchmark with the same
// name as an existing one in the suite replaces it.
func (pm *PerformanceMonitor) AddBenchmark(suiteName string, benchmark *Benchmark) {
	pm.mu.Lock()
	defer pm.mu.Unlock()

	target, found := pm.benchmarks[suiteName]
	if !found {
		defaults := &BenchmarkConfig{
			DefaultIterations: 1000,
			MaxDuration:       time.Minute,
			WarmupIterations:  100,
			Parallel:          false,
		}
		target = &BenchmarkSuite{
			SuiteName:  suiteName,
			Benchmarks: make(map[string]*Benchmark),
			Config:     defaults,
		}
		pm.benchmarks[suiteName] = target
	}

	target.Benchmarks[benchmark.Name] = benchmark
}
// StartProfiling begins performance profiling of the given type by
// forwarding the request to the monitor's Profiler.
func (pm *PerformanceMonitor) StartProfiling(profileType string) error {
	err := pm.profiler.StartProfiling(profileType)
	return err
}
// StopProfiling stops performance profiling of the given type and returns
// the finished profile, as produced by the monitor's Profiler.
func (pm *PerformanceMonitor) StopProfiling(profileType string) (*Profile, error) {
	finished, err := pm.profiler.StopProfiling(profileType)
	return finished, err
}
// AddReporter adds a performance reporter to the list consulted after each
// benchmark run.
func (pm *PerformanceMonitor) AddReporter(reporter PerformanceReporter) {
	pm.mu.Lock()
	pm.reporters = append(pm.reporters, reporter)
	pm.mu.Unlock()
}
// AddCollector adds a metric collector to the list polled on every
// collection tick.
func (pm *PerformanceMonitor) AddCollector(collector MetricCollector) {
	pm.mu.Lock()
	pm.collectors = append(pm.collectors, collector)
	pm.mu.Unlock()
}
// GetAlerts returns current alerts as reported by the monitor's AlertManager.
func (pm *PerformanceMonitor) GetAlerts() []*Alert {
	current := pm.alertManager.GetAlerts()
	return current
}
// Private methods
// collectMetrics is the background collection loop started by Start. On
// every tick it refreshes the system metrics and polls the custom
// collectors; it returns when ctx is cancelled or the stop channel is closed.
//
// time.NewTicker panics on a non-positive duration, which a caller-supplied
// config with an unset CollectionInterval would trigger, so such values
// fall back to one second (the default used when no config is given).
func (pm *PerformanceMonitor) collectMetrics(ctx context.Context) {
	interval := pm.collectInterval
	if interval <= 0 {
		interval = time.Second
	}

	ticker := time.NewTicker(interval)
	defer ticker.Stop()

	for {
		select {
		case <-ctx.Done():
			return
		case <-pm.stopChan:
			return
		case <-ticker.C:
			pm.updateSystemMetrics()
			pm.collectCustomMetrics()
		}
	}
}
// updateSystemMetrics replaces the stored memory usage with a fresh reading
// from runtime.ReadMemStats and, when GC stats are enabled in the config,
// replaces the stored GC statistics from the same reading.
func (pm *PerformanceMonitor) updateSystemMetrics() {
	pm.mu.Lock()
	defer pm.mu.Unlock()

	var ms runtime.MemStats
	runtime.ReadMemStats(&ms)

	usage := new(MemoryUsage)
	usage.AllocBytes = ms.Alloc
	usage.TotalAllocBytes = ms.TotalAlloc
	usage.SysBytes = ms.Sys
	usage.NumGC = ms.NumGC
	usage.HeapAllocBytes = ms.HeapAlloc
	usage.HeapSysBytes = ms.HeapSys
	usage.StackInUse = ms.StackInuse
	usage.StackSys = ms.StackSys
	pm.metrics.MemoryUsage = usage

	if !pm.config.EnableGCStats {
		return
	}

	gc := new(GCStats)
	gc.NumGC = ms.NumGC
	gc.PauseTotal = time.Duration(ms.PauseTotalNs)
	// MemStats.LastGC is nanoseconds since the Unix epoch.
	gc.LastGC = time.Unix(0, int64(ms.LastGC))
	gc.NextGC = ms.NextGC
	gc.GCCPUFraction = ms.GCCPUFraction
	pm.metrics.GCStats = gc
}
// collectCustomMetrics polls every registered collector and stores what it
// returns as custom metrics of the component named after the collector.
//
// The collector list is snapshotted under the read lock because
// AddCollector appends to it under the write lock. The lock is released
// before the collectors run, since RecordComponentMetrics takes the write
// lock itself. A collector that returns an error is recorded as a failed
// call for its component instead of being dropped silently, so the failure
// shows up in that component's FailedCalls and ErrorRate.
func (pm *PerformanceMonitor) collectCustomMetrics() {
	pm.mu.RLock()
	collectors := make([]MetricCollector, len(pm.collectors))
	copy(collectors, pm.collectors)
	pm.mu.RUnlock()

	for _, collector := range collectors {
		customMetrics, err := collector.CollectMetrics()
		if err != nil {
			pm.RecordComponentMetrics(collector.GetName(), 0, false, nil)
			continue
		}
		// Store custom metrics in component metrics.
		pm.RecordComponentMetrics(collector.GetName(), 0, true, customMetrics)
	}
}
// updateLatencyPercentiles recomputes P50/P95/P99, the average latency and
// the derived throughput of opMetrics from its latency history. It does
// nothing when the history is empty, and leaves the throughput untouched
// when the average is not positive.
func (pm *PerformanceMonitor) updateLatencyPercentiles(opMetrics *OperationMetrics) {
	n := len(opMetrics.LatencyHistory)
	if n == 0 {
		return
	}

	// Work on a sorted copy so the history keeps its arrival order.
	ordered := append(make([]time.Duration, 0, n), opMetrics.LatencyHistory...)
	sort.Slice(ordered, func(a, b int) bool { return ordered[a] < ordered[b] })

	// at returns the sample at the given percentile rank (n*pct/100 < n for pct < 100).
	at := func(pct int) time.Duration { return ordered[n*pct/100] }
	opMetrics.P50Latency = at(50)
	opMetrics.P95Latency = at(95)
	opMetrics.P99Latency = at(99)

	var sum time.Duration
	for i := range ordered {
		sum += ordered[i]
	}
	opMetrics.AverageLatency = sum / time.Duration(n)

	if avg := opMetrics.AverageLatency; avg > 0 {
		opMetrics.ThroughputPerSecond = float64(time.Second) / float64(avg)
	}
}
// updateAverageResponseTime appends duration to the global response-time
// history, trims the history to the configured maximum and recomputes the
// average, P95 and P99 response times from it. The caller must hold pm.mu.
//
// A non-positive MaxMetricHistory (for example a caller-built config that
// leaves the field unset) would otherwise empty the history on every call
// and leave the statistics permanently at zero, so it falls back to 1000,
// the value used by the default configuration.
func (pm *PerformanceMonitor) updateAverageResponseTime(duration time.Duration) {
	limit := pm.config.MaxMetricHistory
	if limit <= 0 {
		limit = 1000
	}

	// Add to response time history, dropping the oldest samples beyond the limit.
	history := append(pm.metrics.ResponseTimeHistory, duration)
	if excess := len(history) - limit; excess > 0 {
		history = history[excess:]
	}
	pm.metrics.ResponseTimeHistory = history

	// Calculate percentiles from a sorted copy; the history itself stays in
	// arrival order. The history holds at least the sample just added.
	sorted := make([]time.Duration, len(history))
	copy(sorted, history)
	sort.Slice(sorted, func(i, j int) bool { return sorted[i] < sorted[j] })

	pm.metrics.P95ResponseTime = sorted[len(sorted)*95/100]
	pm.metrics.P99ResponseTime = sorted[len(sorted)*99/100]

	// Update average.
	var total time.Duration
	for _, latency := range sorted {
		total += latency
	}
	pm.metrics.AverageResponseTime = total / time.Duration(len(sorted))
}
// executeBenchmark runs a single benchmark and returns its statistics.
//
// Setup (if any) runs first; Teardown (if any) runs when the function
// returns after a successful Setup. TestFunction is then called
// benchmark.Iterations times, each call timed individually. The result
// holds min/max/average latency, the 50th/95th/99th percentiles, the
// standard deviation, operations per second and the allocation deltas
// reported by runtime.MemStats around the iterations.
//
// An error is returned, with a nil result, when the benchmark is nil, has
// no test function or a non-positive iteration count, when Setup fails,
// when an iteration fails, or when ctx is cancelled between iterations.
func (pm *PerformanceMonitor) executeBenchmark(ctx context.Context, benchmark *Benchmark) (*BenchmarkResult, error) {
	// Validate before Setup runs so a rejected benchmark needs no teardown.
	// Without these checks a zero iteration count panics on latencies[0] and
	// on the division by len(latencies), and a nil TestFunction panics on call.
	if benchmark == nil {
		return nil, fmt.Errorf("benchmark is nil")
	}
	if benchmark.TestFunction == nil {
		return nil, fmt.Errorf("benchmark %q has no test function", benchmark.Name)
	}
	if benchmark.Iterations <= 0 {
		return nil, fmt.Errorf("benchmark %q needs a positive iteration count, got %d",
			benchmark.Name, benchmark.Iterations)
	}

	result := &BenchmarkResult{
		Name:        benchmark.Name,
		Iterations:  benchmark.Iterations,
		Percentiles: make(map[int]time.Duration),
	}

	if benchmark.Setup != nil {
		if err := benchmark.Setup(); err != nil {
			return nil, fmt.Errorf("benchmark setup failed: %w", err)
		}
	}
	if benchmark.Teardown != nil {
		defer func() {
			if err := benchmark.Teardown(); err != nil {
				fmt.Printf("Benchmark teardown failed: %v\n", err)
			}
		}()
	}

	latencies := make([]time.Duration, benchmark.Iterations)

	var memBefore runtime.MemStats
	runtime.ReadMemStats(&memBefore)

	startTime := time.Now()

	// Execute benchmark iterations.
	for i := 0; i < benchmark.Iterations; i++ {
		select {
		case <-ctx.Done():
			return nil, ctx.Err()
		default:
		}

		benchCtx := &BenchmarkContext{
			Name:       benchmark.Name,
			Iteration:  i,
			StartTime:  time.Now(),
			Parameters: benchmark.Parameters,
			Metrics:    make(map[string]interface{}),
		}

		iterStart := time.Now()
		if err := benchmark.TestFunction(benchCtx); err != nil {
			return nil, fmt.Errorf("benchmark iteration %d failed: %w", i, err)
		}
		latencies[i] = time.Since(iterStart)
	}

	result.TotalDuration = time.Since(startTime)

	// Allocation deltas over the timed section.
	var memAfter runtime.MemStats
	runtime.ReadMemStats(&memAfter)
	result.MemoryAllocated = int64(memAfter.TotalAlloc - memBefore.TotalAlloc)
	result.MemoryAllocations = int64(memAfter.Mallocs - memBefore.Mallocs)

	// Latency statistics are read off the sorted samples.
	sort.Slice(latencies, func(i, j int) bool { return latencies[i] < latencies[j] })
	count := len(latencies)
	result.MinLatency = latencies[0]
	result.MaxLatency = latencies[count-1]

	var total time.Duration
	for _, latency := range latencies {
		total += latency
	}
	result.AverageLatency = total / time.Duration(count)

	if result.AverageLatency > 0 {
		result.OperationsPerSecond = float64(time.Second) / float64(result.AverageLatency)
	}

	result.Percentiles[50] = latencies[count*50/100]
	result.Percentiles[95] = latencies[count*95/100]
	result.Percentiles[99] = latencies[count*99/100]

	// Standard deviation is the square root of the population variance; the
	// variance alone is in squared nanoseconds and is not a duration.
	variance := float64(0)
	avgFloat := float64(result.AverageLatency)
	for _, latency := range latencies {
		diff := float64(latency) - avgFloat
		variance += diff * diff
	}
	variance /= float64(count)
	result.StandardDeviation = time.Duration(math.Sqrt(variance))

	result.Success = true
	return result, nil
}
// generateBenchmarkSummary condenses a run into a BenchmarkSummary.
//
// Only successful results contribute to the fastest/slowest selection, the
// operation count, the average latency and the throughput. The grade and
// the recommendations are derived from the whole run. An empty result set
// yields a summary with an empty recommendation list and nothing else set.
//
// Fastest and slowest are tracked from the first successful result rather
// than from sentinel values, so a benchmark averaging an hour or more can
// still be the fastest and one averaging zero can still be the slowest.
// Results are visited in name order, so ties resolve the same way every run.
func (pm *PerformanceMonitor) generateBenchmarkSummary(results *BenchmarkResults) *BenchmarkSummary {
	summary := &BenchmarkSummary{
		Recommendations: []string{},
	}
	if len(results.Results) == 0 {
		return summary
	}

	names := make([]string, 0, len(results.Results))
	for name := range results.Results {
		names = append(names, name)
	}
	sort.Strings(names)

	var (
		fastest, slowest         string
		fastestTime, slowestTime time.Duration
		seen                     bool
		totalOps                 int64
		totalLatency             time.Duration
	)
	for _, name := range names {
		result := results.Results[name]
		if result == nil || !result.Success {
			continue
		}

		totalOps += int64(result.Iterations)
		totalLatency += result.TotalDuration

		if !seen || result.AverageLatency < fastestTime {
			fastestTime = result.AverageLatency
			fastest = name
		}
		if !seen || result.AverageLatency > slowestTime {
			slowestTime = result.AverageLatency
			slowest = name
		}
		seen = true
	}

	summary.FastestBenchmark = fastest
	summary.SlowestBenchmark = slowest
	summary.TotalOperations = totalOps

	if totalOps > 0 {
		summary.AverageLatency = totalLatency / time.Duration(totalOps)
		// A zero run duration would yield +Inf, which encoding/json refuses
		// to marshal; leave the throughput at zero in that case.
		if seconds := results.TotalDuration.Seconds(); seconds > 0 {
			summary.OverallThroughput = float64(totalOps) / seconds
		}
	}

	// Generate performance grade and recommendations.
	summary.PerformanceGrade = pm.calculatePerformanceGrade(results)
	summary.Recommendations = pm.generateRecommendations(results)

	return summary
}
// calculatePerformanceGrade maps the share of passed benchmarks to a letter:
// below 80% is "F", below 90% "D", below 95% "C", below 98% "B", and
// anything else "A".
func (pm *PerformanceMonitor) calculatePerformanceGrade(results *BenchmarkResults) string {
	passRatio := float64(results.PassedBenchmarks) / float64(results.TotalBenchmarks)

	switch {
	case passRatio < 0.8:
		return "F"
	case passRatio < 0.9:
		return "D"
	case passRatio < 0.95:
		return "C"
	case passRatio < 0.98:
		return "B"
	default:
		return "A"
	}
}
// generateRecommendations turns a run's results into human-readable advice:
// one entry when any benchmark failed, plus one entry for every benchmark
// whose average latency exceeds 100ms and one for every benchmark that
// allocated more than 1 MiB.
//
// Results are visited in name order. Iterating the map directly would
// shuffle the recommendations between otherwise identical runs.
func (pm *PerformanceMonitor) generateRecommendations(results *BenchmarkResults) []string {
	recommendations := []string{}

	if results.FailedBenchmarks > 0 {
		recommendations = append(recommendations, "Fix failing benchmarks to improve reliability")
	}

	names := make([]string, 0, len(results.Results))
	for name := range results.Results {
		names = append(names, name)
	}
	sort.Strings(names)

	for _, name := range names {
		result := results.Results[name]
		if result == nil {
			continue
		}
		if result.AverageLatency > time.Millisecond*100 {
			recommendations = append(recommendations,
				fmt.Sprintf("Optimize %s performance (avg: %v)", result.Name, result.AverageLatency))
		}
		if result.MemoryAllocated > 1024*1024 { // 1MB
			recommendations = append(recommendations,
				fmt.Sprintf("Reduce memory allocations in %s", result.Name))
		}
	}

	return recommendations
}
// monitorAlerts is the background alert loop started by Start. It evaluates
// the alert thresholds every ten seconds and returns when ctx is cancelled
// or the stop channel is closed.
func (pm *PerformanceMonitor) monitorAlerts(ctx context.Context) {
	const checkEvery = 10 * time.Second

	tick := time.NewTicker(checkEvery)
	defer tick.Stop()

	for {
		select {
		case <-tick.C:
			pm.checkAlerts()
		case <-pm.stopChan:
			return
		case <-ctx.Done():
			return
		}
	}
}
// checkAlerts compares a metrics snapshot with the configured thresholds
// and raises an alert for each one exceeded: a critical alert for allocated
// memory (in whole MiB), and warnings for the error rate (as a percentage
// of all operations) and for the average response time (in milliseconds).
func (pm *PerformanceMonitor) checkAlerts() {
	snapshot := pm.GetMetrics()
	limits := pm.config.AlertThresholds

	// Memory usage is only known after the first collection tick.
	if mem := snapshot.MemoryUsage; mem != nil {
		usedMB := int64(mem.AllocBytes / 1024 / 1024)
		if usedMB > limits.MemoryUsageMB {
			description := fmt.Sprintf("Memory usage: %d MB exceeds threshold: %d MB", usedMB, limits.MemoryUsageMB)
			pm.alertManager.CreateAlert("critical", "High Memory Usage", description,
				"memory_usage", usedMB, limits.MemoryUsageMB)
		}
	}

	// The error rate is undefined until at least one operation is recorded.
	if total := snapshot.TotalOperations; total > 0 {
		ratePercent := float64(snapshot.FailedOperations) / float64(total) * 100
		if ratePercent > limits.ErrorRatePercent {
			description := fmt.Sprintf("Error rate: %.2f%% exceeds threshold: %.2f%%", ratePercent, limits.ErrorRatePercent)
			pm.alertManager.CreateAlert("warning", "High Error Rate", description,
				"error_rate", ratePercent, limits.ErrorRatePercent)
		}
	}

	if avg := snapshot.AverageResponseTime; avg.Milliseconds() > limits.ResponseTimeMS {
		description := fmt.Sprintf("Average response time: %v exceeds threshold: %d ms", avg, limits.ResponseTimeMS)
		pm.alertManager.CreateAlert("warning", "High Response Time", description,
			"response_time", avg, limits.ResponseTimeMS)
	}
}
// reportBenchmarkResults hands results to every enabled reporter, each in
// its own goroutine so a slow reporter cannot delay the caller. A reporter
// that returns an error is reported on stdout.
//
// The reporter list is snapshotted under the read lock because AddReporter
// appends to it under the write lock. Callers must not hold pm.mu.
func (pm *PerformanceMonitor) reportBenchmarkResults(results *BenchmarkResults) {
	pm.mu.RLock()
	reporters := make([]PerformanceReporter, len(pm.reporters))
	copy(reporters, pm.reporters)
	pm.mu.RUnlock()

	for _, reporter := range reporters {
		if reporter == nil || !reporter.IsEnabled() {
			continue
		}
		go func(r PerformanceReporter) {
			if err := r.ReportBenchmarks(results); err != nil {
				fmt.Printf("Failed to report benchmarks to %s: %v\n", r.GetName(), err)
			}
		}(reporter)
	}
}
// initializeCollectors registers the collectors that ship with the monitor;
// currently that is the system metrics collector only.
func (pm *PerformanceMonitor) initializeCollectors() {
	builtin := &SystemMetricsCollector{}
	pm.collectors = append(pm.collectors, builtin)
}
// Helper constructors and implementations
// NewPerformanceMetrics returns an empty metrics container with its maps
// and response-time history allocated and StartTime set to the current time.
func NewPerformanceMetrics() *PerformanceMetrics {
	m := new(PerformanceMetrics)
	m.ComponentMetrics = make(map[string]*ComponentMetrics)
	m.OperationMetrics = make(map[string]*OperationMetrics)
	m.ResponseTimeHistory = make([]time.Duration, 0)
	m.StartTime = time.Now()
	return m
}
// NewProfiler returns a Profiler with no profiles in progress.
func NewProfiler() *Profiler {
	p := new(Profiler)
	p.profiles = make(map[string]*Profile)
	return p
}
// StartProfiling registers a new in-progress profile under profileType,
// stamped with the current time, and marks the profiler as enabled. A
// profile already registered under the same type is replaced. It always
// returns nil.
func (p *Profiler) StartProfiling(profileType string) error {
	p.mu.Lock()
	defer p.mu.Unlock()

	p.profiles[profileType] = &Profile{
		Name:      profileType,
		Type:      profileType,
		StartTime: time.Now(),
		Data:      make(map[string]interface{}),
	}
	p.enabled = true

	return nil
}
// StopProfiling finishes the in-progress profile registered under
// profileType: it stamps the end time, computes the duration, removes the
// profile from the profiler and returns it. It returns an error when no
// such profile is in progress.
func (p *Profiler) StopProfiling(profileType string) (*Profile, error) {
	p.mu.Lock()
	defer p.mu.Unlock()

	finished, ok := p.profiles[profileType]
	if !ok {
		return nil, fmt.Errorf("profile not found: %s", profileType)
	}
	delete(p.profiles, profileType)

	finished.EndTime = time.Now()
	finished.Duration = finished.EndTime.Sub(finished.StartTime)
	return finished, nil
}
// NewAlertManager returns an enabled AlertManager that holds the given
// thresholds and starts with no alerts and no handlers.
func NewAlertManager(thresholds *AlertThresholds) *AlertManager {
	am := new(AlertManager)
	am.thresholds = thresholds
	am.alerts = make([]*Alert, 0)
	am.handlers = make([]AlertHandler, 0)
	am.enabled = true
	return am
}
// CreateAlert records a new alert and notifies every enabled handler.
//
// level is the severity ("info", "warning" or "critical"); title and
// description are human-readable; metric names the measurement; value and
// threshold carry the observed value and the limit it exceeded. The alert
// gets an ID derived from the current time in nanoseconds.
//
// Each handler runs in its own goroutine so a slow handler cannot block the
// caller. A handler that returns an error is reported on stdout instead of
// being ignored.
func (am *AlertManager) CreateAlert(level, title, description, metric string, value, threshold interface{}) {
	am.mu.Lock()
	defer am.mu.Unlock()

	alert := &Alert{
		ID:          fmt.Sprintf("alert_%d", time.Now().UnixNano()),
		Level:       level,
		Title:       title,
		Description: description,
		Metric:      metric,
		Value:       value,
		Threshold:   threshold,
		CreatedAt:   time.Now(),
		Context:     make(map[string]interface{}),
	}
	am.alerts = append(am.alerts, alert)

	// Notify handlers.
	for _, handler := range am.handlers {
		if !handler.IsEnabled() {
			continue
		}
		go func(h AlertHandler) {
			if err := h.HandleAlert(alert); err != nil {
				fmt.Printf("Failed to handle alert %s with %s: %v\n", alert.ID, h.GetName(), err)
			}
		}(handler)
	}
}
// GetAlerts returns a copy of the alert list. The slice is new, but its
// elements point at the same Alert values the manager holds.
func (am *AlertManager) GetAlerts() []*Alert {
	am.mu.RLock()
	snapshot := make([]*Alert, len(am.alerts))
	copy(snapshot, am.alerts)
	am.mu.RUnlock()

	return snapshot
}
// SystemMetricsCollector collects system-level metrics from the Go runtime.
type SystemMetricsCollector struct{}

// CollectMetrics reports the current goroutine count under "goroutines" and
// the number of logical CPUs under "cpus". It never returns an error.
func (smc *SystemMetricsCollector) CollectMetrics() (map[string]interface{}, error) {
	return map[string]interface{}{
		"goroutines": runtime.NumGoroutine(),
		"cpus":       runtime.NumCPU(),
	}, nil
}

// GetName returns the collector's fixed name, "system_metrics".
func (smc *SystemMetricsCollector) GetName() string {
	const name = "system_metrics"
	return name
}

// GetInterval returns the collector's preferred polling interval of five seconds.
func (smc *SystemMetricsCollector) GetInterval() time.Duration {
	return 5 * time.Second
}