🎭 CHORUS now contains full BZZZ functionality adapted for containers Core systems ported: - P2P networking (libp2p with DHT and PubSub) - Task coordination (COOEE protocol) - HMMM collaborative reasoning - SHHH encryption and security - SLURP admin election system - UCXL content addressing - UCXI server integration - Hypercore logging system - Health monitoring and graceful shutdown - License validation with KACHING Container adaptations: - Environment variable configuration (no YAML files) - Container-optimized logging to stdout/stderr - Auto-generated agent IDs for container deployments - Docker-first architecture All proven BZZZ P2P protocols, AI integration, and collaboration features are now available in containerized form. Next: Build and test container deployment. 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
307 lines
9.0 KiB
Go
307 lines
9.0 KiB
Go
package health
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"net/http"
|
|
"time"
|
|
|
|
"chorus.services/bzzz/pkg/shutdown"
|
|
)
|
|
|
|
// IntegrationExample demonstrates how to integrate health monitoring and graceful shutdown
|
|
func IntegrationExample() {
|
|
// Create logger (in real implementation, use your logging system)
|
|
logger := &defaultLogger{}
|
|
|
|
// Create shutdown manager
|
|
shutdownManager := shutdown.NewManager(30*time.Second, logger)
|
|
|
|
// Create health manager
|
|
healthManager := NewManager("node-123", "v1.0.0", logger)
|
|
|
|
// Connect health manager to shutdown manager for critical failures
|
|
healthManager.SetShutdownManager(shutdownManager)
|
|
|
|
// Register some example health checks
|
|
setupHealthChecks(healthManager)
|
|
|
|
// Create and register components for graceful shutdown
|
|
setupShutdownComponents(shutdownManager, healthManager)
|
|
|
|
// Start systems
|
|
if err := healthManager.Start(); err != nil {
|
|
logger.Error("Failed to start health manager: %v", err)
|
|
return
|
|
}
|
|
|
|
// Start health HTTP server
|
|
if err := healthManager.StartHTTPServer(8081); err != nil {
|
|
logger.Error("Failed to start health HTTP server: %v", err)
|
|
return
|
|
}
|
|
|
|
// Add shutdown hooks
|
|
setupShutdownHooks(shutdownManager, healthManager, logger)
|
|
|
|
// Start shutdown manager (begins listening for signals)
|
|
shutdownManager.Start()
|
|
|
|
logger.Info("🚀 System started with integrated health monitoring and graceful shutdown")
|
|
logger.Info("📊 Health endpoints available at:")
|
|
logger.Info(" - http://localhost:8081/health (overall health)")
|
|
logger.Info(" - http://localhost:8081/health/ready (readiness)")
|
|
logger.Info(" - http://localhost:8081/health/live (liveness)")
|
|
logger.Info(" - http://localhost:8081/health/checks (detailed checks)")
|
|
|
|
// Wait for shutdown
|
|
shutdownManager.Wait()
|
|
logger.Info("✅ System shutdown completed")
|
|
}
|
|
|
|
// setupHealthChecks registers various health checks
|
|
func setupHealthChecks(healthManager *Manager) {
|
|
// Database connectivity check (critical)
|
|
databaseCheck := CreateDatabaseCheck("primary-db", func() error {
|
|
// Simulate database ping
|
|
time.Sleep(10 * time.Millisecond)
|
|
// Return nil for healthy, error for unhealthy
|
|
return nil
|
|
})
|
|
healthManager.RegisterCheck(databaseCheck)
|
|
|
|
// Memory usage check (warning only)
|
|
memoryCheck := CreateMemoryCheck(0.85) // Alert if > 85%
|
|
healthManager.RegisterCheck(memoryCheck)
|
|
|
|
// Disk space check (warning only)
|
|
diskCheck := CreateDiskSpaceCheck("/var/lib/bzzz", 0.90) // Alert if > 90%
|
|
healthManager.RegisterCheck(diskCheck)
|
|
|
|
// Custom application-specific health check
|
|
customCheck := &HealthCheck{
|
|
Name: "p2p-connectivity",
|
|
Description: "P2P network connectivity check",
|
|
Enabled: true,
|
|
Critical: true, // This is critical for P2P systems
|
|
Interval: 15 * time.Second,
|
|
Timeout: 10 * time.Second,
|
|
Checker: func(ctx context.Context) CheckResult {
|
|
// Simulate P2P connectivity check
|
|
time.Sleep(50 * time.Millisecond)
|
|
|
|
// Simulate occasionally failing check
|
|
connected := time.Now().Unix()%10 != 0 // Fail 10% of the time
|
|
|
|
if !connected {
|
|
return CheckResult{
|
|
Healthy: false,
|
|
Message: "No P2P peers connected",
|
|
Details: map[string]interface{}{
|
|
"connected_peers": 0,
|
|
"min_peers": 1,
|
|
},
|
|
Timestamp: time.Now(),
|
|
}
|
|
}
|
|
|
|
return CheckResult{
|
|
Healthy: true,
|
|
Message: "P2P connectivity OK",
|
|
Details: map[string]interface{}{
|
|
"connected_peers": 5,
|
|
"min_peers": 1,
|
|
},
|
|
Timestamp: time.Now(),
|
|
}
|
|
},
|
|
}
|
|
healthManager.RegisterCheck(customCheck)
|
|
|
|
// Election system health check
|
|
electionCheck := &HealthCheck{
|
|
Name: "election-system",
|
|
Description: "Election system health check",
|
|
Enabled: true,
|
|
Critical: false, // Elections can be temporarily unhealthy
|
|
Interval: 30 * time.Second,
|
|
Timeout: 5 * time.Second,
|
|
Checker: func(ctx context.Context) CheckResult {
|
|
// Simulate election system check
|
|
healthy := true
|
|
message := "Election system operational"
|
|
|
|
return CheckResult{
|
|
Healthy: healthy,
|
|
Message: message,
|
|
Details: map[string]interface{}{
|
|
"current_admin": "node-456",
|
|
"election_term": 42,
|
|
"last_election": time.Now().Add(-10 * time.Minute),
|
|
},
|
|
Timestamp: time.Now(),
|
|
}
|
|
},
|
|
}
|
|
healthManager.RegisterCheck(electionCheck)
|
|
}
|
|
|
|
// setupShutdownComponents registers components for graceful shutdown
|
|
func setupShutdownComponents(shutdownManager *shutdown.Manager, healthManager *Manager) {
|
|
// Register health manager for shutdown (high priority to stop health checks early)
|
|
healthComponent := shutdown.NewGenericComponent("health-manager", 10, true).
|
|
SetShutdownFunc(func(ctx context.Context) error {
|
|
return healthManager.Stop()
|
|
})
|
|
shutdownManager.Register(healthComponent)
|
|
|
|
// Simulate HTTP server
|
|
httpServer := &http.Server{Addr: ":8080"}
|
|
httpComponent := shutdown.NewHTTPServerComponent("main-http-server", httpServer, 20)
|
|
shutdownManager.Register(httpComponent)
|
|
|
|
// Simulate P2P node
|
|
p2pComponent := shutdown.NewP2PNodeComponent("p2p-node", func() error {
|
|
// Simulate P2P node cleanup
|
|
time.Sleep(2 * time.Second)
|
|
return nil
|
|
}, 30)
|
|
shutdownManager.Register(p2pComponent)
|
|
|
|
// Simulate database connections
|
|
dbComponent := shutdown.NewDatabaseComponent("database-pool", func() error {
|
|
// Simulate database connection cleanup
|
|
time.Sleep(1 * time.Second)
|
|
return nil
|
|
}, 40)
|
|
shutdownManager.Register(dbComponent)
|
|
|
|
// Simulate worker pool
|
|
workerStopCh := make(chan struct{})
|
|
workerComponent := shutdown.NewWorkerPoolComponent("background-workers", workerStopCh, 5, 50)
|
|
shutdownManager.Register(workerComponent)
|
|
|
|
// Simulate monitoring/metrics system
|
|
monitoringComponent := shutdown.NewMonitoringComponent("metrics-system", func() error {
|
|
// Simulate metrics system cleanup
|
|
time.Sleep(500 * time.Millisecond)
|
|
return nil
|
|
}, 60)
|
|
shutdownManager.Register(monitoringComponent)
|
|
}
|
|
|
|
// setupShutdownHooks adds hooks for different shutdown phases
|
|
func setupShutdownHooks(shutdownManager *shutdown.Manager, healthManager *Manager, logger shutdown.Logger) {
|
|
// Pre-shutdown hook: Mark system as stopping
|
|
shutdownManager.AddHook(shutdown.PhasePreShutdown, func(ctx context.Context) error {
|
|
logger.Info("🔄 Pre-shutdown: Marking system as stopping")
|
|
|
|
// Update health status to stopping
|
|
status := healthManager.GetStatus()
|
|
status.Status = StatusStopping
|
|
status.Message = "System is shutting down"
|
|
|
|
return nil
|
|
})
|
|
|
|
// Shutdown hook: Log progress
|
|
shutdownManager.AddHook(shutdown.PhaseShutdown, func(ctx context.Context) error {
|
|
logger.Info("🔄 Shutdown phase: Components are being shut down")
|
|
return nil
|
|
})
|
|
|
|
// Post-shutdown hook: Final health status update and cleanup
|
|
shutdownManager.AddHook(shutdown.PhasePostShutdown, func(ctx context.Context) error {
|
|
logger.Info("🔄 Post-shutdown: Performing final cleanup")
|
|
|
|
// Any final cleanup that needs to happen after components are shut down
|
|
return nil
|
|
})
|
|
|
|
// Cleanup hook: Final logging and state persistence
|
|
shutdownManager.AddHook(shutdown.PhaseCleanup, func(ctx context.Context) error {
|
|
logger.Info("🔄 Cleanup: Finalizing shutdown process")
|
|
|
|
// Save any final state, flush logs, etc.
|
|
return nil
|
|
})
|
|
}
|
|
|
|
// HealthAwareComponent is an example of how to create components that integrate with health monitoring
|
|
type HealthAwareComponent struct {
|
|
name string
|
|
healthManager *Manager
|
|
checkName string
|
|
isRunning bool
|
|
stopCh chan struct{}
|
|
}
|
|
|
|
// NewHealthAwareComponent creates a component that registers its own health check
|
|
func NewHealthAwareComponent(name string, healthManager *Manager) *HealthAwareComponent {
|
|
comp := &HealthAwareComponent{
|
|
name: name,
|
|
healthManager: healthManager,
|
|
checkName: fmt.Sprintf("%s-health", name),
|
|
stopCh: make(chan struct{}),
|
|
}
|
|
|
|
// Register health check for this component
|
|
healthCheck := &HealthCheck{
|
|
Name: comp.checkName,
|
|
Description: fmt.Sprintf("Health check for %s component", name),
|
|
Enabled: true,
|
|
Critical: false,
|
|
Interval: 30 * time.Second,
|
|
Timeout: 10 * time.Second,
|
|
Checker: func(ctx context.Context) CheckResult {
|
|
if comp.isRunning {
|
|
return CheckResult{
|
|
Healthy: true,
|
|
Message: fmt.Sprintf("%s is running normally", comp.name),
|
|
Timestamp: time.Now(),
|
|
}
|
|
}
|
|
|
|
return CheckResult{
|
|
Healthy: false,
|
|
Message: fmt.Sprintf("%s is not running", comp.name),
|
|
Timestamp: time.Now(),
|
|
}
|
|
},
|
|
}
|
|
|
|
healthManager.RegisterCheck(healthCheck)
|
|
return comp
|
|
}
|
|
|
|
// Start starts the component
|
|
func (c *HealthAwareComponent) Start() error {
|
|
c.isRunning = true
|
|
return nil
|
|
}
|
|
|
|
// Name returns the component name
|
|
func (c *HealthAwareComponent) Name() string {
|
|
return c.name
|
|
}
|
|
|
|
// Priority returns the shutdown priority
|
|
func (c *HealthAwareComponent) Priority() int {
|
|
return 50
|
|
}
|
|
|
|
// CanForceStop returns whether the component can be force-stopped
|
|
func (c *HealthAwareComponent) CanForceStop() bool {
|
|
return true
|
|
}
|
|
|
|
// Shutdown gracefully shuts down the component
|
|
func (c *HealthAwareComponent) Shutdown(ctx context.Context) error {
|
|
c.isRunning = false
|
|
close(c.stopCh)
|
|
|
|
// Unregister health check
|
|
c.healthManager.UnregisterCheck(c.checkName)
|
|
|
|
return nil
|
|
} |