// Package shutdown coordinates phased, graceful shutdown of registered
// components, triggered either by an OS signal or programmatically.
package shutdown

import (
	"context"
	"fmt"
	"os"
	"os/signal"
	"sort"
	"sync"
	"syscall"
	"time"
)

// Manager provides coordinated graceful shutdown for all system components.
//
// Components are registered with Register and are shut down in ascending
// priority order once shutdown is triggered, either by one of the configured
// OS signals (after Start) or by a call to Stop. Hooks added with AddHook run
// around the component shutdown. Wait blocks until the whole sequence is done.
type Manager struct {
	mu           sync.RWMutex
	components   map[string]Component
	hooks        map[Phase][]Hook
	timeout      time.Duration // budget for the whole graceful sequence
	forceTimeout time.Duration // hard limit after which the process exits
	signals      []os.Signal
	signalCh     chan os.Signal
	shutdownCh   chan struct{} // closed (once) by Stop
	completedCh  chan struct{} // closed when the shutdown sequence finishes
	stopOnce     sync.Once     // guards closing shutdownCh
	started      bool
	// shutdownStarted guarantees the shutdown sequence runs at most once.
	shutdownStarted bool
	logger          Logger
}

// Component represents a system component that needs graceful shutdown.
type Component interface {
	// Name returns the component name for logging.
	Name() string

	// Shutdown gracefully shuts down the component.
	Shutdown(ctx context.Context) error

	// Priority returns the shutdown priority (lower numbers shut down first).
	Priority() int

	// CanForceStop returns true if the component can be force-stopped.
	CanForceStop() bool
}

// Hook represents a function to be called during shutdown phases.
type Hook func(ctx context.Context) error

// Phase represents different phases of the shutdown process.
type Phase int

const (
	PhasePreShutdown  Phase = iota // Before any components are shut down
	PhaseShutdown                  // Start of component shutdown, right before components are stopped
	PhasePostShutdown              // After all components are shut down
	PhaseCleanup                   // Final cleanup phase
)

// Logger interface for shutdown logging. The message is a fmt-style format
// string and args are its operands.
type Logger interface {
	Info(msg string, args ...interface{})
	Warn(msg string, args ...interface{})
	Error(msg string, args ...interface{})
}

// NewManager creates a new shutdown manager.
//
// timeout is the budget for the graceful sequence; a zero or negative value
// selects the default of 30 seconds. The process is terminated 15 seconds
// after that budget if shutdown is still running. A nil logger selects a
// logger that prints to standard output.
func NewManager(timeout time.Duration, logger Logger) *Manager {
	// A non-positive timeout would yield an already-expired context.
	if timeout <= 0 {
		timeout = 30 * time.Second
	}
	if logger == nil {
		logger = &defaultLogger{}
	}

	return &Manager{
		components:   make(map[string]Component),
		hooks:        make(map[Phase][]Hook),
		timeout:      timeout,
		forceTimeout: timeout + 15*time.Second,
		signals:      []os.Signal{os.Interrupt, syscall.SIGTERM, syscall.SIGQUIT},
		signalCh:     make(chan os.Signal, 1),
		shutdownCh:   make(chan struct{}),
		completedCh:  make(chan struct{}),
		logger:       logger,
	}
}

// Register adds a component for graceful shutdown. A component registered
// under an already-used name replaces the previous one. Registration is
// rejected (with a warning) for a nil component or once shutdown has started.
func (m *Manager) Register(component Component) {
	if component == nil {
		m.logger.Warn("Cannot register nil component")
		return
	}
	name := component.Name()

	m.mu.Lock()
	defer m.mu.Unlock()

	if m.shutdownStarted {
		m.logger.Warn("Cannot register component '%s' - shutdown already started", name)
		return
	}

	m.components[name] = component
	m.logger.Info("Registered component for graceful shutdown: %s (priority: %d)",
		name, component.Priority())
}

// Unregister removes a component from graceful shutdown. It is rejected
// (with a warning) once shutdown has started.
func (m *Manager) Unregister(name string) {
	m.mu.Lock()
	defer m.mu.Unlock()

	if m.shutdownStarted {
		m.logger.Warn("Cannot unregister component '%s' - shutdown already started", name)
		return
	}

	delete(m.components, name)
	m.logger.Info("Unregistered component from graceful shutdown: %s", name)
}

// AddHook adds a hook to be called during a specific shutdown phase. Hooks of
// one phase run in the order they were added. A nil hook is ignored.
func (m *Manager) AddHook(phase Phase, hook Hook) {
	if hook == nil {
		// A nil hook would otherwise panic in the middle of shutdown.
		m.logger.Warn("Ignoring nil hook for %s phase", phaseLabel(phase))
		return
	}

	m.mu.Lock()
	defer m.mu.Unlock()
	m.hooks[phase] = append(m.hooks[phase], hook)
}

// Start begins listening for shutdown signals. Calling it more than once has
// no further effect.
func (m *Manager) Start() {
	m.mu.Lock()
	if m.started {
		m.mu.Unlock()
		return
	}
	m.started = true
	m.mu.Unlock()

	signal.Notify(m.signalCh, m.signals...)
	go m.signalHandler()

	m.logger.Info("Graceful shutdown manager started, listening for signals: %v", m.signals)
}

// Stop initiates graceful shutdown programmatically. It does not block; use
// Wait to wait for completion. Stop is safe to call multiple times and works
// whether or not Start has been called.
func (m *Manager) Stop() {
	m.stopOnce.Do(func() {
		// Closing (rather than sending on) the channel means the request
		// cannot be lost if the signal handler is not yet receiving.
		close(m.shutdownCh)

		m.mu.RLock()
		started := m.started
		m.mu.RUnlock()

		if !started {
			// No signal handler is running to observe shutdownCh, so run
			// the sequence from here. initiateShutdown is idempotent.
			m.logger.Info("Programmatic shutdown requested")
			go m.initiateShutdown()
		}
	})
}

// Wait blocks until shutdown is complete.
func (m *Manager) Wait() {
	<-m.completedCh
}

// signalHandler waits for either an OS signal or a programmatic request and
// then runs the shutdown sequence. The signal registration is released when
// the handler finishes.
func (m *Manager) signalHandler() {
	defer signal.Stop(m.signalCh)

	select {
	case sig := <-m.signalCh:
		m.logger.Info("Received signal %v, initiating graceful shutdown", sig)
		m.initiateShutdown()
	case <-m.shutdownCh:
		m.logger.Info("Programmatic shutdown requested")
		m.initiateShutdown()
	}
}

// initiateShutdown performs the actual shutdown process. Only the first call
// runs the sequence; later calls return immediately.
//
// All phases share one context bounded by the manager's timeout, so hooks of
// later phases are skipped if that context has already expired.
func (m *Manager) initiateShutdown() {
	m.mu.Lock()
	if m.shutdownStarted {
		m.mu.Unlock()
		return
	}
	m.shutdownStarted = true
	m.mu.Unlock()

	defer close(m.completedCh)

	// Create main shutdown context with timeout.
	ctx, cancel := context.WithTimeout(context.Background(), m.timeout)
	defer cancel()

	// Create force shutdown context. Cancelling it on return tells the
	// monitor that shutdown finished in time.
	forceCtx, forceCancel := context.WithTimeout(context.Background(), m.forceTimeout)
	defer forceCancel()

	// Start force shutdown monitor.
	go m.forceShutdownMonitor(forceCtx)

	startTime := time.Now()
	m.logger.Info("🛑 Beginning graceful shutdown (timeout: %v)", m.timeout)

	// Phase 1: Pre-shutdown hooks.
	if err := m.executeHooks(ctx, PhasePreShutdown); err != nil {
		m.logger.Error("Pre-shutdown hooks failed: %v", err)
	}

	// Phase 2: Shutdown-phase hooks, then components in priority order.
	if err := m.executeHooks(ctx, PhaseShutdown); err != nil {
		m.logger.Error("Shutdown hooks failed: %v", err)
	}
	if err := m.shutdownComponents(ctx); err != nil {
		m.logger.Error("Component shutdown failed: %v", err)
	}

	// Phase 3: Post-shutdown hooks.
	if err := m.executeHooks(ctx, PhasePostShutdown); err != nil {
		m.logger.Error("Post-shutdown hooks failed: %v", err)
	}

	// Phase 4: Cleanup hooks.
	if err := m.executeHooks(ctx, PhaseCleanup); err != nil {
		m.logger.Error("Cleanup hooks failed: %v", err)
	}

	elapsed := time.Since(startTime)
	m.logger.Info("✅ Graceful shutdown completed in %v", elapsed)
}

// phaseLabel returns the human-readable name of a phase for log messages.
func phaseLabel(phase Phase) string {
	switch phase {
	case PhasePreShutdown:
		return "pre-shutdown"
	case PhaseShutdown:
		return "shutdown"
	case PhasePostShutdown:
		return "post-shutdown"
	case PhaseCleanup:
		return "cleanup"
	default:
		return "unknown"
	}
}

// executeHooks runs all hooks for a given phase in registration order.
//
// A failing hook is logged and does not stop the remaining hooks. The only
// error returned is ctx.Err(), when the context ends before all hooks ran.
func (m *Manager) executeHooks(ctx context.Context, phase Phase) error {
	m.mu.RLock()
	// Copy so the hooks run on a snapshot, independent of later AddHook calls.
	hooks := append([]Hook(nil), m.hooks[phase]...)
	m.mu.RUnlock()

	if len(hooks) == 0 {
		return nil
	}

	name := phaseLabel(phase)
	m.logger.Info("🔧 Executing %s hooks (%d hooks)", name, len(hooks))

	for i, hook := range hooks {
		if err := ctx.Err(); err != nil {
			return err
		}
		if err := hook(ctx); err != nil {
			// Continue with other hooks even if one fails.
			m.logger.Error("Hook %d in %s phase failed: %v", i+1, name, err)
		}
	}

	return nil
}

// shutdownComponents shuts down all registered components in priority order
// (lower numbers first, ties broken by name).
//
// Each component gets an equal share of the manager timeout, but at least
// five seconds, and never more than what is left of ctx. If ctx ends before
// every component was attempted, the components that failed or were not yet
// attempted are handed to forceShutdownRemainingComponents; components that
// already shut down cleanly are not touched again.
func (m *Manager) shutdownComponents(ctx context.Context) error {
	m.mu.RLock()
	components := make([]Component, 0, len(m.components))
	for _, comp := range m.components {
		components = append(components, comp)
	}
	m.mu.RUnlock()

	if len(components) == 0 {
		m.logger.Info("No components registered for shutdown")
		return nil
	}

	// Map iteration order is random, so tie-break on name to keep the
	// shutdown order deterministic for equal priorities.
	sort.SliceStable(components, func(i, j int) bool {
		pi, pj := components[i].Priority(), components[j].Priority()
		if pi != pj {
			return pi < pj
		}
		return components[i].Name() < components[j].Name()
	})

	m.logger.Info("🔄 Shutting down %d components in priority order", len(components))

	// Shutdown components with individual timeouts.
	componentTimeout := m.timeout / time.Duration(len(components))
	if componentTimeout < 5*time.Second {
		componentTimeout = 5 * time.Second
	}

	// failed collects components whose graceful shutdown returned an error.
	var failed []Component
	for i, comp := range components {
		if ctx.Err() != nil {
			m.logger.Warn("Main shutdown context cancelled, attempting force shutdown")
			pending := append(failed, components[i:]...)
			return m.forceShutdownRemainingComponents(pending)
		}
		if err := m.shutdownComponent(ctx, comp, componentTimeout); err != nil {
			failed = append(failed, comp)
		}
	}

	return nil
}

// shutdownComponent gracefully shuts down a single component within timeout,
// logs the outcome and returns the component's error, if any.
func (m *Manager) shutdownComponent(ctx context.Context, comp Component, timeout time.Duration) error {
	compCtx, cancel := context.WithTimeout(ctx, timeout)
	defer cancel()

	m.logger.Info("🔄 Shutting down component: %s (priority: %d, timeout: %v)",
		comp.Name(), comp.Priority(), timeout)

	start := time.Now()
	err := comp.Shutdown(compCtx)
	elapsed := time.Since(start)

	if err != nil {
		m.logger.Error("❌ Component '%s' shutdown failed after %v: %v", comp.Name(), elapsed, err)
		return err
	}
	m.logger.Info("✅ Component '%s' shutdown completed in %v", comp.Name(), elapsed)
	return nil
}

// forceShutdownMonitor terminates the process with exit code 1 if ctx reaches
// its deadline. When ctx is cancelled instead (shutdown finished in time) it
// returns without doing anything.
func (m *Manager) forceShutdownMonitor(ctx context.Context) {
	<-ctx.Done()
	if ctx.Err() == context.DeadlineExceeded {
		m.logger.Error("💥 Force shutdown timeout reached, terminating process")
		os.Exit(1)
	}
}

// forceShutdownRemainingComponents attempts to force stop the given
// components. Components reporting CanForceStop get one more Shutdown call
// with a fresh two-second context; the others are only logged. Failures are
// logged and the function always returns nil.
func (m *Manager) forceShutdownRemainingComponents(components []Component) error {
	m.logger.Warn("🚨 Attempting force shutdown of remaining components")

	for _, comp := range components {
		if !comp.CanForceStop() {
			m.logger.Warn("⚠️ Component '%s' cannot be force stopped", comp.Name())
			continue
		}

		m.logger.Warn("🔨 Force stopping component: %s", comp.Name())
		// For force stop, we give a very short timeout.
		forceCtx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
		err := comp.Shutdown(forceCtx)
		cancel()
		if err != nil {
			m.logger.Error("❌ Force stop of component '%s' failed: %v", comp.Name(), err)
		}
	}

	return nil
}

// GetStatus returns the current shutdown status. Component names are sorted
// alphabetically.
func (m *Manager) GetStatus() *Status {
	m.mu.RLock()
	defer m.mu.RUnlock()

	status := &Status{
		Started:         m.started,
		ShutdownStarted: m.shutdownStarted,
		ComponentCount:  len(m.components),
		Components:      make([]string, 0, len(m.components)),
	}
	for name := range m.components {
		status.Components = append(status.Components, name)
	}
	sort.Strings(status.Components)

	return status
}

// Status represents the current shutdown manager status.
type Status struct {
	Started         bool     `json:"started"`
	ShutdownStarted bool     `json:"shutdown_started"`
	ComponentCount  int      `json:"component_count"`
	Components      []string `json:"components"`
}

// defaultLogger is a simple logger implementation that prints each message,
// prefixed with its level, to standard output.
type defaultLogger struct{}

// Info prints an informational message.
func (l *defaultLogger) Info(msg string, args ...interface{}) {
	fmt.Printf("[INFO] "+msg+"\n", args...)
}

// Warn prints a warning message.
func (l *defaultLogger) Warn(msg string, args ...interface{}) {
	fmt.Printf("[WARN] "+msg+"\n", args...)
}

// Error prints an error message.
func (l *defaultLogger) Error(msg string, args ...interface{}) {
	fmt.Printf("[ERROR] "+msg+"\n", args...)
}