bzzz/pkg/shutdown/manager.go

package shutdown

import (
	"context"
	"fmt"
	"os"
	"os/signal"
	"sync"
	"syscall"
	"time"
)

// Manager provides coordinated graceful shutdown for all system components
type Manager struct {
	mu               sync.RWMutex
	components       map[string]Component
	hooks            map[Phase][]Hook
	timeout          time.Duration
	forceTimeout     time.Duration
	signals          []os.Signal
	signalCh         chan os.Signal
	shutdownCh       chan struct{}
	completedCh      chan struct{}
	started          bool
	shutdownStarted  bool
	logger           Logger
}

// Component represents a system component that needs graceful shutdown
type Component interface {
	// Name returns the component name for logging
	Name() string

	// Shutdown gracefully shuts down the component
	Shutdown(ctx context.Context) error

	// Priority returns the shutdown priority (lower numbers shut down first)
	Priority() int

	// CanForceStop returns true if the component can be force-stopped
	CanForceStop() bool
}

// Hook represents a function to be called during shutdown phases
type Hook func(ctx context.Context) error

// Phase represents different phases of the shutdown process
type Phase int

const (
	PhasePreShutdown Phase = iota // Before any components are shut down
	PhaseShutdown                 // During component shutdown
	PhasePostShutdown            // After all components are shut down
	PhaseCleanup                 // Final cleanup phase
)

// Logger interface for shutdown logging
type Logger interface {
	Info(msg string, args ...interface{})
	Warn(msg string, args ...interface{})
	Error(msg string, args ...interface{})
}

// NewManager creates a new shutdown manager
func NewManager(timeout time.Duration, logger Logger) *Manager {
	if timeout == 0 {
		timeout = 30 * time.Second
	}

	if logger == nil {
		logger = &defaultLogger{}
	}

	return &Manager{
		components:   make(map[string]Component),
		hooks:        make(map[Phase][]Hook),
		timeout:      timeout,
		forceTimeout: timeout + 15*time.Second,
		signals:      []os.Signal{os.Interrupt, syscall.SIGTERM, syscall.SIGQUIT},
		signalCh:     make(chan os.Signal, 1),
		shutdownCh:   make(chan struct{}),
		completedCh:  make(chan struct{}),
		logger:       logger,
	}
}

// Register adds a component for graceful shutdown
func (m *Manager) Register(component Component) {
	m.mu.Lock()
	defer m.mu.Unlock()

	if m.shutdownStarted {
		m.logger.Warn("Cannot register component '%s' - shutdown already started", component.Name())
		return
	}

	m.components[component.Name()] = component
	m.logger.Info("Registered component for graceful shutdown: %s (priority: %d)",
		component.Name(), component.Priority())
}

// Unregister removes a component from graceful shutdown
func (m *Manager) Unregister(name string) {
	m.mu.Lock()
	defer m.mu.Unlock()

	if m.shutdownStarted {
		m.logger.Warn("Cannot unregister component '%s' - shutdown already started", name)
		return
	}

	delete(m.components, name)
	m.logger.Info("Unregistered component from graceful shutdown: %s", name)
}

// AddHook adds a hook to be called during a specific shutdown phase
func (m *Manager) AddHook(phase Phase, hook Hook) {
	m.mu.Lock()
	defer m.mu.Unlock()

	m.hooks[phase] = append(m.hooks[phase], hook)
}

// Start begins listening for shutdown signals
func (m *Manager) Start() {
	m.mu.Lock()
	if m.started {
		m.mu.Unlock()
		return
	}
	m.started = true
	m.mu.Unlock()

	signal.Notify(m.signalCh, m.signals...)

	go m.signalHandler()
	m.logger.Info("Graceful shutdown manager started, listening for signals: %v", m.signals)
}

// Stop initiates graceful shutdown programmatically
func (m *Manager) Stop() {
	select {
	case m.shutdownCh <- struct{}{}:
	default:
		// Shutdown already initiated
	}
}

// Wait blocks until shutdown is complete
func (m *Manager) Wait() {
	<-m.completedCh
}

// signalHandler handles OS signals and initiates shutdown
func (m *Manager) signalHandler() {
	select {
	case sig := <-m.signalCh:
		m.logger.Info("Received signal %v, initiating graceful shutdown", sig)
		m.initiateShutdown()
	case <-m.shutdownCh:
		m.logger.Info("Programmatic shutdown requested")
		m.initiateShutdown()
	}
}

// initiateShutdown performs the actual shutdown process
func (m *Manager) initiateShutdown() {
	m.mu.Lock()
	if m.shutdownStarted {
		m.mu.Unlock()
		return
	}
	m.shutdownStarted = true
	m.mu.Unlock()

	defer close(m.completedCh)

	// Create main shutdown context with timeout
	ctx, cancel := context.WithTimeout(context.Background(), m.timeout)
	defer cancel()

	// Create force shutdown context
	forceCtx, forceCancel := context.WithTimeout(context.Background(), m.forceTimeout)
	defer forceCancel()

	// Start force shutdown monitor
	go m.forceShutdownMonitor(forceCtx)

	startTime := time.Now()
	m.logger.Info("🛑 Beginning graceful shutdown (timeout: %v)", m.timeout)

	// Phase 1: Pre-shutdown hooks
	if err := m.executeHooks(ctx, PhasePreShutdown); err != nil {
		m.logger.Error("Pre-shutdown hooks failed: %v", err)
	}

	// Phase 2: Shutdown components in priority order
	if err := m.shutdownComponents(ctx); err != nil {
		m.logger.Error("Component shutdown failed: %v", err)
	}

	// Phase 3: Post-shutdown hooks
	if err := m.executeHooks(ctx, PhasePostShutdown); err != nil {
		m.logger.Error("Post-shutdown hooks failed: %v", err)
	}

	// Phase 4: Cleanup hooks
	if err := m.executeHooks(ctx, PhaseCleanup); err != nil {
		m.logger.Error("Cleanup hooks failed: %v", err)
	}

	elapsed := time.Since(startTime)
	m.logger.Info("✅ Graceful shutdown completed in %v", elapsed)
}

// executeHooks runs all hooks for a given phase
func (m *Manager) executeHooks(ctx context.Context, phase Phase) error {
	m.mu.RLock()
	hooks := m.hooks[phase]
	m.mu.RUnlock()

	if len(hooks) == 0 {
		return nil
	}

	phaseName := map[Phase]string{
		PhasePreShutdown:  "pre-shutdown",
		PhaseShutdown:     "shutdown",
		PhasePostShutdown: "post-shutdown",
		PhaseCleanup:      "cleanup",
	}[phase]

	m.logger.Info("🔧 Executing %s hooks (%d hooks)", phaseName, len(hooks))

	for i, hook := range hooks {
		select {
		case <-ctx.Done():
			return ctx.Err()
		default:
		}

		if err := hook(ctx); err != nil {
			m.logger.Error("Hook %d in %s phase failed: %v", i+1, phaseName, err)
			// Continue with other hooks even if one fails
		}
	}

	return nil
}

// shutdownComponents shuts down all registered components in priority order
func (m *Manager) shutdownComponents(ctx context.Context) error {
	m.mu.RLock()
	components := make([]Component, 0, len(m.components))
	for _, comp := range m.components {
		components = append(components, comp)
	}
	m.mu.RUnlock()

	if len(components) == 0 {
		m.logger.Info("No components registered for shutdown")
		return nil
	}

	// Sort components by priority (lower numbers first)
	for i := 0; i < len(components)-1; i++ {
		for j := i + 1; j < len(components); j++ {
			if components[i].Priority() > components[j].Priority() {
				components[i], components[j] = components[j], components[i]
			}
		}
	}

	m.logger.Info("🔄 Shutting down %d components in priority order", len(components))

	// Shutdown components with individual timeouts
	componentTimeout := m.timeout / time.Duration(len(components))
	if componentTimeout < 5*time.Second {
		componentTimeout = 5 * time.Second
	}

	for _, comp := range components {
		select {
		case <-ctx.Done():
			m.logger.Warn("Main shutdown context cancelled, attempting force shutdown")
			return m.forceShutdownRemainingComponents(components)
		default:
		}

		compCtx, compCancel := context.WithTimeout(ctx, componentTimeout)

		m.logger.Info("🔄 Shutting down component: %s (priority: %d, timeout: %v)",
			comp.Name(), comp.Priority(), componentTimeout)

		start := time.Now()
		if err := comp.Shutdown(compCtx); err != nil {
			elapsed := time.Since(start)
			m.logger.Error("❌ Component '%s' shutdown failed after %v: %v",
				comp.Name(), elapsed, err)
		} else {
			elapsed := time.Since(start)
			m.logger.Info("✅ Component '%s' shutdown completed in %v",
				comp.Name(), elapsed)
		}

		compCancel()
	}

	return nil
}

// forceShutdownMonitor monitors for force shutdown timeout
func (m *Manager) forceShutdownMonitor(ctx context.Context) {
	<-ctx.Done()
	if ctx.Err() == context.DeadlineExceeded {
		m.logger.Error("💥 Force shutdown timeout reached, terminating process")
		os.Exit(1)
	}
}

// forceShutdownRemainingComponents attempts to force stop components that can be force-stopped
func (m *Manager) forceShutdownRemainingComponents(components []Component) error {
	m.logger.Warn("🚨 Attempting force shutdown of remaining components")

	for _, comp := range components {
		if comp.CanForceStop() {
			m.logger.Warn("🔨 Force stopping component: %s", comp.Name())
			// For force stop, we give a very short timeout
			forceCtx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
			comp.Shutdown(forceCtx)
			cancel()
		} else {
			m.logger.Warn("⚠️  Component '%s' cannot be force stopped", comp.Name())
		}
	}

	return nil
}

// GetStatus returns the current shutdown status
func (m *Manager) GetStatus() *Status {
	m.mu.RLock()
	defer m.mu.RUnlock()

	status := &Status{
		Started:         m.started,
		ShutdownStarted: m.shutdownStarted,
		ComponentCount:  len(m.components),
		Components:      make([]string, 0, len(m.components)),
	}

	for name := range m.components {
		status.Components = append(status.Components, name)
	}

	return status
}

// Status represents the current shutdown manager status
type Status struct {
	Started         bool     `json:"started"`
	ShutdownStarted bool     `json:"shutdown_started"`
	ComponentCount  int      `json:"component_count"`
	Components      []string `json:"components"`
}

// defaultLogger is a simple logger implementation
type defaultLogger struct{}

func (l *defaultLogger) Info(msg string, args ...interface{}) {
	fmt.Printf("[INFO] "+msg+"\n", args...)
}

func (l *defaultLogger) Warn(msg string, args ...interface{}) {
	fmt.Printf("[WARN] "+msg+"\n", args...)
}

func (l *defaultLogger) Error(msg string, args ...interface{}) {
	fmt.Printf("[ERROR] "+msg+"\n", args...)
}