 9bdcbe0447
			
		
	
	9bdcbe0447
	
	
	
		
			
			Major integrations and fixes: - Added BACKBEAT SDK integration for P2P operation timing - Implemented beat-aware status tracking for distributed operations - Added Docker secrets support for secure license management - Resolved KACHING license validation via HTTPS/TLS - Updated docker-compose configuration for clean stack deployment - Disabled rollback policies to prevent deployment failures - Added license credential storage (CHORUS-DEV-MULTI-001) Technical improvements: - BACKBEAT P2P operation tracking with phase management - Enhanced configuration system with file-based secrets - Improved error handling for license validation - Clean separation of KACHING and CHORUS deployment stacks 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
		
			
				
	
	
		
			261 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			261 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
| package election
 | |
| 
 | |
| import (
 | |
| 	"context"
 | |
| 	"time"
 | |
| 
 | |
| 	// slurpContext "chorus/pkg/slurp/context"
 | |
| )
 | |
| 
 | |
| // SLURPElection extends the base Election interface to include Project Manager contextual intelligence duties
 | |
| type SLURPElection interface {
 | |
| 	Election // Embed base election interface
 | |
| 	
 | |
| 	// Project Manager specific capabilities
 | |
| 	
 | |
| 	// RegisterContextManager registers a SLURP context manager for leader duties
 | |
| 	RegisterContextManager(manager ContextManager) error
 | |
| 	
 | |
| 	// IsContextLeader returns whether this node is the current context generation leader
 | |
| 	IsContextLeader() bool
 | |
| 	
 | |
| 	// GetContextManager returns the registered context manager (if leader)
 | |
| 	GetContextManager() (ContextManager, error)
 | |
| 	
 | |
| 	// TransferContextLeadership initiates graceful context leadership transfer
 | |
| 	TransferContextLeadership(ctx context.Context, targetNodeID string) error
 | |
| 	
 | |
| 	// GetContextLeaderInfo returns information about current context leader
 | |
| 	GetContextLeaderInfo() (*LeaderInfo, error)
 | |
| 	
 | |
| 	// Context generation coordination
 | |
| 	
 | |
| 	// StartContextGeneration begins context generation operations (leader only)
 | |
| 	StartContextGeneration(ctx context.Context) error
 | |
| 	
 | |
| 	// StopContextGeneration stops context generation operations
 | |
| 	StopContextGeneration(ctx context.Context) error
 | |
| 	
 | |
| 	// GetContextGenerationStatus returns status of context operations
 | |
| 	GetContextGenerationStatus() (*GenerationStatus, error)
 | |
| 	
 | |
| 	// RequestContextGeneration queues a context generation request
 | |
| 	RequestContextGeneration(req *ContextGenerationRequest) error
 | |
| 	
 | |
| 	// Context leadership monitoring
 | |
| 	
 | |
| 	// SetContextLeadershipCallbacks sets callbacks for context leadership changes
 | |
| 	SetContextLeadershipCallbacks(callbacks *ContextLeadershipCallbacks) error
 | |
| 	
 | |
| 	// GetContextClusterHealth returns health of context generation cluster
 | |
| 	GetContextClusterHealth() (*ContextClusterHealth, error)
 | |
| 	
 | |
| 	// Failover and recovery
 | |
| 	
 | |
| 	// PrepareContextFailover prepares context state for leadership failover
 | |
| 	PrepareContextFailover(ctx context.Context) (*ContextFailoverState, error)
 | |
| 	
 | |
| 	// ExecuteContextFailover executes context leadership failover
 | |
| 	ExecuteContextFailover(ctx context.Context, state *ContextFailoverState) error
 | |
| 	
 | |
| 	// ValidateContextState validates context failover state
 | |
| 	ValidateContextState(state *ContextFailoverState) (*ContextStateValidation, error)
 | |
| }
 | |
| 
 | |
| // Election represents the base election interface (extracted from existing code)
 | |
| type Election interface {
 | |
| 	// Basic election operations
 | |
| 	Start() error
 | |
| 	Stop()
 | |
| 	TriggerElection(trigger ElectionTrigger)
 | |
| 	
 | |
| 	// Leadership queries
 | |
| 	GetCurrentAdmin() string
 | |
| 	IsCurrentAdmin() bool
 | |
| 	GetElectionState() ElectionState
 | |
| 	
 | |
| 	// Callback management
 | |
| 	SetCallbacks(onAdminChanged func(oldAdmin, newAdmin string), onElectionComplete func(winner string))
 | |
| 	
 | |
| 	// Admin operations
 | |
| 	SendAdminHeartbeat() error
 | |
| }
 | |
| 
 | |
| // ContextLeadershipCallbacks is defined in interfaces.go
 | |
| 
 | |
| // ContextClusterHealth represents health of context generation cluster
 | |
| type ContextClusterHealth struct {
 | |
| 	TotalNodes          int                     `json:"total_nodes"`           // Total nodes in cluster
 | |
| 	HealthyNodes        int                     `json:"healthy_nodes"`         // Healthy nodes
 | |
| 	UnhealthyNodes      []string                `json:"unhealthy_nodes"`       // Unhealthy node IDs
 | |
| 	CurrentLeader       string                  `json:"current_leader"`        // Current context leader
 | |
| 	LeaderHealthy       bool                    `json:"leader_healthy"`        // Leader health status
 | |
| 	GenerationActive    bool                    `json:"generation_active"`     // Context generation status
 | |
| 	QueueHealth         *QueueHealthStatus      `json:"queue_health"`          // Queue health
 | |
| 	NodeHealths         map[string]*NodeHealthStatus `json:"node_healths"`    // Per-node health
 | |
| 	LastElection        time.Time               `json:"last_election"`         // Last election time
 | |
| 	NextHealthCheck     time.Time               `json:"next_health_check"`     // Next health check
 | |
| 	OverallHealthScore  float64                 `json:"overall_health_score"`  // Overall health (0-1)
 | |
| }
 | |
| 
 | |
// QueueHealthStatus describes the health of the context generation queue.
type QueueHealthStatus struct {
	// QueueLength is the current queue length.
	QueueLength int `json:"queue_length"`

	// MaxQueueSize is the maximum queue capacity.
	MaxQueueSize int `json:"max_queue_size"`

	// QueueUtilization is the queue utilization (0-1).
	QueueUtilization float64 `json:"queue_utilization"`

	// ProcessingRate is measured in requests per second.
	ProcessingRate float64 `json:"processing_rate"`

	// AverageWaitTime is the average wait time.
	AverageWaitTime time.Duration `json:"average_wait_time"`

	// OldestRequest is the time of the oldest queued request.
	OldestRequest *time.Time `json:"oldest_request"`

	// HealthScore is the queue health score (0-1).
	HealthScore float64 `json:"health_score"`

	// Issues lists queue health issues; omitted from JSON when empty.
	Issues []string `json:"issues,omitempty"`
}
 | |
| 
 | |
| // NodeHealthStatus represents health status of individual node
 | |
| type NodeHealthStatus struct {
 | |
| 	NodeID              string        `json:"node_id"`               // Node ID
 | |
| 	IsLeader            bool          `json:"is_leader"`             // Whether node is leader
 | |
| 	LastHeartbeat       time.Time     `json:"last_heartbeat"`        // Last heartbeat
 | |
| 	ResponseTime        time.Duration `json:"response_time"`         // Response time
 | |
| 	LoadAverage         float64       `json:"load_average"`          // System load
 | |
| 	ActiveTasks         int           `json:"active_tasks"`          // Active context tasks
 | |
| 	CompletedTasks      int64         `json:"completed_tasks"`       // Completed tasks
 | |
| 	FailedTasks         int64         `json:"failed_tasks"`          // Failed tasks
 | |
| 	HealthScore         float64       `json:"health_score"`          // Health score (0-1)
 | |
| 	Status              NodeStatus    `json:"status"`                // Node status
 | |
| 	Issues              []string      `json:"issues,omitempty"`      // Health issues
 | |
| }
 | |
| 
 | |
// NodeStatus is the status of a cluster node.
type NodeStatus string

// The NodeStatus values declared for cluster nodes.
const (
	// NodeStatusHealthy means the node is healthy.
	NodeStatusHealthy NodeStatus = "healthy"

	// NodeStatusDegraded means the node's performance is degraded.
	NodeStatusDegraded NodeStatus = "degraded"

	// NodeStatusUnhealthy means the node is unhealthy.
	NodeStatusUnhealthy NodeStatus = "unhealthy"

	// NodeStatusUnresponsive means the node is not responding.
	NodeStatusUnresponsive NodeStatus = "unresponsive"

	// NodeStatusOffline means the node is offline.
	NodeStatusOffline NodeStatus = "offline"
)
 | |
| 
 | |
| // ContextFailoverState represents state to transfer during context leadership failover
 | |
| type ContextFailoverState struct {
 | |
| 	// Basic failover state
 | |
| 	LeaderID            string                              `json:"leader_id"`             // Previous leader
 | |
| 	Term                int64                               `json:"term"`                  // Leadership term
 | |
| 	TransferTime        time.Time                           `json:"transfer_time"`         // When transfer occurred
 | |
| 	
 | |
| 	// Context generation state
 | |
| 	QueuedRequests      []*ContextGenerationRequest  `json:"queued_requests"`       // Queued requests
 | |
| 	ActiveJobs          map[string]*ContextGenerationJob `json:"active_jobs"`       // Active jobs
 | |
| 	CompletedJobs       []*ContextGenerationJob      `json:"completed_jobs"`        // Recent completed jobs
 | |
| 	
 | |
| 	// Cluster coordination state
 | |
| 	ClusterState        *ClusterState                `json:"cluster_state"`         // Current cluster state
 | |
| 	ResourceAllocations map[string]*ResourceAllocation `json:"resource_allocations"` // Resource allocations
 | |
| 	NodeAssignments     map[string][]string                 `json:"node_assignments"`      // Task assignments per node
 | |
| 	
 | |
| 	// Configuration state
 | |
| 	ManagerConfig       *ManagerConfig               `json:"manager_config"`        // Manager configuration
 | |
| 	GenerationPolicy    *GenerationPolicy            `json:"generation_policy"`     // Generation policy
 | |
| 	QueuePolicy         *QueuePolicy                 `json:"queue_policy"`          // Queue policy
 | |
| 	
 | |
| 	// State validation
 | |
| 	StateVersion        int64                               `json:"state_version"`         // State version
 | |
| 	Checksum            string                              `json:"checksum"`              // State checksum
 | |
| 	HealthSnapshot      *ContextClusterHealth               `json:"health_snapshot"`       // Health at transfer
 | |
| 	
 | |
| 	// Transfer metadata
 | |
| 	TransferReason      string                              `json:"transfer_reason"`       // Reason for transfer
 | |
| 	TransferSource      string                              `json:"transfer_source"`       // Who initiated transfer
 | |
| 	TransferDuration    time.Duration                       `json:"transfer_duration"`     // How long transfer took
 | |
| 	ValidationResults   *ContextStateValidation             `json:"validation_results"`    // State validation results
 | |
| }
 | |
| 
 | |
// ContextStateValidation holds the validation results for a failover state.
type ContextStateValidation struct {
	// Valid is the overall validity.
	Valid bool `json:"valid"`

	// Issues lists validation issues; omitted from JSON when empty.
	Issues []string `json:"issues,omitempty"`

	// Component validations.

	// ChecksumValid is the checksum validation result.
	ChecksumValid bool `json:"checksum_valid"`

	// VersionConsistent is the version consistency result.
	VersionConsistent bool `json:"version_consistent"`

	// TimestampValid is the timestamp validity result.
	TimestampValid bool `json:"timestamp_valid"`

	// QueueStateValid is the queue state validity result.
	QueueStateValid bool `json:"queue_state_valid"`

	// ClusterStateValid is the cluster state validity result.
	ClusterStateValid bool `json:"cluster_state_valid"`

	// ConfigValid is the configuration validity result.
	ConfigValid bool `json:"config_valid"`

	// Validation metadata.

	// ValidatedAt is when validation occurred.
	ValidatedAt time.Time `json:"validated_at"`

	// ValidatedBy is the node that performed validation.
	ValidatedBy string `json:"validated_by"`

	// ValidationDuration is the time taken for validation.
	ValidationDuration time.Duration `json:"validation_duration"`

	// Recommendations.

	// Recommendations lists recommendations for issues; omitted from JSON
	// when empty.
	Recommendations []string `json:"recommendations,omitempty"`

	// RequiresRecovery indicates whether recovery is needed.
	RequiresRecovery bool `json:"requires_recovery"`

	// RecoverySteps lists recovery steps if needed; omitted from JSON when
	// empty.
	RecoverySteps []string `json:"recovery_steps,omitempty"`
}
 | |
| 
 | |
| // ErrorSeverity is defined in interfaces.go
 | |
| 
 | |
// SLURPElectionConfig is the configuration for SLURP-enhanced elections.
type SLURPElectionConfig struct {
	// Context leadership configuration.

	// EnableContextLeadership enables context leadership.
	EnableContextLeadership bool `json:"enable_context_leadership"`

	// ContextLeadershipWeight is the weight for context leadership scoring.
	ContextLeadershipWeight float64 `json:"context_leadership_weight"`

	// RequireContextCapability requires context capability for leadership.
	RequireContextCapability bool `json:"require_context_capability"`

	// Context generation configuration.

	// AutoStartGeneration auto-starts generation on leadership.
	AutoStartGeneration bool `json:"auto_start_generation"`

	// GenerationStartDelay is the delay before starting generation.
	GenerationStartDelay time.Duration `json:"generation_start_delay"`

	// GenerationStopTimeout is the timeout for stopping generation.
	GenerationStopTimeout time.Duration `json:"generation_stop_timeout"`

	// Failover configuration.

	// ContextFailoverTimeout is the context failover timeout.
	ContextFailoverTimeout time.Duration `json:"context_failover_timeout"`

	// StateTransferTimeout is the state transfer timeout.
	StateTransferTimeout time.Duration `json:"state_transfer_timeout"`

	// ValidationTimeout is the state validation timeout.
	ValidationTimeout time.Duration `json:"validation_timeout"`

	// RequireStateValidation requires state validation.
	RequireStateValidation bool `json:"require_state_validation"`

	// Health monitoring configuration.

	// ContextHealthCheckInterval is the context health check interval.
	ContextHealthCheckInterval time.Duration `json:"context_health_check_interval"`

	// ClusterHealthThreshold is the minimum cluster health for operations.
	ClusterHealthThreshold float64 `json:"cluster_health_threshold"`

	// LeaderHealthThreshold is the minimum leader health.
	LeaderHealthThreshold float64 `json:"leader_health_threshold"`

	// Queue management configuration.

	// MaxQueueTransferSize is the maximum number of requests to transfer.
	MaxQueueTransferSize int `json:"max_queue_transfer_size"`

	// QueueDrainTimeout is the timeout for draining the queue.
	QueueDrainTimeout time.Duration `json:"queue_drain_timeout"`

	// PreserveCompletedJobs preserves completed jobs on transfer.
	PreserveCompletedJobs bool `json:"preserve_completed_jobs"`

	// Coordination configuration.

	// CoordinationTimeout is the coordination operation timeout.
	CoordinationTimeout time.Duration `json:"coordination_timeout"`

	// MaxCoordinationRetries is the maximum number of coordination retries.
	MaxCoordinationRetries int `json:"max_coordination_retries"`

	// CoordinationBackoff is the backoff between coordination retries.
	CoordinationBackoff time.Duration `json:"coordination_backoff"`
}
 | |
| 
 | |
| // DefaultSLURPElectionConfig returns default configuration for SLURP elections
 | |
| func DefaultSLURPElectionConfig() *SLURPElectionConfig {
 | |
| 	return &SLURPElectionConfig{
 | |
| 		EnableContextLeadership:     true,
 | |
| 		ContextLeadershipWeight:     0.3, // 30% weight for context capabilities
 | |
| 		RequireContextCapability:    true,
 | |
| 		
 | |
| 		AutoStartGeneration:         true,
 | |
| 		GenerationStartDelay:        5 * time.Second,
 | |
| 		GenerationStopTimeout:       30 * time.Second,
 | |
| 		
 | |
| 		ContextFailoverTimeout:      60 * time.Second,
 | |
| 		StateTransferTimeout:        30 * time.Second,
 | |
| 		ValidationTimeout:           10 * time.Second,
 | |
| 		RequireStateValidation:      true,
 | |
| 		
 | |
| 		ContextHealthCheckInterval:  30 * time.Second,
 | |
| 		ClusterHealthThreshold:      0.7, // 70% minimum cluster health
 | |
| 		LeaderHealthThreshold:       0.8, // 80% minimum leader health
 | |
| 		
 | |
| 		MaxQueueTransferSize:        1000,
 | |
| 		QueueDrainTimeout:           60 * time.Second,
 | |
| 		PreserveCompletedJobs:       true,
 | |
| 		
 | |
| 		CoordinationTimeout:         10 * time.Second,
 | |
| 		MaxCoordinationRetries:      3,
 | |
| 		CoordinationBackoff:         2 * time.Second,
 | |
| 	}
 | |
| } |