Enhance deployment system with retry functionality and improved UX

Major Improvements:
- Added retry deployment buttons in machine list for failed deployments
- Added retry button in SSH console modal footer for enhanced UX
- Enhanced deployment process with comprehensive cleanup of existing services
- Improved binary installation with password-based sudo authentication
- Updated configuration generation to include all required sections (agent, ai, network, security)
- Fixed deployment verification and error handling

Security Enhancements:
- Enhanced verifiedStopExistingServices with thorough cleanup process
- Improved binary copying with proper sudo authentication
- Added comprehensive configuration validation

UX Improvements:
- Users can retry deployments without re-running machine discovery
- Retry buttons available from both machine list and console modal
- Real-time deployment progress with detailed console output
- Clear error states with actionable retry options

Technical Changes:
- Modified ServiceDeployment.tsx with retry button components
- Enhanced api/setup_manager.go with improved deployment functions
- Updated main.go with command line argument support (--config, --setup)
- Added comprehensive zero-trust security validation system

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-08-31 10:23:27 +10:00
parent df4d98bf30
commit be761cfe20
234 changed files with 7508 additions and 38528 deletions
--- a/pkg/election/slurp_types.go
+++ b/pkg/election/slurp_types.go
@@ -1,233 +0,0 @@
-package election
-
-import (
-	"context"
-	"time"
-)
-
-// SLURPElectionConfig holds SLURP-specific election configuration
-type SLURPElectionConfig struct {
-	// Auto-start context generation when becoming admin
-	AutoStartGeneration bool
-	
-	// Delay before starting context generation
-	GenerationStartDelay time.Duration
-	
-	// Timeout for stopping context generation
-	GenerationStopTimeout time.Duration
-	
-	// Health check interval for context generation
-	ContextHealthCheckInterval time.Duration
-	
-	// Maximum allowed context generation errors before declaring unhealthy
-	MaxContextErrors int
-	
-	// Context generation timeout
-	ContextGenerationTimeout time.Duration
-	
-	// Enable advanced context caching
-	EnableContextCaching bool
-	
-	// Context cache TTL
-	ContextCacheTTL time.Duration
-	
-	// Maximum concurrent context generation requests
-	MaxConcurrentContextGen int
-	
-	// Enable distributed context generation (across multiple nodes)
-	EnableDistributedGeneration bool
-}
-
-// DefaultSLURPElectionConfig returns default SLURP election configuration
-func DefaultSLURPElectionConfig() *SLURPElectionConfig {
-	return &SLURPElectionConfig{
-		AutoStartGeneration:         true,
-		GenerationStartDelay:        2 * time.Second,
-		GenerationStopTimeout:       30 * time.Second,
-		ContextHealthCheckInterval:  15 * time.Second,
-		MaxContextErrors:            3,
-		ContextGenerationTimeout:    60 * time.Second,
-		EnableContextCaching:        true,
-		ContextCacheTTL:             5 * time.Minute,
-		MaxConcurrentContextGen:     10,
-		EnableDistributedGeneration: false,
-	}
-}
-
-// ContextManager interface for managing context generation
-type ContextManager interface {
-	GetGenerationStatus() (*GenerationStatus, error)
-	RequestContextGeneration(req *ContextGenerationRequest) error
-	StopGeneration() error
-	GetActiveRequests() ([]*ContextGenerationRequest, error)
-	GetCompletedRequests(limit int) ([]*ContextGenerationRequest, error)
-}
-
-// GenerationStatus represents the status of context generation
-type GenerationStatus struct {
-	LeaderID            string             `json:"leader_id"`
-	ActiveRequests      int                `json:"active_requests"`
-	CompletedRequests   int64              `json:"completed_requests"`
-	FailedRequests      int64              `json:"failed_requests"`
-	AverageLatency      time.Duration      `json:"average_latency"`
-	LastRequestTime     time.Time          `json:"last_request_time"`
-	GenerationCapacity  int                `json:"generation_capacity"`
-	ContextCacheSize    int                `json:"context_cache_size"`
-	CacheHitRate        float64            `json:"cache_hit_rate"`
-	ActiveTasks         int                `json:"active_tasks"`
-	HealthStatus        string             `json:"health_status"`
-}
-
-// ContextGenerationRequest represents a request for context generation
-type ContextGenerationRequest struct {
-	RequestID     string                 `json:"request_id"`
-	RequestorID   string                 `json:"requestor_id"`
-	ContextType   string                 `json:"context_type"`
-	Parameters    map[string]interface{} `json:"parameters"`
-	Priority      int                    `json:"priority"`
-	RequestedAt   time.Time             `json:"requested_at"`
-	CompletedAt   *time.Time            `json:"completed_at,omitempty"`
-	Status        string                `json:"status"` // "pending", "processing", "completed", "failed"
-	Result        *ContextResult        `json:"result,omitempty"`
-	ErrorMessage  string                `json:"error_message,omitempty"`
-}
-
-// ContextResult holds the result of context generation
-type ContextResult struct {
-	Context         string                 `json:"context"`
-	Metadata        map[string]interface{} `json:"metadata"`
-	GeneratedAt     time.Time             `json:"generated_at"`
-	GenerationTime  time.Duration         `json:"generation_time"`
-	CacheUsed       bool                  `json:"cache_used"`
-	Quality         float64               `json:"quality"` // 0.0-1.0
-	TokenCount      int                   `json:"token_count"`
-}
-
-// ContextGenerationJob represents an active context generation job
-type ContextGenerationJob struct {
-	JobID       string                     `json:"job_id"`
-	Request     *ContextGenerationRequest  `json:"request"`
-	StartedAt   time.Time                 `json:"started_at"`
-	WorkerID    string                    `json:"worker_id"`
-	Status      string                    `json:"status"`
-	Progress    float64                   `json:"progress"` // 0.0-1.0
-	ETA         *time.Time               `json:"eta,omitempty"`
-}
-
-// ContextLeadershipCallbacks defines callbacks for context leadership events
-type ContextLeadershipCallbacks struct {
-	OnBecomeContextLeader       func(ctx context.Context, term int64) error
-	OnLoseContextLeadership     func(ctx context.Context, reason string) error
-	OnContextLeaderChanged      func(oldLeader, newLeader string, term int64)
-	OnContextGenerationStarted  func(nodeID string)
-	OnContextGenerationStopped  func(nodeID string, reason string)
-	OnContextError              func(err error, severity ErrorSeverity)
-	OnContextRequestReceived    func(req *ContextGenerationRequest)
-	OnContextRequestCompleted   func(req *ContextGenerationRequest, result *ContextResult)
-}
-
-// ErrorSeverity defines the severity levels for context errors
-type ErrorSeverity string
-
-const (
-	ErrorSeverityLow    ErrorSeverity = "low"
-	ErrorSeverityMedium ErrorSeverity = "medium"
-	ErrorSeverityHigh   ErrorSeverity = "high"
-	ErrorSeverityCritical ErrorSeverity = "critical"
-)
-
-// ContextFailoverState holds state for context leadership failover
-type ContextFailoverState struct {
-	LeaderID        string                         `json:"leader_id"`
-	Term            int64                         `json:"term"`
-	TransferTime    time.Time                     `json:"transfer_time"`
-	StateVersion    int64                         `json:"state_version"`
-	QueuedRequests  []*ContextGenerationRequest   `json:"queued_requests"`
-	ActiveJobs      map[string]*ContextGenerationJob `json:"active_jobs"`
-	ManagerConfig   *ManagerConfig                `json:"manager_config"`
-	ClusterState    *ContextClusterState          `json:"cluster_state"`
-	HealthSnapshot  *ContextClusterHealth         `json:"health_snapshot"`
-	Checksum        string                        `json:"checksum"`
-}
-
-// ManagerConfig holds configuration for the context manager
-type ManagerConfig struct {
-	MaxConcurrentJobs    int           `json:"max_concurrent_jobs"`
-	DefaultTimeout       time.Duration `json:"default_timeout"`
-	EnableCaching        bool          `json:"enable_caching"`
-	CacheTTL            time.Duration `json:"cache_ttl"`
-	RetryAttempts       int           `json:"retry_attempts"`
-	WorkerPoolSize      int           `json:"worker_pool_size"`
-}
-
-// DefaultManagerConfig returns default manager configuration
-func DefaultManagerConfig() *ManagerConfig {
-	return &ManagerConfig{
-		MaxConcurrentJobs: 10,
-		DefaultTimeout:    60 * time.Second,
-		EnableCaching:     true,
-		CacheTTL:         5 * time.Minute,
-		RetryAttempts:    3,
-		WorkerPoolSize:   5,
-	}
-}
-
-// ContextClusterState holds the state of the context generation cluster
-type ContextClusterState struct {
-	Nodes            map[string]*ContextNodeInfo `json:"nodes"`
-	TotalCapacity    int                        `json:"total_capacity"`
-	AvailableCapacity int                       `json:"available_capacity"`
-	LoadBalance      float64                    `json:"load_balance"`
-	LastUpdate       time.Time                  `json:"last_update"`
-}
-
-// ContextNodeInfo holds information about a node in the context cluster
-type ContextNodeInfo struct {
-	NodeID           string    `json:"node_id"`
-	Capacity         int       `json:"capacity"`
-	ActiveJobs       int       `json:"active_jobs"`
-	LastSeen         time.Time `json:"last_seen"`
-	HealthStatus     string    `json:"health_status"`
-	AverageLatency   time.Duration `json:"average_latency"`
-	SuccessRate      float64   `json:"success_rate"`
-}
-
-// ContextClusterHealth represents the overall health of the context generation cluster
-type ContextClusterHealth struct {
-	TotalNodes         int       `json:"total_nodes"`
-	HealthyNodes       int       `json:"healthy_nodes"`
-	UnhealthyNodes     int       `json:"unhealthy_nodes"`
-	GenerationActive   bool      `json:"generation_active"`
-	AverageLatency     time.Duration `json:"average_latency"`
-	SuccessRate        float64   `json:"success_rate"`
-	OverallHealthScore float64   `json:"overall_health_score"` // 0.0-1.0
-	LastElection       time.Time `json:"last_election"`
-	NextHealthCheck    time.Time `json:"next_health_check"`
-	CapacityUtilization float64  `json:"capacity_utilization"`
-	ErrorRate          float64   `json:"error_rate"`
-	Issues             []string  `json:"issues,omitempty"`
-}
-
-// ContextStateValidation holds the results of context state validation
-type ContextStateValidation struct {
-	Valid               bool      `json:"valid"`
-	ValidatedAt         time.Time `json:"validated_at"`
-	ValidatedBy         string    `json:"validated_by"`
-	ValidationDuration  time.Duration `json:"validation_duration"`
-	ChecksumValid       bool      `json:"checksum_valid"`
-	TimestampValid      bool      `json:"timestamp_valid"`
-	VersionConsistent   bool      `json:"version_consistent"`
-	QueueStateValid     bool      `json:"queue_state_valid"`
-	ClusterStateValid   bool      `json:"cluster_state_valid"`
-	ConfigValid         bool      `json:"config_valid"`
-	RequiresRecovery    bool      `json:"requires_recovery"`
-	Issues              []string  `json:"issues,omitempty"`
-	RecoverySteps       []string  `json:"recovery_steps,omitempty"`
-}
-
-// LeaderInfo contains information about the current context leader
-type LeaderInfo struct {
-	NodeID     string    `json:"node_id"`
-	Term       int64     `json:"term"`
-	ElectedAt  time.Time `json:"elected_at"`
-}