Enhance deployment system with retry functionality and improved UX

Major Improvements:
- Added retry deployment buttons in machine list for failed deployments
- Added retry button in SSH console modal footer for enhanced UX
- Enhanced deployment process with comprehensive cleanup of existing services
- Improved binary installation with password-based sudo authentication
- Updated configuration generation to include all required sections (agent, ai, network, security)
- Fixed deployment verification and error handling

Security Enhancements:
- Enhanced verifiedStopExistingServices with thorough cleanup process
- Improved binary copying with proper sudo authentication
- Added comprehensive configuration validation

UX Improvements:
- Users can retry deployments without re-running machine discovery
- Retry buttons available from both machine list and console modal
- Real-time deployment progress with detailed console output
- Clear error states with actionable retry options

Technical Changes:
- Modified ServiceDeployment.tsx with retry button components
- Enhanced api/setup_manager.go with improved deployment functions
- Updated main.go with command line argument support (--config, --setup)
- Added comprehensive zero-trust security validation system

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
anthonyrawlins
2025-08-31 10:23:27 +10:00
parent df4d98bf30
commit be761cfe20
234 changed files with 7508 additions and 38528 deletions

View File

@@ -1,233 +0,0 @@
package election
import (
"context"
"time"
)
// SLURPElectionConfig collects the SLURP-specific knobs used alongside the
// election logic: when context generation starts and stops, how its health is
// judged, and how caching and concurrency are configured.
type SLURPElectionConfig struct {
	// AutoStartGeneration, when true, begins context generation automatically
	// once this node becomes admin.
	AutoStartGeneration bool

	// GenerationStartDelay is the wait applied before context generation starts.
	GenerationStartDelay time.Duration

	// GenerationStopTimeout bounds how long stopping context generation may take.
	GenerationStopTimeout time.Duration

	// ContextHealthCheckInterval is the period between context generation
	// health checks.
	ContextHealthCheckInterval time.Duration

	// MaxContextErrors is the number of context generation errors tolerated
	// before generation is declared unhealthy.
	MaxContextErrors int

	// ContextGenerationTimeout is the timeout applied to context generation.
	ContextGenerationTimeout time.Duration

	// EnableContextCaching switches on advanced context caching.
	EnableContextCaching bool

	// ContextCacheTTL is the time-to-live of the context cache.
	ContextCacheTTL time.Duration

	// MaxConcurrentContextGen caps the number of context generation requests
	// handled concurrently.
	MaxConcurrentContextGen int

	// EnableDistributedGeneration turns on context generation distributed
	// across multiple nodes.
	EnableDistributedGeneration bool
}
// DefaultSLURPElectionConfig allocates a new SLURPElectionConfig and fills it
// with the stock settings. Each call returns a distinct pointer, so callers
// may modify the result freely.
func DefaultSLURPElectionConfig() *SLURPElectionConfig {
	cfg := new(SLURPElectionConfig)

	// Generation lifecycle.
	cfg.AutoStartGeneration = true
	cfg.GenerationStartDelay = 2 * time.Second
	cfg.GenerationStopTimeout = 30 * time.Second

	// Health monitoring.
	cfg.ContextHealthCheckInterval = 15 * time.Second
	cfg.MaxContextErrors = 3

	// Per-request limits and caching.
	cfg.ContextGenerationTimeout = 60 * time.Second
	cfg.EnableContextCaching = true
	cfg.ContextCacheTTL = 5 * time.Minute
	cfg.MaxConcurrentContextGen = 10

	// Distributed generation is off unless explicitly enabled.
	cfg.EnableDistributedGeneration = false

	return cfg
}
// ContextManager is the contract for a component that manages context
// generation. The precise semantics of each method are defined by the
// implementations, which are not part of this declaration.
type ContextManager interface {
	// Status and request listings.

	// GetGenerationStatus reports the current GenerationStatus.
	GetGenerationStatus() (*GenerationStatus, error)
	// GetActiveRequests lists the requests considered active.
	GetActiveRequests() ([]*ContextGenerationRequest, error)
	// GetCompletedRequests lists completed requests.
	// NOTE(review): limit presumably caps the number returned — confirm
	// against the implementation.
	GetCompletedRequests(limit int) ([]*ContextGenerationRequest, error)

	// Control operations.

	// RequestContextGeneration submits req for context generation.
	RequestContextGeneration(req *ContextGenerationRequest) error
	// StopGeneration asks the manager to stop generating context.
	StopGeneration() error
}
// GenerationStatus is a status snapshot for context generation. All fields
// are serialized to JSON under snake_case keys; the time.Duration field is
// encoded by encoding/json as an integer number of nanoseconds.
type GenerationStatus struct {
	LeaderID string `json:"leader_id"`

	// Request counters.
	ActiveRequests    int   `json:"active_requests"`
	CompletedRequests int64 `json:"completed_requests"`
	FailedRequests    int64 `json:"failed_requests"`

	// Timing.
	AverageLatency  time.Duration `json:"average_latency"`
	LastRequestTime time.Time     `json:"last_request_time"`

	// Capacity and cache figures.
	GenerationCapacity int     `json:"generation_capacity"`
	ContextCacheSize   int     `json:"context_cache_size"`
	CacheHitRate       float64 `json:"cache_hit_rate"`

	ActiveTasks int `json:"active_tasks"`

	// HealthStatus is a free-form string; the set of valid values is not
	// declared here.
	HealthStatus string `json:"health_status"`
}
// ContextGenerationRequest describes one request for context generation,
// together with its progress and eventual outcome.
type ContextGenerationRequest struct {
	// Identification of the request and what it asks for.
	RequestID   string                 `json:"request_id"`
	RequestorID string                 `json:"requestor_id"`
	ContextType string                 `json:"context_type"`
	Parameters  map[string]interface{} `json:"parameters"`
	Priority    int                    `json:"priority"`

	// Timestamps. CompletedAt is a pointer so that a nil value is left out of
	// the JSON output (omitempty).
	RequestedAt time.Time  `json:"requested_at"`
	CompletedAt *time.Time `json:"completed_at,omitempty"`

	// Status is one of "pending", "processing", "completed" or "failed".
	Status string `json:"status"`

	// Outcome. Both fields are dropped from the JSON output when unset.
	Result       *ContextResult `json:"result,omitempty"`
	ErrorMessage string         `json:"error_message,omitempty"`
}
// ContextResult carries the output of a context generation run along with
// bookkeeping about how it was produced.
type ContextResult struct {
	// Generated payload and its accompanying metadata.
	Context  string                 `json:"context"`
	Metadata map[string]interface{} `json:"metadata"`

	// When the result was generated and how long generation took.
	// GenerationTime is encoded in JSON as integer nanoseconds.
	GeneratedAt    time.Time     `json:"generated_at"`
	GenerationTime time.Duration `json:"generation_time"`

	CacheUsed bool `json:"cache_used"`

	// Quality is expressed on a 0.0-1.0 scale.
	Quality float64 `json:"quality"`

	TokenCount int `json:"token_count"`
}
// ContextGenerationJob describes a context generation job that is currently
// active, linking it to the request it serves.
type ContextGenerationJob struct {
	JobID   string                    `json:"job_id"`
	Request *ContextGenerationRequest `json:"request"`

	StartedAt time.Time `json:"started_at"`
	WorkerID  string    `json:"worker_id"`

	// Status is a free-form string; valid values are not declared on this type.
	Status string `json:"status"`

	// Progress is expressed on a 0.0-1.0 scale.
	Progress float64 `json:"progress"`

	// ETA is left out of the JSON output when nil.
	ETA *time.Time `json:"eta,omitempty"`
}
// ContextLeadershipCallbacks bundles the hooks that can be registered for
// context leadership events. Every field is a plain function value.
// NOTE(review): whether nil hooks are tolerated depends on the code that
// invokes them — confirm before leaving any unset.
type ContextLeadershipCallbacks struct {
	// Leadership transitions. The first two hooks return an error to the
	// invoker; OnContextLeaderChanged does not.
	OnBecomeContextLeader   func(ctx context.Context, term int64) error
	OnLoseContextLeadership func(ctx context.Context, reason string) error
	OnContextLeaderChanged  func(oldLeader, newLeader string, term int64)

	// Generation start and stop notifications.
	OnContextGenerationStarted func(nodeID string)
	OnContextGenerationStopped func(nodeID string, reason string)

	// Error reporting, tagged with an ErrorSeverity.
	OnContextError func(err error, severity ErrorSeverity)

	// Per-request notifications.
	OnContextRequestReceived  func(req *ContextGenerationRequest)
	OnContextRequestCompleted func(req *ContextGenerationRequest, result *ContextResult)
}
// ErrorSeverity is a string-backed label for how serious a context error is.
type ErrorSeverity string

// The severity levels, listed from least to most serious.
const (
	ErrorSeverityLow      = ErrorSeverity("low")
	ErrorSeverityMedium   = ErrorSeverity("medium")
	ErrorSeverityHigh     = ErrorSeverity("high")
	ErrorSeverityCritical = ErrorSeverity("critical")
)
// ContextFailoverState is the state record used for context leadership
// failover. It aggregates leader identity, pending and running work,
// configuration and cluster snapshots into one JSON-serializable value.
type ContextFailoverState struct {
	// Leadership identity and versioning.
	LeaderID     string    `json:"leader_id"`
	Term         int64     `json:"term"`
	TransferTime time.Time `json:"transfer_time"`
	StateVersion int64     `json:"state_version"`

	// Work carried in the state: queued requests, and active jobs held in a
	// string-keyed map.
	// NOTE(review): the map key is presumably the job ID — confirm.
	QueuedRequests []*ContextGenerationRequest      `json:"queued_requests"`
	ActiveJobs     map[string]*ContextGenerationJob `json:"active_jobs"`

	// Configuration and cluster snapshots.
	ManagerConfig  *ManagerConfig        `json:"manager_config"`
	ClusterState   *ContextClusterState  `json:"cluster_state"`
	HealthSnapshot *ContextClusterHealth `json:"health_snapshot"`

	// Checksum is stored as a string.
	// NOTE(review): the algorithm and the fields it covers are not defined on
	// this type — confirm with the producing code.
	Checksum string `json:"checksum"`
}
// ManagerConfig is the configuration record for the context manager. The two
// time.Duration fields are encoded in JSON as integer nanoseconds.
type ManagerConfig struct {
	// Concurrency and per-job timeout.
	MaxConcurrentJobs int           `json:"max_concurrent_jobs"`
	DefaultTimeout    time.Duration `json:"default_timeout"`

	// Caching switch and entry lifetime.
	EnableCaching bool          `json:"enable_caching"`
	CacheTTL      time.Duration `json:"cache_ttl"`

	// Retry count and worker pool sizing.
	RetryAttempts  int `json:"retry_attempts"`
	WorkerPoolSize int `json:"worker_pool_size"`
}
// DefaultManagerConfig produces a freshly allocated ManagerConfig carrying the
// stock settings. Each call yields an independent value.
func DefaultManagerConfig() *ManagerConfig {
	var defaults ManagerConfig

	defaults.MaxConcurrentJobs = 10
	defaults.DefaultTimeout = 60 * time.Second

	defaults.EnableCaching = true
	defaults.CacheTTL = 5 * time.Minute

	defaults.RetryAttempts = 3
	defaults.WorkerPoolSize = 5

	return &defaults
}
// ContextClusterState captures the state of the context generation cluster:
// its member nodes plus aggregate capacity figures.
type ContextClusterState struct {
	// Nodes is a string-keyed map of per-node records.
	// NOTE(review): the key is presumably the node ID — confirm.
	Nodes map[string]*ContextNodeInfo `json:"nodes"`

	// Aggregate capacity figures.
	TotalCapacity     int     `json:"total_capacity"`
	AvailableCapacity int     `json:"available_capacity"`
	LoadBalance       float64 `json:"load_balance"`

	LastUpdate time.Time `json:"last_update"`
}
// ContextNodeInfo is the per-node record for a member of the context cluster.
type ContextNodeInfo struct {
	NodeID string `json:"node_id"`

	// Capacity and current load.
	Capacity   int `json:"capacity"`
	ActiveJobs int `json:"active_jobs"`

	// Liveness and health. HealthStatus is a free-form string.
	LastSeen     time.Time `json:"last_seen"`
	HealthStatus string    `json:"health_status"`

	// Performance figures. AverageLatency is encoded in JSON as integer
	// nanoseconds.
	AverageLatency time.Duration `json:"average_latency"`
	SuccessRate    float64       `json:"success_rate"`
}
// ContextClusterHealth summarizes the overall health of the context
// generation cluster.
type ContextClusterHealth struct {
	// Node counts.
	TotalNodes     int `json:"total_nodes"`
	HealthyNodes   int `json:"healthy_nodes"`
	UnhealthyNodes int `json:"unhealthy_nodes"`

	GenerationActive bool `json:"generation_active"`

	// Aggregate performance. AverageLatency is encoded in JSON as integer
	// nanoseconds.
	AverageLatency time.Duration `json:"average_latency"`
	SuccessRate    float64       `json:"success_rate"`

	// OverallHealthScore is expressed on a 0.0-1.0 scale.
	OverallHealthScore float64 `json:"overall_health_score"`

	// Election and health-check timestamps.
	LastElection    time.Time `json:"last_election"`
	NextHealthCheck time.Time `json:"next_health_check"`

	// Utilization and error figures.
	CapacityUtilization float64 `json:"capacity_utilization"`
	ErrorRate           float64 `json:"error_rate"`

	// Issues is left out of the JSON output when empty.
	Issues []string `json:"issues,omitempty"`
}
// ContextStateValidation records the outcome of validating context state: an
// overall verdict, who validated and when, one flag per individual check, and
// any follow-up information.
type ContextStateValidation struct {
	// Overall verdict and provenance. ValidationDuration is encoded in JSON
	// as integer nanoseconds.
	Valid              bool          `json:"valid"`
	ValidatedAt        time.Time     `json:"validated_at"`
	ValidatedBy        string        `json:"validated_by"`
	ValidationDuration time.Duration `json:"validation_duration"`

	// Individual check results.
	ChecksumValid     bool `json:"checksum_valid"`
	TimestampValid    bool `json:"timestamp_valid"`
	VersionConsistent bool `json:"version_consistent"`
	QueueStateValid   bool `json:"queue_state_valid"`
	ClusterStateValid bool `json:"cluster_state_valid"`
	ConfigValid       bool `json:"config_valid"`

	// Follow-up. Both slices are left out of the JSON output when empty.
	RequiresRecovery bool     `json:"requires_recovery"`
	Issues           []string `json:"issues,omitempty"`
	RecoverySteps    []string `json:"recovery_steps,omitempty"`
}
// LeaderInfo identifies the current context leader: which node it is, the
// term it holds, and when it was elected.
type LeaderInfo struct {
	NodeID    string    `json:"node_id"`
	Term      int64     `json:"term"`
	ElectedAt time.Time `json:"elected_at"`
}