Enhance deployment system with retry functionality and improved UX

Major Improvements:
- Added retry deployment buttons in machine list for failed deployments
- Added retry button in SSH console modal footer for enhanced UX
- Enhanced deployment process with comprehensive cleanup of existing services
- Improved binary installation with password-based sudo authentication
- Updated configuration generation to include all required sections (agent, ai, network, security)
- Fixed deployment verification and error handling

Security Enhancements:
- Enhanced verifiedStopExistingServices with thorough cleanup process
- Improved binary copying with proper sudo authentication
- Added comprehensive configuration validation

UX Improvements:
- Users can retry deployments without re-running machine discovery
- Retry buttons available from both machine list and console modal
- Real-time deployment progress with detailed console output
- Clear error states with actionable retry options

Technical Changes:
- Modified ServiceDeployment.tsx with retry button components
- Enhanced api/setup_manager.go with improved deployment functions
- Updated main.go with command line argument support (--config, --setup)
- Added comprehensive zero-trust security validation system

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
anthonyrawlins
2025-08-31 10:23:27 +10:00
parent df4d98bf30
commit be761cfe20
234 changed files with 7508 additions and 38528 deletions

View File

@@ -0,0 +1,604 @@
package runtime
import (
"bytes"
"context"
"encoding/json"
"fmt"
"net/http"
"os"
"path/filepath"
"reflect"
"time"
"chorus.services/bzzz/api"
"chorus.services/bzzz/coordinator"
"chorus.services/bzzz/discovery"
"chorus.services/bzzz/logging"
"chorus.services/bzzz/p2p"
"chorus.services/bzzz/pkg/config"
"chorus.services/bzzz/pkg/crypto"
"chorus.services/bzzz/pkg/dht"
"chorus.services/bzzz/pkg/election"
"chorus.services/bzzz/pkg/health"
"chorus.services/bzzz/pkg/shutdown"
"chorus.services/bzzz/pkg/ucxi"
"chorus.services/bzzz/pkg/ucxl"
"chorus.services/bzzz/pubsub"
"chorus.services/bzzz/reasoning"
"chorus.services/hmmm/pkg/hmmm"
"github.com/libp2p/go-libp2p/core/peer"
"github.com/multiformats/go-multiaddr"
)
// initializeConfig resolves the configuration file path, verifies that setup
// has been completed, then loads and validates the configuration, storing the
// result on services.Config. The path is chosen from the explicit argument,
// then the BZZZ_CONFIG_PATH environment variable, then a built-in default.
func (r *StandardRuntime) initializeConfig(configPath string, services *RuntimeServices) error {
	// Resolve the config path: explicit argument > env var > default location.
	if configPath == "" {
		if envPath := os.Getenv("BZZZ_CONFIG_PATH"); envPath != "" {
			configPath = envPath
		} else {
			configPath = ".bzzz/config.yaml"
		}
	}

	// Refuse to continue until setup has produced a usable configuration.
	if config.IsSetupRequired(configPath) {
		if !r.config.EnableSetupMode {
			return fmt.Errorf("setup required but setup mode disabled")
		}
		r.logger.Info("🔧 Setup required - setup mode enabled")
		return fmt.Errorf("setup required - please run setup first")
	}

	loaded, err := config.LoadConfig(configPath)
	if err != nil {
		return fmt.Errorf("failed to load configuration: %w", err)
	}
	if !config.IsValidConfiguration(loaded) {
		return fmt.Errorf("configuration is invalid")
	}

	services.Config = loaded
	return nil
}
// initializeP2P creates the libp2p node, backfills node- and role-specific
// configuration defaults for fields the operator left empty, logs the node's
// identity and listening addresses, and starts mDNS peer discovery.
// On success services.Node and services.MDNSDiscovery are populated.
func (r *StandardRuntime) initializeP2P(ctx context.Context, services *RuntimeServices) error {
	// Initialize P2P node
	node, err := p2p.NewNode(ctx)
	if err != nil {
		return fmt.Errorf("failed to create P2P node: %w", err)
	}
	services.Node = node
	// Apply node-specific configuration if agent ID is not set.
	// Only empty fields are filled from the derived defaults, so explicit
	// configuration always takes precedence.
	if services.Config.Agent.ID == "" {
		nodeID := node.ID().ShortString()
		nodeSpecificCfg := config.GetNodeSpecificDefaults(nodeID)
		// Merge node-specific defaults with loaded config
		services.Config.Agent.ID = nodeSpecificCfg.Agent.ID
		if len(services.Config.Agent.Capabilities) == 0 {
			services.Config.Agent.Capabilities = nodeSpecificCfg.Agent.Capabilities
		}
		if len(services.Config.Agent.Models) == 0 {
			services.Config.Agent.Models = nodeSpecificCfg.Agent.Models
		}
		if services.Config.Agent.Specialization == "" {
			services.Config.Agent.Specialization = nodeSpecificCfg.Agent.Specialization
		}
	}
	// Apply role-based configuration if no role is set.
	// A failed role application is logged as a warning but is not fatal.
	if services.Config.Agent.Role == "" {
		defaultRole := getDefaultRoleForSpecialization(services.Config.Agent.Specialization)
		if defaultRole != "" {
			r.logger.Info("🎭 Applying default role: %s", defaultRole)
			if err := services.Config.ApplyRoleDefinition(defaultRole); err != nil {
				r.logger.Warn("⚠️ Failed to apply role definition: %v", err)
			} else {
				r.logger.Info("✅ Role applied: %s", services.Config.Agent.Role)
			}
		}
	}
	r.logger.Info("🐝 P2P node started successfully")
	r.logger.Info("📍 Node ID: %s", node.ID().ShortString())
	r.logger.Info("🤖 Agent ID: %s", services.Config.Agent.ID)
	r.logger.Info("🎯 Specialization: %s", services.Config.Agent.Specialization)
	// Display authority level if role is configured
	if services.Config.Agent.Role != "" {
		authority, err := services.Config.GetRoleAuthority(services.Config.Agent.Role)
		if err == nil {
			r.logger.Info("🎭 Role: %s (Authority: %s)", services.Config.Agent.Role, authority)
			if authority == config.AuthorityMaster {
				// Master-authority nodes are eligible to win the admin election
				// (see the election callbacks in initializeCoreServices).
				r.logger.Info("👑 This node can become admin/SLURP")
			}
		}
	}
	// Log listening addresses
	r.logger.Info("🔗 Listening addresses:")
	for _, addr := range node.Addresses() {
		r.logger.Info(" %s/p2p/%s", addr, node.ID())
	}
	// Initialize mDNS discovery
	mdnsDiscovery, err := discovery.NewMDNSDiscovery(ctx, node.Host(), "bzzz-peer-discovery")
	if err != nil {
		return fmt.Errorf("failed to create mDNS discovery: %w", err)
	}
	services.MDNSDiscovery = mdnsDiscovery
	return nil
}
// initializeCoreServices wires up the collaboration stack in dependency order:
// hypercore logging, PubSub, the HMMM meta-discussion router, role-based
// topics, the admin election system, DHT-backed encrypted storage, the task
// coordinator, and the HTTP/UCXI servers. DHT and UCXI failures are logged
// but non-fatal; any other failure aborts initialization.
func (r *StandardRuntime) initializeCoreServices(ctx context.Context, services *RuntimeServices) error {
	// Initialize Hypercore-style logger
	hlog := logging.NewHypercoreLog(services.Node.ID())
	hlog.Append(logging.PeerJoined, map[string]interface{}{"status": "started"})
	r.logger.Info("📝 Hypercore logger initialized")
	// Initialize PubSub with hypercore logging
	ps, err := pubsub.NewPubSubWithLogger(ctx, services.Node.Host(), "bzzz/coordination/v1", "hmmm/meta-discussion/v1", hlog)
	if err != nil {
		return fmt.Errorf("failed to create PubSub: %w", err)
	}
	services.PubSub = ps
	// Initialize HMMM Router
	hmmmAdapter := pubsub.NewGossipPublisher(ps)
	hmmmRouter := hmmm.NewRouter(hmmmAdapter, hmmm.DefaultConfig())
	services.HmmmRouter = hmmmRouter
	r.logger.Info("🐜 HMMM Router initialized and attached to Bzzz pubsub")
	// Join role-based topics if role is configured (non-fatal on failure)
	if services.Config.Agent.Role != "" {
		if err := ps.JoinRoleBasedTopics(services.Config.Agent.Role, services.Config.Agent.Expertise, services.Config.Agent.ReportsTo); err != nil {
			r.logger.Warn("⚠️ Failed to join role-based topics: %v", err)
		} else {
			r.logger.Info("🎯 Joined role-based collaboration topics")
		}
	}
	// Optional: HMMM per-issue room smoke test, opt-in via environment flag
	if os.Getenv("BZZZ_HMMM_SMOKE") == "1" {
		r.performHMMMSmokeTest(ps, services.Node)
	}
	// Initialize Admin Election System
	electionManager := election.NewElectionManager(ctx, services.Config, services.Node.Host(), ps, services.Node.ID().ShortString())
	// Set election callbacks; these closures capture services and run whenever
	// admin leadership changes or an election round completes.
	electionManager.SetCallbacks(
		func(oldAdmin, newAdmin string) {
			r.logger.Info("👑 Admin changed: %s -> %s", oldAdmin, newAdmin)
			// If this node becomes admin, enable SLURP functionality
			if newAdmin == services.Node.ID().ShortString() {
				r.logger.Info("🎯 This node is now admin - enabling SLURP functionality")
				services.Config.Slurp.Enabled = true
				// Apply admin role configuration
				if err := services.Config.ApplyRoleDefinition("admin"); err != nil {
					r.logger.Warn("⚠️ Failed to apply admin role: %v", err)
				}
			}
		},
		func(winner string) {
			r.logger.Info("🏆 Election completed, winner: %s", winner)
		},
	)
	services.ElectionManager = electionManager
	// Initialize DHT and encrypted storage if enabled
	if err := r.initializeDHT(ctx, services); err != nil {
		r.logger.Warn("⚠️ DHT initialization failed: %v", err)
		// DHT failure is not fatal, continue without it
	}
	// Initialize Task Coordinator
	taskCoordinator := coordinator.NewTaskCoordinator(
		ctx,
		ps,
		hlog,
		services.Config,
		services.Node.ID().ShortString(),
		hmmmRouter,
	)
	services.TaskCoordinator = taskCoordinator
	// Initialize HTTP API server; a configured custom port overrides the
	// default of 8080.
	httpPort := 8080
	if r.config.CustomPorts.HTTPPort != 0 {
		httpPort = r.config.CustomPorts.HTTPPort
	}
	httpServer := api.NewHTTPServer(httpPort, hlog, ps)
	services.HTTPServer = httpServer
	// Initialize UCXI server if enabled
	if err := r.initializeUCXI(services); err != nil {
		r.logger.Warn("⚠️ UCXI initialization failed: %v", err)
		// UCXI failure is not fatal, continue without it
	}
	return nil
}
// initializeDHT creates and bootstraps the libp2p DHT, connects to any
// configured bootstrap peers, then layers encrypted storage and the decision
// publisher on top of it. Returns nil immediately when the DHT is disabled
// in configuration. Individual bootstrap-peer failures are logged and skipped.
func (r *StandardRuntime) initializeDHT(ctx context.Context, services *RuntimeServices) error {
	if !services.Config.V2.DHT.Enabled {
		r.logger.Info("⚪ DHT disabled in configuration")
		return nil
	}
	// Create DHT
	dhtNode, err := dht.NewLibP2PDHT(ctx, services.Node.Host())
	if err != nil {
		return fmt.Errorf("failed to create DHT: %w", err)
	}
	services.DHT = dhtNode
	r.logger.Info("🕸️ DHT initialized")
	// Bootstrap DHT; failure is logged but not fatal since peers may still
	// be reached via the explicit bootstrap list below.
	if err := dhtNode.Bootstrap(); err != nil {
		r.logger.Warn("⚠️ DHT bootstrap failed: %v", err)
	}
	// Connect to bootstrap peers if configured; malformed addresses and
	// unreachable peers are skipped individually.
	for _, addrStr := range services.Config.V2.DHT.BootstrapPeers {
		addr, err := multiaddr.NewMultiaddr(addrStr)
		if err != nil {
			r.logger.Warn("⚠️ Invalid bootstrap address %s: %v", addrStr, err)
			continue
		}
		// Extract peer info from multiaddr
		info, err := peer.AddrInfoFromP2pAddr(addr)
		if err != nil {
			r.logger.Warn("⚠️ Failed to parse peer info from %s: %v", addrStr, err)
			continue
		}
		if err := services.Node.Host().Connect(ctx, *info); err != nil {
			r.logger.Warn("⚠️ Failed to connect to bootstrap peer %s: %v", addrStr, err)
		} else {
			r.logger.Info("🔗 Connected to DHT bootstrap peer: %s", addrStr)
		}
	}
	// Initialize encrypted storage on top of the DHT
	encryptedStorage := dht.NewEncryptedDHTStorage(
		ctx,
		services.Node.Host(),
		dhtNode,
		services.Config,
		services.Node.ID().ShortString(),
	)
	services.EncryptedStorage = encryptedStorage
	// Start cache cleanup
	encryptedStorage.StartCacheCleanup(5 * time.Minute)
	r.logger.Info("🔐 Encrypted DHT storage initialized")
	// Initialize decision publisher
	decisionPublisher := ucxl.NewDecisionPublisher(
		ctx,
		services.Config,
		encryptedStorage,
		services.Node.ID().ShortString(),
		services.Config.Agent.ID,
	)
	services.DecisionPublisher = decisionPublisher
	r.logger.Info("📤 Decision publisher initialized")
	// Test the encryption system on startup, in the background so DHT
	// initialization is not delayed.
	go func() {
		time.Sleep(2 * time.Second) // Wait for initialization
		r.testEncryptionSystems(decisionPublisher, encryptedStorage)
	}()
	return nil
}
// initializeUCXI constructs the UCXI content server (storage, address
// resolver, HTTP server) and stores it on services.UCXIServer. It is a no-op
// returning nil when the UCXL protocol or its server is disabled in config.
func (r *StandardRuntime) initializeUCXI(services *RuntimeServices) error {
	if !services.Config.UCXL.Enabled || !services.Config.UCXL.Server.Enabled {
		r.logger.Info("⚪ UCXI server disabled (UCXL protocol not enabled)")
		return nil
	}

	// Resolve where UCXI content lives on disk, defaulting to a temp dir.
	dir := services.Config.UCXL.Storage.Directory
	if dir == "" {
		dir = filepath.Join(os.TempDir(), "bzzz-ucxi-storage")
	}
	contentStore, err := ucxi.NewBasicContentStorage(dir)
	if err != nil {
		return fmt.Errorf("failed to create UCXI storage: %w", err)
	}

	// Address resolver keyed by this node's short ID, with the configured
	// cache TTL for resolved entries.
	addrResolver := ucxi.NewBasicAddressResolver(services.Node.ID().ShortString())
	addrResolver.SetDefaultTTL(services.Config.UCXL.Resolution.CacheTTL)
	// TODO: Add P2P integration hooks here
	// addrResolver.SetAnnounceHook(...)
	// addrResolver.SetDiscoverHook(...)

	// A runtime custom port overrides the configured server port.
	port := services.Config.UCXL.Server.Port
	if r.config.CustomPorts.UCXIPort != 0 {
		port = r.config.CustomPorts.UCXIPort
	}

	services.UCXIServer = ucxi.NewServer(ucxi.ServerConfig{
		Port:     port,
		BasePath: services.Config.UCXL.Server.BasePath,
		Resolver: addrResolver,
		Storage:  contentStore,
		Logger:   ucxi.SimpleLogger{},
	})
	return nil
}
// applyBinarySpecificConfig dispatches to the configuration routine matching
// the binary type (agent or HAP). An unrecognized type is an error.
func (r *StandardRuntime) applyBinarySpecificConfig(binaryType BinaryType, services *RuntimeServices) error {
	if binaryType == BinaryTypeAgent {
		return r.applyAgentSpecificConfig(services)
	}
	if binaryType == BinaryTypeHAP {
		return r.applyHAPSpecificConfig(services)
	}
	return fmt.Errorf("unknown binary type: %v", binaryType)
}
// applyAgentSpecificConfig configures agent capabilities (model detection via
// setupAgentCapabilities) and fills in the agent default ports (HTTP 8080,
// health 8081) for any port not explicitly configured.
func (r *StandardRuntime) applyAgentSpecificConfig(services *RuntimeServices) error {
	// Configure agent-specific capabilities and model detection.
	r.setupAgentCapabilities(services)

	// Agent-specific port defaults; explicit settings are never overridden.
	ports := &r.config.CustomPorts
	if ports.HTTPPort == 0 {
		ports.HTTPPort = 8080
	}
	if ports.HealthPort == 0 {
		ports.HealthPort = 8081
	}
	return nil
}
// applyHAPSpecificConfig fills in HAP default ports (HTTP 8090, health 8091),
// deliberately offset from the agent defaults so both binaries can coexist on
// one host. Explicitly configured ports are left untouched.
func (r *StandardRuntime) applyHAPSpecificConfig(services *RuntimeServices) error {
	ports := &r.config.CustomPorts
	if ports.HTTPPort == 0 {
		ports.HTTPPort = 8090
	}
	if ports.HealthPort == 0 {
		ports.HealthPort = 8091
	}
	// HAP doesn't need some agent-specific services
	// This could be expanded to disable certain features
	r.logger.Info("🎭 HAP-specific configuration applied")
	return nil
}
// initializeMonitoring creates the shutdown and health managers, links them
// together, and registers this runtime's health checks and graceful-shutdown
// hooks. Both managers are stored on services.
func (r *StandardRuntime) initializeMonitoring(services *RuntimeServices) error {
	// Adapter so both subsystems log through the runtime's logger.
	adapter := &SimpleLogger{logger: r.logger}

	// Shutdown manager with a 30-second grace period.
	sd := shutdown.NewManager(30*time.Second, adapter)
	services.ShutdownManager = sd

	// Health manager, wired to trigger shutdown on fatal health states.
	hm := health.NewManager(services.Node.ID().ShortString(), "v0.2.0", adapter)
	hm.SetShutdownManager(sd)
	services.HealthManager = hm

	// Register health checks and graceful-shutdown participants.
	r.setupHealthChecks(hm, services.PubSub, services.Node, services.DHT)
	r.setupGracefulShutdown(sd, hm, services)
	return nil
}
// SimpleLogger implements the logger interface expected by shutdown and health systems
// by delegating each level directly to the runtime's logging.Logger.
type SimpleLogger struct {
	logger logging.Logger // underlying logger supplied by the runtime
}

// Info forwards an informational message to the underlying logger.
func (l *SimpleLogger) Info(msg string, args ...interface{}) {
	l.logger.Info(msg, args...)
}

// Warn forwards a warning message to the underlying logger.
func (l *SimpleLogger) Warn(msg string, args ...interface{}) {
	l.logger.Warn(msg, args...)
}

// Error forwards an error message to the underlying logger.
func (l *SimpleLogger) Error(msg string, args ...interface{}) {
	l.logger.Error(msg, args...)
}
// Utility functions moved from main.go

// performHMMMSmokeTest joins a per-issue HMMM room and publishes a single seed
// message so operators can verify end-to-end pubsub delivery. It is triggered
// only when BZZZ_HMMM_SMOKE=1 (see initializeCoreServices) and is best-effort:
// every failure is logged as a warning and the function returns.
func (r *StandardRuntime) performHMMMSmokeTest(ps *pubsub.PubSub, node *p2p.Node) {
	issueID := 42 // fixed demo issue; the smoke test only exercises plumbing
	topic := fmt.Sprintf("bzzz/meta/issue/%d", issueID)
	if err := ps.JoinDynamicTopic(topic); err != nil {
		r.logger.Warn("⚠️ HMMM smoke: failed to join %s: %v", topic, err)
		return
	}
	seed := map[string]interface{}{
		"version":   1,
		"type":      "meta_msg",
		"issue_id":  issueID,
		"thread_id": fmt.Sprintf("issue-%d", issueID),
		"msg_id":    fmt.Sprintf("seed-%d", time.Now().UnixNano()),
		"node_id":   node.ID().ShortString(),
		"hop_count": 0,
		"timestamp": time.Now().UTC(),
		"message":   "Seed: HMMM per-issue room initialized.",
	}
	// Previously the marshal error was discarded with `_`, which on failure
	// would have published a nil payload; now it aborts with a warning.
	b, err := json.Marshal(seed)
	if err != nil {
		r.logger.Warn("⚠️ HMMM smoke: marshal failed: %v", err)
		return
	}
	if err := ps.PublishRaw(topic, b); err != nil {
		r.logger.Warn("⚠️ HMMM smoke: publish failed: %v", err)
		return
	}
	r.logger.Info("🧪 HMMM smoke: published seed to %s", topic)
}
// testEncryptionSystems runs the built-in crypto self-tests (Age encryption
// and Shamir secret sharing), logging pass/fail for each, then exercises the
// end-to-end encrypted decision flow. Intended to run in a background
// goroutine shortly after startup (see initializeDHT).
func (r *StandardRuntime) testEncryptionSystems(publisher *ucxl.DecisionPublisher, storage *dht.EncryptedDHTStorage) {
	// Table of independent self-tests with their result messages.
	selfTests := []struct {
		run     func() error
		passMsg string
		failFmt string
	}{
		{crypto.TestAgeEncryption, "✅ Age encryption test passed", "❌ Age encryption test failed: %v"},
		{crypto.TestShamirSecretSharing, "✅ Shamir secret sharing test passed", "❌ Shamir secret sharing test failed: %v"},
	}
	for _, st := range selfTests {
		if err := st.run(); err != nil {
			r.logger.Error(st.failFmt, err)
		} else {
			r.logger.Info(st.passMsg)
		}
	}
	// Test end-to-end encrypted decision flow
	time.Sleep(3 * time.Second) // Wait a bit more
	r.testEndToEndDecisionFlow(publisher, storage)
}
// testEndToEndDecisionFlow publishes a fixed sample architectural decision
// through the encrypted decision pipeline to prove the publish path works end
// to end. It is skipped (with an info log) when either the publisher or the
// DHT storage failed to initialize.
func (r *StandardRuntime) testEndToEndDecisionFlow(publisher *ucxl.DecisionPublisher, storage *dht.EncryptedDHTStorage) {
	if publisher == nil || storage == nil {
		r.logger.Info("⚪ Skipping end-to-end test (components not initialized)")
		return
	}
	r.logger.Info("🧪 Testing end-to-end encrypted decision flow...")

	// Test 1: Publish an architectural decision using a fixed sample payload.
	decisionKey := "implement_unified_bzzz_slurp"
	description := "Integrate SLURP as specialized BZZZ agent with admin role for unified P2P architecture"
	reasoningText := "Eliminates separate system complexity and leverages existing P2P infrastructure"
	alternatives := []string{"Keep separate systems", "Use different consensus algorithm"}
	impacts := []string{"Single point of coordination", "Improved failover", "Simplified deployment"}
	nextSteps := []string{"Test consensus elections", "Implement key reconstruction", "Deploy to cluster"}
	if err := publisher.PublishArchitecturalDecision(decisionKey, description, reasoningText, alternatives, impacts, nextSteps); err != nil {
		r.logger.Error("❌ Failed to publish architectural decision: %v", err)
		return
	}
	r.logger.Info("✅ Published architectural decision")

	r.logger.Info("🎉 End-to-end encrypted decision flow test completed successfully!")
	r.logger.Info("🔐 All decisions encrypted with role-based Age encryption")
	r.logger.Info("🕸️ Content stored in distributed DHT with local caching")
	r.logger.Info("🔍 Content discoverable and retrievable by authorized roles")
}
// setupAgentCapabilities reconciles the configured agent models against the
// models actually installed in Ollama, then configures the reasoning module
// with the surviving list. If model detection fails the configured models are
// kept as-is and the reasoning module is NOT reconfigured.
func (r *StandardRuntime) setupAgentCapabilities(services *RuntimeServices) {
	// Detect available Ollama models and update config
	availableModels, err := r.detectAvailableOllamaModels(services.Config.AI.Ollama.Endpoint)
	if err != nil {
		r.logger.Warn("⚠️ Failed to detect Ollama models: %v", err)
		r.logger.Info("🔄 Using configured models: %v", services.Config.Agent.Models)
	} else {
		// Filter configured models to only include available ones
		validModels := make([]string, 0)
		for _, configModel := range services.Config.Agent.Models {
			for _, availableModel := range availableModels {
				if configModel == availableModel {
					validModels = append(validModels, configModel)
					break
				}
			}
		}
		if len(validModels) == 0 {
			// Fall back to the first installed model when none of the
			// configured ones are present.
			// NOTE(review): if Ollama reports zero installed models,
			// validModels stays empty and the agent is left with no models
			// configured — confirm downstream consumers tolerate that.
			r.logger.Warn("⚠️ No configured models available in Ollama, using first available: %v", availableModels)
			if len(availableModels) > 0 {
				validModels = []string{availableModels[0]}
			}
		} else {
			r.logger.Info("✅ Available models: %v", validModels)
		}
		// Update config with available models
		services.Config.Agent.Models = validModels
		// Configure reasoning module with available models and webhook
		reasoning.SetModelConfig(validModels, services.Config.Agent.ModelSelectionWebhook, services.Config.Agent.DefaultReasoningModel)
		reasoning.SetOllamaEndpoint(services.Config.AI.Ollama.Endpoint)
	}
}
// detectAvailableOllamaModels queries the Ollama /api/tags endpoint and
// returns the names of all locally installed models. An empty endpoint falls
// back to the default local Ollama address. Errors cover connection failures,
// non-200 responses, and malformed JSON.
func (r *StandardRuntime) detectAvailableOllamaModels(endpoint string) ([]string, error) {
	if endpoint == "" {
		endpoint = "http://localhost:11434" // fallback
	}
	apiURL := endpoint + "/api/tags"

	// Use a bounded timeout so startup cannot hang indefinitely on an
	// unreachable Ollama host (http.Get has no timeout by default).
	client := &http.Client{Timeout: 10 * time.Second}
	resp, err := client.Get(apiURL)
	if err != nil {
		return nil, fmt.Errorf("failed to connect to Ollama API: %w", err)
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("Ollama API returned status %d", resp.StatusCode)
	}

	// Only the model names are needed from the tags response.
	var tagsResponse struct {
		Models []struct {
			Name string `json:"name"`
		} `json:"models"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&tagsResponse); err != nil {
		return nil, fmt.Errorf("failed to decode Ollama response: %w", err)
	}

	models := make([]string, 0, len(tagsResponse.Models))
	for _, model := range tagsResponse.Models {
		models = append(models, model.Name)
	}
	return models, nil
}
// getDefaultRoleForSpecialization maps an agent specialization string to the
// default collaboration role it should assume when no role is configured.
// Any unrecognized (or empty) specialization falls back to
// "full_stack_engineer".
func getDefaultRoleForSpecialization(specialization string) string {
	switch specialization {
	case "code_generation", "backend":
		return "backend_developer"
	case "advanced_reasoning", "architecture":
		return "senior_software_architect"
	case "code_analysis", "security":
		return "security_expert"
	case "general_developer":
		return "full_stack_engineer"
	case "debugging":
		return "qa_engineer"
	case "frontend":
		return "frontend_developer"
	case "devops":
		return "devops_engineer"
	case "design":
		return "ui_ux_designer"
	default:
		// Default fallback
		return "full_stack_engineer"
	}
}