Enhance deployment system with retry functionality and improved UX

Major Improvements:
- Added retry deployment buttons in machine list for failed deployments
- Added retry button in SSH console modal footer for enhanced UX
- Enhanced deployment process with comprehensive cleanup of existing services
- Improved binary installation with password-based sudo authentication
- Updated configuration generation to include all required sections (agent, ai, network, security)
- Fixed deployment verification and error handling

Security Enhancements:
- Enhanced verifiedStopExistingServices with thorough cleanup process
- Improved binary copying with proper sudo authentication
- Added comprehensive configuration validation

UX Improvements:
- Users can retry deployments without re-running machine discovery
- Retry buttons available from both machine list and console modal
- Real-time deployment progress with detailed console output
- Clear error states with actionable retry options

Technical Changes:
- Modified ServiceDeployment.tsx with retry button components
- Enhanced api/setup_manager.go with improved deployment functions
- Updated main.go with command line argument support (--config, --setup)
- Added comprehensive zero-trust security validation system

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
anthonyrawlins
2025-08-31 10:23:27 +10:00
parent df4d98bf30
commit be761cfe20
234 changed files with 7508 additions and 38528 deletions

View File

@@ -1163,31 +1163,48 @@ func (s *SetupManager) verifiedPreDeploymentCheck(client *ssh.Client, config int
// verifiedStopExistingServices stops any existing BZZZ services and records
// the outcome as a step on result.
//
// It registers a "running" step through s.addStep, issues a series of cleanup
// commands on the remote host via s.executeSudoCommand / s.executeSSHCommand,
// and finally inspects the process list for anything matching "bzzz". The
// errors returned by the individual cleanup commands are discarded; only the
// final verification decides the result.
//
// It returns an error when the verification command itself fails or when its
// output lacks the "All BZZZ processes stopped" marker, and nil otherwise. In
// every case the step is updated through s.updateLastStep with the collected
// command output.
//
// NOTE(review): config is not referenced anywhere in this body.
// NOTE(review): several statements appear twice in this listing under the
// same variable name (stepName, cmd2, output2, output3, and the failure
// branches) — presumably the replaced and the replacement lines of a diff
// shown side by side; confirm against the actual file.
func (s *SetupManager) verifiedStopExistingServices(client *ssh.Client, config interface{}, password string, result *DeploymentResult) error {
stepName := "Stop Existing Services"
stepName := "Stop & Remove Existing Services"
s.addStep(result, stepName, "running", "", "", "", false)
// Stop systemd service if exists
// The "|| echo" fallback substitutes a message when systemctl reports failure;
// the Go-level error is dropped, so this step never aborts the cleanup.
cmd1 := "systemctl stop bzzz 2>/dev/null || echo 'No systemd service to stop'"
output1, _ := s.executeSudoCommand(client, password, cmd1)
// Disable and remove service file
// Covers both the system-wide unit path and the per-user unit path.
cmd2 := "systemctl disable bzzz 2>/dev/null; rm -f /etc/systemd/system/bzzz.service ~/.config/systemd/user/bzzz.service 2>/dev/null || echo 'No service file to remove'"
output2, _ := s.executeSudoCommand(client, password, cmd2)
// Kill any remaining processes
// NOTE(review): pkill -f matches against the full command line, and this runs
// through executeSSHCommand rather than executeSudoCommand — presumably it can
// only signal processes owned by the SSH user; confirm that is sufficient.
cmd2 := "pkill -f bzzz || echo 'No processes to kill'"
output2, _ := s.executeSSHCommand(client, cmd2)
cmd3 := "pkill -f bzzz || echo 'No processes to kill'"
output3, _ := s.executeSSHCommand(client, cmd3)
// Remove old binaries from standard locations
// NOTE(review): "~" is expanded by whichever user/shell ends up running the
// command; confirm it resolves to the deploying user's home under sudo.
cmd4 := "rm -f /usr/local/bin/bzzz ~/bin/bzzz ~/bzzz 2>/dev/null || echo 'No old binaries to remove'"
output4, _ := s.executeSudoCommand(client, password, cmd4)
// Reload systemd after changes
// NOTE(review): the echo after "||" runs only when daemon-reload exits
// non-zero, so 'Systemd reload completed' is printed on failure, not success.
cmd5 := "systemctl daemon-reload 2>/dev/null || echo 'Systemd reload completed'"
output5, _ := s.executeSudoCommand(client, password, cmd5)
// Verify no processes remain
// "grep -v grep" drops the grep process itself from the listing; when nothing
// is left the echo emits the marker string that is checked below.
output3, err := s.executeSSHCommand(client, "ps aux | grep bzzz | grep -v grep || echo 'All BZZZ processes stopped'")
output6, err := s.executeSSHCommand(client, "ps aux | grep bzzz | grep -v grep || echo 'All BZZZ processes stopped'")
if err != nil {
// The verification command could not be run: mark the step failed and
// attach everything gathered so far.
s.updateLastStep(result, "failed", cmd2, output1+"\n"+output2+"\n"+output3, fmt.Sprintf("Failed verification: %v", err), false)
combinedOutput := fmt.Sprintf("Stop service:\n%s\n\nRemove service:\n%s\n\nKill processes:\n%s\n\nRemove binaries:\n%s\n\nReload systemd:\n%s\n\nVerification:\n%s",
output1, output2, output3, output4, output5, output6)
s.updateLastStep(result, "failed", "cleanup verification", combinedOutput, fmt.Sprintf("Failed verification: %v", err), false)
return fmt.Errorf("failed to verify process cleanup: %v", err)
}
// Absence of the marker means ps/grep still listed at least one matching line.
if !strings.Contains(output3, "All BZZZ processes stopped") {
s.updateLastStep(result, "failed", cmd2, output1+"\n"+output2+"\n"+output3, "BZZZ processes still running after cleanup", false)
if !strings.Contains(output6, "All BZZZ processes stopped") {
combinedOutput := fmt.Sprintf("Stop service:\n%s\n\nRemove service:\n%s\n\nKill processes:\n%s\n\nRemove binaries:\n%s\n\nReload systemd:\n%s\n\nVerification:\n%s",
output1, output2, output3, output4, output5, output6)
s.updateLastStep(result, "failed", "process verification", combinedOutput, "BZZZ processes still running after cleanup", false)
return fmt.Errorf("failed to stop all BZZZ processes")
}
// Success: record the labelled output of every stage on the step.
combinedOutput := fmt.Sprintf("Systemd stop:\n%s\n\nProcess kill:\n%s\n\nVerification:\n%s", output1, output2, output3)
s.updateLastStep(result, "success", cmd1+" && "+cmd2, combinedOutput, "", true)
combinedOutput := fmt.Sprintf("Stop service:\n%s\n\nRemove service:\n%s\n\nKill processes:\n%s\n\nRemove binaries:\n%s\n\nReload systemd:\n%s\n\nVerification:\n%s",
output1, output2, output3, output4, output5, output6)
s.updateLastStep(result, "success", "stop + cleanup + verify", combinedOutput, "", true)
return nil
}
@@ -1257,7 +1274,7 @@ func (s *SetupManager) verifiedCopyBinary(client *ssh.Client, config interface{}
s.addStep(result, stepName, "running", "", "", "", false)
// Copy binary using existing function but with verification
if err := s.copyBinaryToMachine(client); err != nil {
if err := s.copyBinaryToMachineWithPassword(client, password); err != nil {
s.updateLastStep(result, "failed", "scp binary", "", err.Error(), false)
return fmt.Errorf("binary copy failed: %v", err)
}
@@ -1447,8 +1464,8 @@ func (s *SetupManager) verifiedPostDeploymentTest(client *ssh.Client, config int
return nil
}
// copyBinaryToMachine copies the BZZZ binary to remote machine using SCP protocol
func (s *SetupManager) copyBinaryToMachine(client *ssh.Client) error {
// copyBinaryToMachineWithPassword copies the BZZZ binary to remote machine using SCP protocol with sudo password
func (s *SetupManager) copyBinaryToMachineWithPassword(client *ssh.Client, password string) error {
// Read current binary
binaryPath, err := os.Executable()
if err != nil {
@@ -1528,8 +1545,19 @@ func (s *SetupManager) copyBinaryToMachine(client *ssh.Client) error {
}
defer session.Close()
// First try passwordless sudo
if err := session.Run("sudo -n mv ~/bzzz /usr/local/bin/bzzz && sudo -n chmod +x /usr/local/bin/bzzz"); err != nil {
// Try to move to /usr/local/bin with sudo (with or without password), fall back to user bin if needed
var sudoCmd string
if password == "" {
// Try passwordless sudo first
sudoCmd = "sudo -n mv ~/bzzz /usr/local/bin/bzzz && sudo -n chmod +x /usr/local/bin/bzzz"
} else {
// Use password sudo
escapedPassword := strings.ReplaceAll(password, "'", "'\"'\"'")
sudoCmd = fmt.Sprintf("echo '%s' | sudo -S mv ~/bzzz /usr/local/bin/bzzz && echo '%s' | sudo -S chmod +x /usr/local/bin/bzzz",
escapedPassword, escapedPassword)
}
if err := session.Run(sudoCmd); err != nil {
// If sudo fails, create user bin directory and install there
session, err = client.NewSession()
if err != nil {
@@ -1555,6 +1583,11 @@ func (s *SetupManager) copyBinaryToMachine(client *ssh.Client) error {
return nil
}
// copyBinaryToMachine copies the BZZZ binary to remote machine using SCP protocol (passwordless sudo).
//
// It is a thin wrapper that calls copyBinaryToMachineWithPassword with an
// empty password and returns that call's error unchanged. An empty password
// makes the callee build its install command with "sudo -n" instead of piping
// a password to "sudo -S".
func (s *SetupManager) copyBinaryToMachine(client *ssh.Client) error {
return s.copyBinaryToMachineWithPassword(client, "")
}
// createSystemdService creates systemd service file
func (s *SetupManager) createSystemdService(client *ssh.Client, config interface{}) error {
// Determine the correct binary path
@@ -1772,6 +1805,16 @@ func (s *SetupManager) generateAndDeployConfig(client *ssh.Client, nodeIP string
agent:
id: "%s-agent"
name: "%s Agent"
specialization: "general_developer"
capabilities: ["general", "reasoning", "task-coordination"]
models: ["phi3", "llama3.1"]
max_tasks: 3
# AI/LLM configuration
ai:
ollama:
base_url: "http://192.168.1.27:11434"
timeout: 30s
# Network configuration
network:
@@ -1785,6 +1828,8 @@ network:
# Security configuration
security:
cluster_secret: "%v"
audit_logging: true
key_rotation_days: 90
# Storage configuration
storage:
@@ -1794,6 +1839,21 @@ storage:
logging:
level: "info"
file: "~/.bzzz/logs/bzzz.log"
# GitHub integration (optional)
github:
token_file: "/home/tony/chorus/business/secrets/gh-token"
timeout: 30s
# WHOOSH API configuration
whoosh_api:
base_url: "https://hive.home.deepblack.cloud"
timeout: 30s
retry_count: 3
# P2P configuration
p2p:
escalation_webhook: "https://n8n.home.deepblack.cloud/webhook/escalation"
`, hostname, hostname, hostname, ports["api"], ports["mcp"], ports["webui"], ports["p2p"], securityConfig["cluster_secret"])
// Create configuration directory