 ea04378962
			
		
	
	ea04378962
	
	
	
		
			
## Problem Analysis

- WHOOSH service was failing to start due to BACKBEAT NATS connectivity issues
- Containers were unable to resolve "backbeat-nats" hostname from DNS
- Service was stuck in deployment loops with all replicas failing
- Root cause: Missing WHOOSH_BACKBEAT_NATS_URL environment variable configuration

## Solution Implementation

### 1. BACKBEAT Configuration Fix

- **Added explicit WHOOSH BACKBEAT environment variables** to docker-compose.yml:
  - `WHOOSH_BACKBEAT_ENABLED: "false"` (temporarily disabled for stability)
  - `WHOOSH_BACKBEAT_CLUSTER_ID: "chorus-production"`
  - `WHOOSH_BACKBEAT_AGENT_ID: "whoosh"`
  - `WHOOSH_BACKBEAT_NATS_URL: "nats://backbeat-nats:4222"`

### 2. Service Deployment Improvements

- **Removed rosewood node constraints** across all services (gaming PC intermittency)
- **Simplified network configuration** by removing unused `whoosh-backend` network
- **Improved health check configuration** for postgres service
- **Streamlined service placement** for better distribution

### 3. Code Quality Improvements

- **Fixed code formatting** inconsistencies in HTTP server
- **Updated service comments** from "Bzzz" to "CHORUS" for clarity
- **Standardized import grouping** and spacing

## Results Achieved

### ✅ WHOOSH Service Operational

- **Service successfully running** on walnut node (1/2 replicas healthy)
- **Health checks passing** - API accessible on port 8800
- **Database connectivity restored** - migrations completed successfully
- **Council formation working** - teams being created and tasks assigned

### ✅ Core Functionality Verified

- **Agent discovery active** - CHORUS agents being detected and registered
- **Task processing operational** - autonomous team formation working
- **API endpoints responsive** - `/health` returning proper status
- **Service integration** - discovery of multiple CHORUS agent endpoints

## Technical Details

### Service Configuration

- **Environment**: Production Docker Swarm deployment
- **Database**: PostgreSQL with automatic migrations
- **Networking**: Internal chorus_net overlay network
- **Load Balancing**: Traefik routing with SSL certificates
- **Monitoring**: Prometheus metrics collection enabled

### Deployment Status

```
CHORUS_whoosh.2.nej8z6nbae1a@walnut Running 31 seconds ago
- Health checks: ✅ Passing (200 OK responses)
- Database: ✅ Connected and migrated
- Agent Discovery: ✅ Active (multiple agents detected)
- Council Formation: ✅ Functional (teams being created)
```

### Key Log Evidence

```
{"service":"whoosh","status":"ok","version":"0.1.0-mvp"}
🚀 Task successfully assigned to team
🤖 Discovered CHORUS agent with metadata
✅ Database migrations completed
🌐 Starting HTTP server on :8080
```

## Next Steps

- **BACKBEAT Integration**: Re-enable once NATS connectivity is fully stabilized
- **Multi-Node Deployment**: Investigate ironwood node DNS resolution issues
- **Performance Monitoring**: Verify scaling behavior under load
- **Integration Testing**: Full project ingestion and council formation workflows

🎯 **Mission Accomplished**: WHOOSH is now operational and ready for autonomous development team orchestration testing.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
		
			
				
	
	
		
			382 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			382 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
| package config
 | |
| 
 | |
| import (
 | |
| 	"fmt"
 | |
| 	"io/ioutil"
 | |
| 	"os"
 | |
| 	"strconv"
 | |
| 	"strings"
 | |
| 	"time"
 | |
| )
 | |
| 
 | |
| // This is a container-adapted version of CHORUS's config system
 | |
| // All configuration comes from environment variables instead of YAML files
 | |
| 
 | |
| // Config represents the complete CHORUS configuration loaded from environment variables
 | |
| type Config struct {
 | |
| 	Agent     AgentConfig     `yaml:"agent"`
 | |
| 	Network   NetworkConfig   `yaml:"network"`
 | |
| 	License   LicenseConfig   `yaml:"license"`
 | |
| 	AI        AIConfig        `yaml:"ai"`
 | |
| 	Logging   LoggingConfig   `yaml:"logging"`
 | |
| 	V2        V2Config        `yaml:"v2"`
 | |
| 	UCXL      UCXLConfig      `yaml:"ucxl"`
 | |
| 	Slurp     SlurpConfig     `yaml:"slurp"`
 | |
| 	Security  SecurityConfig  `yaml:"security"`
 | |
| 	WHOOSHAPI WHOOSHAPIConfig `yaml:"whoosh_api"`
 | |
| }
 | |
| 
 | |
// AgentConfig holds the settings that describe a single agent. The
// environment variable and default named on each field are the ones used by
// LoadFromEnvironment.
type AgentConfig struct {
	// ID comes from CHORUS_AGENT_ID. When empty, Validate derives one of the
	// form "chorus-<host>".
	ID string `yaml:"id"`

	// Specialization comes from CHORUS_SPECIALIZATION (default
	// "general_developer").
	Specialization string `yaml:"specialization"`

	// MaxTasks comes from CHORUS_MAX_TASKS (default 3).
	MaxTasks int `yaml:"max_tasks"`

	// Capabilities is the comma-separated CHORUS_CAPABILITIES list (default
	// "general_development", "task_coordination").
	Capabilities []string `yaml:"capabilities"`

	// Models is the comma-separated CHORUS_MODELS list (default
	// "meta/llama-3.1-8b-instruct").
	Models []string `yaml:"models"`

	// Role comes from CHORUS_ROLE and is overwritten by ApplyRoleDefinition.
	Role string `yaml:"role"`

	// Project comes from CHORUS_PROJECT (default "chorus").
	Project string `yaml:"project"`

	// Expertise is the comma-separated CHORUS_EXPERTISE list (default empty).
	Expertise []string `yaml:"expertise"`

	// ReportsTo comes from CHORUS_REPORTS_TO (default empty).
	ReportsTo string `yaml:"reports_to"`

	// Deliverables is the comma-separated CHORUS_DELIVERABLES list (default
	// empty).
	Deliverables []string `yaml:"deliverables"`

	// ModelSelectionWebhook comes from CHORUS_MODEL_SELECTION_WEBHOOK
	// (default empty).
	ModelSelectionWebhook string `yaml:"model_selection_webhook"`

	// DefaultReasoningModel comes from CHORUS_DEFAULT_REASONING_MODEL
	// (default "meta/llama-3.1-8b-instruct").
	DefaultReasoningModel string `yaml:"default_reasoning_model"`
}
 | |
| 
 | |
// NetworkConfig holds the ports and bind address of the agent. The
// environment variable and default named on each field are the ones used by
// LoadFromEnvironment.
type NetworkConfig struct {
	// P2PPort comes from CHORUS_P2P_PORT (default 9000).
	P2PPort int `yaml:"p2p_port"`

	// APIPort comes from CHORUS_API_PORT (default 8080).
	APIPort int `yaml:"api_port"`

	// HealthPort comes from CHORUS_HEALTH_PORT (default 8081).
	HealthPort int `yaml:"health_port"`

	// BindAddr comes from CHORUS_BIND_ADDRESS (default "0.0.0.0").
	BindAddr string `yaml:"bind_address"`
}
 | |
| 
 | |
// LicenseConfig holds licensing settings (adapted from CHORUS). The
// environment variable and default named on a field are the ones used by
// LoadFromEnvironment; fields without one are left at their zero value by the
// loader.
type LicenseConfig struct {
	// LicenseID comes from CHORUS_LICENSE_ID, or from the file named by
	// CHORUS_LICENSE_ID_FILE. Validate rejects an empty value.
	LicenseID string `yaml:"license_id"`

	// ClusterID comes from CHORUS_CLUSTER_ID (default "default-cluster").
	ClusterID string `yaml:"cluster_id"`

	// OrganizationName comes from CHORUS_ORGANIZATION_NAME (default empty).
	OrganizationName string `yaml:"organization_name"`

	// KachingURL comes from CHORUS_KACHING_URL (default
	// "https://kaching.chorus.services").
	KachingURL string `yaml:"kaching_url"`

	// IsActive is loaded as false; presumably set by license validation
	// performed elsewhere — verify against callers.
	IsActive bool `yaml:"is_active"`

	// LastValidated is not set by LoadFromEnvironment.
	LastValidated time.Time `yaml:"last_validated"`

	// GracePeriodHours comes from CHORUS_GRACE_PERIOD_HOURS (default 72).
	GracePeriodHours int `yaml:"grace_period_hours"`

	// LicenseType is not set by LoadFromEnvironment.
	LicenseType string `yaml:"license_type"`

	// ExpiresAt is not set by LoadFromEnvironment.
	ExpiresAt time.Time `yaml:"expires_at"`

	// MaxNodes is not set by LoadFromEnvironment.
	MaxNodes int `yaml:"max_nodes"`
}
 | |
| 
 | |
| // AIConfig defines AI service settings
 | |
| type AIConfig struct {
 | |
| 	Provider  string          `yaml:"provider"`
 | |
| 	Ollama    OllamaConfig    `yaml:"ollama"`
 | |
| 	ResetData ResetDataConfig `yaml:"resetdata"`
 | |
| }
 | |
| 
 | |
// OllamaConfig holds the Ollama-specific settings. The environment variable
// and default named on each field are the ones used by LoadFromEnvironment.
type OllamaConfig struct {
	// Endpoint comes from OLLAMA_ENDPOINT (default "http://localhost:11434").
	Endpoint string `yaml:"endpoint"`

	// Timeout comes from OLLAMA_TIMEOUT (default 30s).
	Timeout time.Duration `yaml:"timeout"`
}
 | |
| 
 | |
// ResetDataConfig holds the ResetData LLM service settings. The environment
// variable and default named on each field are the ones used by
// LoadFromEnvironment.
type ResetDataConfig struct {
	// BaseURL comes from RESETDATA_BASE_URL (default
	// "https://models.au-syd.resetdata.ai/v1").
	BaseURL string `yaml:"base_url"`

	// APIKey comes from RESETDATA_API_KEY, or from the file named by
	// RESETDATA_API_KEY_FILE.
	APIKey string `yaml:"api_key"`

	// Model comes from RESETDATA_MODEL (default
	// "meta/llama-3.1-8b-instruct").
	Model string `yaml:"model"`

	// Timeout comes from RESETDATA_TIMEOUT (default 30s).
	Timeout time.Duration `yaml:"timeout"`
}
 | |
| 
 | |
// LoggingConfig holds the logging settings. The environment variable and
// default named on each field are the ones used by LoadFromEnvironment.
type LoggingConfig struct {
	// Level comes from LOG_LEVEL (default "info").
	Level string `yaml:"level"`

	// Format comes from LOG_FORMAT (default "structured").
	Format string `yaml:"format"`
}
 | |
| 
 | |
| // V2Config defines v2-specific settings (from CHORUS)
 | |
| type V2Config struct {
 | |
| 	DHT DHTConfig `yaml:"dht"`
 | |
| }
 | |
| 
 | |
// DHTConfig holds the DHT settings. The environment variable and default
// named on each field are the ones used by LoadFromEnvironment.
type DHTConfig struct {
	// Enabled comes from CHORUS_DHT_ENABLED (default true).
	Enabled bool `yaml:"enabled"`

	// BootstrapPeers is the comma-separated CHORUS_BOOTSTRAP_PEERS list
	// (default empty).
	BootstrapPeers []string `yaml:"bootstrap_peers"`

	// MDNSEnabled comes from CHORUS_MDNS_ENABLED (default true).
	MDNSEnabled bool `yaml:"mdns_enabled"`
}
 | |
| 
 | |
| // UCXLConfig defines UCXL protocol settings
 | |
| type UCXLConfig struct {
 | |
| 	Enabled    bool             `yaml:"enabled"`
 | |
| 	Server     ServerConfig     `yaml:"server"`
 | |
| 	Storage    StorageConfig    `yaml:"storage"`
 | |
| 	Resolution ResolutionConfig `yaml:"resolution"`
 | |
| }
 | |
| 
 | |
// ServerConfig holds the server settings used by UCXLConfig. The environment
// variable and default named on each field are the ones used by
// LoadFromEnvironment.
type ServerConfig struct {
	// Enabled comes from CHORUS_UCXL_SERVER_ENABLED (default true).
	Enabled bool `yaml:"enabled"`

	// Port comes from CHORUS_UCXL_SERVER_PORT (default 8082).
	Port int `yaml:"port"`

	// BasePath comes from CHORUS_UCXL_SERVER_BASE_PATH (default empty).
	BasePath string `yaml:"base_path"`
}
 | |
| 
 | |
// StorageConfig holds the storage settings used by UCXLConfig.
type StorageConfig struct {
	// Directory comes from CHORUS_UCXL_STORAGE_DIRECTORY (default
	// "/tmp/chorus-ucxi-storage") in LoadFromEnvironment.
	Directory string `yaml:"directory"`
}
 | |
| 
 | |
// ResolutionConfig holds the resolution settings used by UCXLConfig.
type ResolutionConfig struct {
	// CacheTTL comes from CHORUS_UCXL_CACHE_TTL (default 1h) in
	// LoadFromEnvironment.
	CacheTTL time.Duration `yaml:"cache_ttl"`
}
 | |
| 
 | |
// SlurpConfig holds the SLURP settings.
type SlurpConfig struct {
	// Enabled comes from CHORUS_SLURP_ENABLED (default false) in
	// LoadFromEnvironment.
	Enabled bool `yaml:"enabled"`
}
 | |
| 
 | |
// WHOOSHAPIConfig holds the WHOOSH API integration settings. The environment
// variable and default named on each field are the ones used by
// LoadFromEnvironment.
type WHOOSHAPIConfig struct {
	// URL comes from WHOOSH_API_URL (default "http://localhost:3000").
	URL string `yaml:"url"`

	// BaseURL comes from WHOOSH_API_BASE_URL (default
	// "http://localhost:3000").
	BaseURL string `yaml:"base_url"`

	// Token comes from WHOOSH_API_TOKEN and has no default.
	Token string `yaml:"token"`

	// Enabled comes from WHOOSH_API_ENABLED (default false).
	Enabled bool `yaml:"enabled"`
}
 | |
| 
 | |
| // LoadFromEnvironment loads configuration from environment variables
 | |
| func LoadFromEnvironment() (*Config, error) {
 | |
| 	cfg := &Config{
 | |
| 		Agent: AgentConfig{
 | |
| 			ID:                    getEnvOrDefault("CHORUS_AGENT_ID", ""),
 | |
| 			Specialization:        getEnvOrDefault("CHORUS_SPECIALIZATION", "general_developer"),
 | |
| 			MaxTasks:              getEnvIntOrDefault("CHORUS_MAX_TASKS", 3),
 | |
| 			Capabilities:          getEnvArrayOrDefault("CHORUS_CAPABILITIES", []string{"general_development", "task_coordination"}),
 | |
| 			Models:                getEnvArrayOrDefault("CHORUS_MODELS", []string{"meta/llama-3.1-8b-instruct"}),
 | |
| 			Role:                  getEnvOrDefault("CHORUS_ROLE", ""),
 | |
| 			Project:               getEnvOrDefault("CHORUS_PROJECT", "chorus"),
 | |
| 			Expertise:             getEnvArrayOrDefault("CHORUS_EXPERTISE", []string{}),
 | |
| 			ReportsTo:             getEnvOrDefault("CHORUS_REPORTS_TO", ""),
 | |
| 			Deliverables:          getEnvArrayOrDefault("CHORUS_DELIVERABLES", []string{}),
 | |
| 			ModelSelectionWebhook: getEnvOrDefault("CHORUS_MODEL_SELECTION_WEBHOOK", ""),
 | |
| 			DefaultReasoningModel: getEnvOrDefault("CHORUS_DEFAULT_REASONING_MODEL", "meta/llama-3.1-8b-instruct"),
 | |
| 		},
 | |
| 		Network: NetworkConfig{
 | |
| 			P2PPort:    getEnvIntOrDefault("CHORUS_P2P_PORT", 9000),
 | |
| 			APIPort:    getEnvIntOrDefault("CHORUS_API_PORT", 8080),
 | |
| 			HealthPort: getEnvIntOrDefault("CHORUS_HEALTH_PORT", 8081),
 | |
| 			BindAddr:   getEnvOrDefault("CHORUS_BIND_ADDRESS", "0.0.0.0"),
 | |
| 		},
 | |
| 		License: LicenseConfig{
 | |
| 			LicenseID:        getEnvOrFileContent("CHORUS_LICENSE_ID", "CHORUS_LICENSE_ID_FILE"),
 | |
| 			ClusterID:        getEnvOrDefault("CHORUS_CLUSTER_ID", "default-cluster"),
 | |
| 			OrganizationName: getEnvOrDefault("CHORUS_ORGANIZATION_NAME", ""),
 | |
| 			KachingURL:       getEnvOrDefault("CHORUS_KACHING_URL", "https://kaching.chorus.services"),
 | |
| 			IsActive:         false, // Will be set during validation
 | |
| 			GracePeriodHours: getEnvIntOrDefault("CHORUS_GRACE_PERIOD_HOURS", 72),
 | |
| 		},
 | |
| 		AI: AIConfig{
 | |
| 			Provider: getEnvOrDefault("CHORUS_AI_PROVIDER", "resetdata"),
 | |
| 			Ollama: OllamaConfig{
 | |
| 				Endpoint: getEnvOrDefault("OLLAMA_ENDPOINT", "http://localhost:11434"),
 | |
| 				Timeout:  getEnvDurationOrDefault("OLLAMA_TIMEOUT", 30*time.Second),
 | |
| 			},
 | |
| 			ResetData: ResetDataConfig{
 | |
| 				BaseURL: getEnvOrDefault("RESETDATA_BASE_URL", "https://models.au-syd.resetdata.ai/v1"),
 | |
| 				APIKey:  getEnvOrFileContent("RESETDATA_API_KEY", "RESETDATA_API_KEY_FILE"),
 | |
| 				Model:   getEnvOrDefault("RESETDATA_MODEL", "meta/llama-3.1-8b-instruct"),
 | |
| 				Timeout: getEnvDurationOrDefault("RESETDATA_TIMEOUT", 30*time.Second),
 | |
| 			},
 | |
| 		},
 | |
| 		Logging: LoggingConfig{
 | |
| 			Level:  getEnvOrDefault("LOG_LEVEL", "info"),
 | |
| 			Format: getEnvOrDefault("LOG_FORMAT", "structured"),
 | |
| 		},
 | |
| 		V2: V2Config{
 | |
| 			DHT: DHTConfig{
 | |
| 				Enabled:        getEnvBoolOrDefault("CHORUS_DHT_ENABLED", true),
 | |
| 				BootstrapPeers: getEnvArrayOrDefault("CHORUS_BOOTSTRAP_PEERS", []string{}),
 | |
| 				MDNSEnabled:    getEnvBoolOrDefault("CHORUS_MDNS_ENABLED", true),
 | |
| 			},
 | |
| 		},
 | |
| 		UCXL: UCXLConfig{
 | |
| 			Enabled: getEnvBoolOrDefault("CHORUS_UCXL_ENABLED", true),
 | |
| 			Server: ServerConfig{
 | |
| 				Enabled:  getEnvBoolOrDefault("CHORUS_UCXL_SERVER_ENABLED", true),
 | |
| 				Port:     getEnvIntOrDefault("CHORUS_UCXL_SERVER_PORT", 8082),
 | |
| 				BasePath: getEnvOrDefault("CHORUS_UCXL_SERVER_BASE_PATH", ""),
 | |
| 			},
 | |
| 			Storage: StorageConfig{
 | |
| 				Directory: getEnvOrDefault("CHORUS_UCXL_STORAGE_DIRECTORY", "/tmp/chorus-ucxi-storage"),
 | |
| 			},
 | |
| 			Resolution: ResolutionConfig{
 | |
| 				CacheTTL: getEnvDurationOrDefault("CHORUS_UCXL_CACHE_TTL", 1*time.Hour),
 | |
| 			},
 | |
| 		},
 | |
| 		Slurp: SlurpConfig{
 | |
| 			Enabled: getEnvBoolOrDefault("CHORUS_SLURP_ENABLED", false),
 | |
| 		},
 | |
| 		Security: SecurityConfig{
 | |
| 			KeyRotationDays: getEnvIntOrDefault("CHORUS_KEY_ROTATION_DAYS", 30),
 | |
| 			AuditLogging:    getEnvBoolOrDefault("CHORUS_AUDIT_LOGGING", true),
 | |
| 			AuditPath:       getEnvOrDefault("CHORUS_AUDIT_PATH", "/tmp/chorus-audit.log"),
 | |
| 			ElectionConfig: ElectionConfig{
 | |
| 				DiscoveryTimeout: getEnvDurationOrDefault("CHORUS_DISCOVERY_TIMEOUT", 15*time.Second),
 | |
| 				HeartbeatTimeout: getEnvDurationOrDefault("CHORUS_HEARTBEAT_TIMEOUT", 30*time.Second),
 | |
| 				ElectionTimeout:  getEnvDurationOrDefault("CHORUS_ELECTION_TIMEOUT", 60*time.Second),
 | |
| 				DiscoveryBackoff: getEnvDurationOrDefault("CHORUS_DISCOVERY_BACKOFF", 5*time.Second),
 | |
| 				LeadershipScoring: &LeadershipScoring{
 | |
| 					UptimeWeight:     0.4,
 | |
| 					CapabilityWeight: 0.3,
 | |
| 					ExperienceWeight: 0.2,
 | |
| 					LoadWeight:       0.1,
 | |
| 				},
 | |
| 			},
 | |
| 		},
 | |
| 		WHOOSHAPI: WHOOSHAPIConfig{
 | |
| 			URL:     getEnvOrDefault("WHOOSH_API_URL", "http://localhost:3000"),
 | |
| 			BaseURL: getEnvOrDefault("WHOOSH_API_BASE_URL", "http://localhost:3000"),
 | |
| 			Token:   os.Getenv("WHOOSH_API_TOKEN"),
 | |
| 			Enabled: getEnvBoolOrDefault("WHOOSH_API_ENABLED", false),
 | |
| 		},
 | |
| 	}
 | |
| 
 | |
| 	// Validate required configuration
 | |
| 	if err := cfg.Validate(); err != nil {
 | |
| 		return nil, fmt.Errorf("configuration validation failed: %w", err)
 | |
| 	}
 | |
| 
 | |
| 	return cfg, nil
 | |
| }
 | |
| 
 | |
| // Validate ensures all required configuration is present
 | |
| func (c *Config) Validate() error {
 | |
| 	if c.License.LicenseID == "" {
 | |
| 		return fmt.Errorf("CHORUS_LICENSE_ID is required")
 | |
| 	}
 | |
| 
 | |
| 	if c.Agent.ID == "" {
 | |
| 		// Auto-generate agent ID if not provided
 | |
| 		hostname, _ := os.Hostname()
 | |
| 		containerID := os.Getenv("HOSTNAME") // Docker sets this to container ID
 | |
| 		if containerID != "" && containerID != hostname {
 | |
| 			c.Agent.ID = fmt.Sprintf("chorus-%s", containerID[:12])
 | |
| 		} else {
 | |
| 			c.Agent.ID = fmt.Sprintf("chorus-%s", hostname)
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
| // ApplyRoleDefinition applies role-based configuration (from CHORUS)
 | |
| func (c *Config) ApplyRoleDefinition(role string) error {
 | |
| 	// This would contain the role definition logic from CHORUS
 | |
| 	c.Agent.Role = role
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
| // GetRoleAuthority returns the authority level for a role (from CHORUS)
 | |
| func (c *Config) GetRoleAuthority(role string) (string, error) {
 | |
| 	// This would contain the authority mapping from CHORUS
 | |
| 	switch role {
 | |
| 	case "admin":
 | |
| 		return "master", nil
 | |
| 	default:
 | |
| 		return "member", nil
 | |
| 	}
 | |
| }
 | |
| 
 | |
| // Helper functions for environment variable parsing
 | |
| 
 | |
// getEnvOrDefault returns the value of the environment variable key, or
// defaultValue when the variable is unset or empty.
func getEnvOrDefault(key, defaultValue string) string {
	raw := os.Getenv(key)
	if raw == "" {
		return defaultValue
	}
	return raw
}
 | |
| 
 | |
// getEnvIntOrDefault returns the environment variable key parsed as a
// base-10 int. It returns defaultValue when the variable is unset, empty, or
// not parsable by strconv.Atoi; parse failures are not reported.
func getEnvIntOrDefault(key string, defaultValue int) int {
	raw := os.Getenv(key)
	if raw == "" {
		return defaultValue
	}
	n, err := strconv.Atoi(raw)
	if err != nil {
		return defaultValue
	}
	return n
}
 | |
| 
 | |
// getEnvBoolOrDefault returns the environment variable key parsed with
// strconv.ParseBool. It returns defaultValue when the variable is unset,
// empty, or not a value ParseBool accepts; parse failures are not reported.
func getEnvBoolOrDefault(key string, defaultValue bool) bool {
	raw := os.Getenv(key)
	if raw == "" {
		return defaultValue
	}
	flag, err := strconv.ParseBool(raw)
	if err != nil {
		return defaultValue
	}
	return flag
}
 | |
| 
 | |
// getEnvDurationOrDefault returns the environment variable key parsed with
// time.ParseDuration (for example "30s" or "1h"). It returns defaultValue
// when the variable is unset, empty, or not a valid duration string; parse
// failures are not reported.
func getEnvDurationOrDefault(key string, defaultValue time.Duration) time.Duration {
	raw := os.Getenv(key)
	if raw == "" {
		return defaultValue
	}
	d, err := time.ParseDuration(raw)
	if err != nil {
		return defaultValue
	}
	return d
}
 | |
| 
 | |
// getEnvArrayOrDefault returns the environment variable key split on commas.
//
// Whitespace around each element is trimmed and empty elements are dropped,
// so "a, b,,c" yields ["a" "b" "c"]. defaultValue is returned when the
// variable is unset, empty, or contains no non-empty element.
func getEnvArrayOrDefault(key string, defaultValue []string) []string {
	raw := os.Getenv(key)
	if raw == "" {
		return defaultValue
	}

	parts := strings.Split(raw, ",")
	items := make([]string, 0, len(parts))
	for _, part := range parts {
		if item := strings.TrimSpace(part); item != "" {
			items = append(items, item)
		}
	}

	// A value made only of separators and whitespace carries no entries.
	if len(items) == 0 {
		return defaultValue
	}
	return items
}
 | |
| 
 | |
// getEnvOrFileContent resolves a setting that may be given either inline or
// through a file (for Docker secrets support).
//
// The variable envKey wins when it is non-empty. Otherwise, if fileEnvKey
// names a file, that file's content is returned with surrounding whitespace
// trimmed. The result is "" when neither source yields a value, including
// when the file cannot be read; read errors are not reported.
func getEnvOrFileContent(envKey, fileEnvKey string) string {
	direct := os.Getenv(envKey)
	if direct != "" {
		return direct
	}

	path := os.Getenv(fileEnvKey)
	if path == "" {
		return ""
	}

	// ioutil.ReadFile is retained because the file imports io/ioutil;
	// os.ReadFile is the equivalent in Go 1.16 and later.
	data, err := ioutil.ReadFile(path)
	if err != nil {
		return ""
	}
	return strings.TrimSpace(string(data))
}
 | |
| 
 | |
// IsSetupRequired reports whether interactive setup is needed. It always
// returns false: containers are pre-configured through the environment, so
// configPath is accepted but never consulted.
func IsSetupRequired(configPath string) bool {
	_ = configPath // unused; presumably kept so the signature matches the file-based API
	return false
}
 | |
| 
 | |
| // IsValidConfiguration validates configuration (simplified for containers)
 | |
| func IsValidConfiguration(cfg *Config) bool {
 | |
| 	return cfg.License.LicenseID != "" && cfg.License.ClusterID != ""
 | |
| }
 | |
| 
 | |
| // LoadConfig loads configuration from file (for API compatibility)
 | |
| func LoadConfig(configPath string) (*Config, error) {
 | |
| 	// For containers, always load from environment
 | |
| 	return LoadFromEnvironment()
 | |
| }
 | |
| 
 | |
| // SaveConfig saves configuration to file (stub for API compatibility)
 | |
| func SaveConfig(cfg *Config, configPath string) error {
 | |
| 	// For containers, configuration is environment-based, so this is a no-op
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
| // LoadRuntimeConfig loads configuration with runtime assignment support
 | |
| func LoadRuntimeConfig() (*RuntimeConfig, error) {
 | |
| 	// Load base configuration from environment
 | |
| 	baseConfig, err := LoadFromEnvironment()
 | |
| 	if err != nil {
 | |
| 		return nil, fmt.Errorf("failed to load base configuration: %w", err)
 | |
| 	}
 | |
| 
 | |
| 	// Create runtime configuration manager
 | |
| 	runtimeConfig := NewRuntimeConfig(baseConfig)
 | |
| 
 | |
| 	return runtimeConfig, nil
 | |
| }
 |