Major milestone: CHORUS leader election is now fully functional! ## Key Features Implemented: ### 🗳️ Leader Election Core - Fixed root cause: nodes now trigger elections when no admin exists - Added randomized election delays to prevent simultaneous elections - Implemented concurrent election prevention (only one election at a time) - Added proper election state management and transitions ### 📡 Admin Discovery System - Enhanced discovery requests with "WHOAMI" debug messages - Fixed discovery responses to properly include current leader ID - Added comprehensive discovery request/response logging - Implemented admin confirmation from multiple sources ### 🔧 Configuration Improvements - Increased discovery timeout from 3s to 15s for better reliability - Added proper Docker Hub image deployment workflow - Updated build process to use correct chorus-agent binary (not deprecated chorus) - Added static compilation flags for Alpine Linux compatibility ### 🐛 Critical Fixes - Fixed build process confusion between chorus vs chorus-agent binaries - Added missing admin_election capability to enable leader elections - Corrected discovery logic to handle zero admin responses - Enhanced debugging with detailed state and timing information ## Current Operational Status: ✅ Admin Election: Working with proper consensus ✅ Heartbeat System: 15-second intervals from elected admin ✅ Discovery Protocol: Nodes can find and confirm current admin ✅ P2P Connectivity: 5+ connected peers with libp2p ✅ SLURP Functionality: Enabled on admin nodes ✅ BACKBEAT Integration: Tempo synchronization working ✅ Container Health: All health checks passing ## Technical Details: - Election uses weighted scoring based on uptime, capabilities, and resources - Randomized delays prevent election storms (30-45s wait periods) - Discovery responses include current leader ID for network-wide consensus - State management prevents multiple concurrent elections - Enhanced logging provides full visibility into election 
process 🎉 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
380 lines
13 KiB
Go
380 lines
13 KiB
Go
package config
|
|
|
|
import (
|
|
"fmt"
|
|
"io/ioutil"
|
|
"os"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
)
|
|
|
|
// This is a container-adapted version of CHORUS's config system
|
|
// All configuration comes from environment variables instead of YAML files
|
|
|
|
// Config represents the complete CHORUS configuration loaded from environment variables
|
|
type Config struct {
|
|
Agent AgentConfig `yaml:"agent"`
|
|
Network NetworkConfig `yaml:"network"`
|
|
License LicenseConfig `yaml:"license"`
|
|
AI AIConfig `yaml:"ai"`
|
|
Logging LoggingConfig `yaml:"logging"`
|
|
V2 V2Config `yaml:"v2"`
|
|
UCXL UCXLConfig `yaml:"ucxl"`
|
|
Slurp SlurpConfig `yaml:"slurp"`
|
|
Security SecurityConfig `yaml:"security"`
|
|
WHOOSHAPI WHOOSHAPIConfig `yaml:"whoosh_api"`
|
|
}
|
|
|
|
// AgentConfig holds the settings that describe a single agent.
//
// LoadFromEnvironment reads each field from the CHORUS_* variable noted below
// (list-valued fields are comma-separated):
//
//	ID                    CHORUS_AGENT_ID (Validate derives one when empty)
//	Specialization        CHORUS_SPECIALIZATION
//	MaxTasks              CHORUS_MAX_TASKS
//	Capabilities          CHORUS_CAPABILITIES
//	Models                CHORUS_MODELS
//	Role                  CHORUS_ROLE (also written by ApplyRoleDefinition)
//	Project               CHORUS_PROJECT
//	Expertise             CHORUS_EXPERTISE
//	ReportsTo             CHORUS_REPORTS_TO
//	Deliverables          CHORUS_DELIVERABLES
//	ModelSelectionWebhook CHORUS_MODEL_SELECTION_WEBHOOK
//	DefaultReasoningModel CHORUS_DEFAULT_REASONING_MODEL
type AgentConfig struct {
	ID                    string   `yaml:"id"`
	Specialization        string   `yaml:"specialization"`
	MaxTasks              int      `yaml:"max_tasks"`
	Capabilities          []string `yaml:"capabilities"`
	Models                []string `yaml:"models"`
	Role                  string   `yaml:"role"`
	Project               string   `yaml:"project"`
	Expertise             []string `yaml:"expertise"`
	ReportsTo             string   `yaml:"reports_to"`
	Deliverables          []string `yaml:"deliverables"`
	ModelSelectionWebhook string   `yaml:"model_selection_webhook"`
	DefaultReasoningModel string   `yaml:"default_reasoning_model"`
}
|
|
|
|
// NetworkConfig holds the listening ports and bind address.
//
// LoadFromEnvironment reads P2PPort from CHORUS_P2P_PORT, APIPort from
// CHORUS_API_PORT, HealthPort from CHORUS_HEALTH_PORT and BindAddr from
// CHORUS_BIND_ADDRESS.
type NetworkConfig struct {
	P2PPort    int    `yaml:"p2p_port"`
	APIPort    int    `yaml:"api_port"`
	HealthPort int    `yaml:"health_port"`
	BindAddr   string `yaml:"bind_address"`
}
|
|
|
|
// LicenseConfig holds licensing settings (adapted from CHORUS).
//
// LoadFromEnvironment populates LicenseID (CHORUS_LICENSE_ID or the file named
// by CHORUS_LICENSE_ID_FILE), ClusterID (CHORUS_CLUSTER_ID), OrganizationName
// (CHORUS_ORGANIZATION_NAME), KachingURL (CHORUS_KACHING_URL) and
// GracePeriodHours (CHORUS_GRACE_PERIOD_HOURS). It sets IsActive to false and
// leaves LastValidated, LicenseType, ExpiresAt and MaxNodes at their zero
// values.
type LicenseConfig struct {
	LicenseID        string    `yaml:"license_id"`
	ClusterID        string    `yaml:"cluster_id"`
	OrganizationName string    `yaml:"organization_name"`
	KachingURL       string    `yaml:"kaching_url"`
	IsActive         bool      `yaml:"is_active"`
	LastValidated    time.Time `yaml:"last_validated"`
	GracePeriodHours int       `yaml:"grace_period_hours"`
	LicenseType      string    `yaml:"license_type"`
	ExpiresAt        time.Time `yaml:"expires_at"`
	MaxNodes         int       `yaml:"max_nodes"`
}
|
|
|
|
// AIConfig defines AI service settings
|
|
type AIConfig struct {
|
|
Provider string `yaml:"provider"`
|
|
Ollama OllamaConfig `yaml:"ollama"`
|
|
ResetData ResetDataConfig `yaml:"resetdata"`
|
|
}
|
|
|
|
// OllamaConfig holds the Ollama-specific settings.
//
// LoadFromEnvironment reads Endpoint from OLLAMA_ENDPOINT and Timeout from
// OLLAMA_TIMEOUT (a Go duration string such as "30s").
type OllamaConfig struct {
	Endpoint string        `yaml:"endpoint"`
	Timeout  time.Duration `yaml:"timeout"`
}
|
|
|
|
// ResetDataConfig holds the ResetData LLM service settings.
//
// LoadFromEnvironment reads BaseURL from RESETDATA_BASE_URL, APIKey from
// RESETDATA_API_KEY (or the file named by RESETDATA_API_KEY_FILE), Model from
// RESETDATA_MODEL and Timeout from RESETDATA_TIMEOUT.
type ResetDataConfig struct {
	BaseURL string        `yaml:"base_url"`
	APIKey  string        `yaml:"api_key"`
	Model   string        `yaml:"model"`
	Timeout time.Duration `yaml:"timeout"`
}
|
|
|
|
// LoggingConfig holds the logging settings.
//
// LoadFromEnvironment reads Level from LOG_LEVEL (default "info") and Format
// from LOG_FORMAT (default "structured").
type LoggingConfig struct {
	Level  string `yaml:"level"`
	Format string `yaml:"format"`
}
|
|
|
|
// V2Config defines v2-specific settings (from CHORUS)
|
|
type V2Config struct {
|
|
DHT DHTConfig `yaml:"dht"`
|
|
}
|
|
|
|
// DHTConfig holds the DHT settings.
//
// LoadFromEnvironment reads Enabled from CHORUS_DHT_ENABLED (default true) and
// BootstrapPeers from the comma-separated CHORUS_BOOTSTRAP_PEERS.
type DHTConfig struct {
	Enabled        bool     `yaml:"enabled"`
	BootstrapPeers []string `yaml:"bootstrap_peers"`
}
|
|
|
|
// UCXLConfig defines UCXL protocol settings
|
|
type UCXLConfig struct {
|
|
Enabled bool `yaml:"enabled"`
|
|
Server ServerConfig `yaml:"server"`
|
|
Storage StorageConfig `yaml:"storage"`
|
|
Resolution ResolutionConfig `yaml:"resolution"`
|
|
}
|
|
|
|
// ServerConfig holds the server settings used inside UCXLConfig.
//
// LoadFromEnvironment reads Enabled from CHORUS_UCXL_SERVER_ENABLED, Port from
// CHORUS_UCXL_SERVER_PORT and BasePath from CHORUS_UCXL_SERVER_BASE_PATH.
type ServerConfig struct {
	Enabled  bool   `yaml:"enabled"`
	Port     int    `yaml:"port"`
	BasePath string `yaml:"base_path"`
}
|
|
|
|
// StorageConfig holds the storage settings used inside UCXLConfig.
//
// LoadFromEnvironment reads Directory from CHORUS_UCXL_STORAGE_DIRECTORY.
type StorageConfig struct {
	Directory string `yaml:"directory"`
}
|
|
|
|
// ResolutionConfig holds the resolution settings used inside UCXLConfig.
//
// LoadFromEnvironment reads CacheTTL from CHORUS_UCXL_CACHE_TTL (a Go duration
// string; default one hour).
type ResolutionConfig struct {
	CacheTTL time.Duration `yaml:"cache_ttl"`
}
|
|
|
|
// SlurpConfig holds the SLURP settings.
//
// LoadFromEnvironment reads Enabled from CHORUS_SLURP_ENABLED (default false).
type SlurpConfig struct {
	Enabled bool `yaml:"enabled"`
}
|
|
|
|
// WHOOSHAPIConfig holds the WHOOSH API integration settings.
//
// LoadFromEnvironment reads URL from WHOOSH_API_URL, BaseURL from
// WHOOSH_API_BASE_URL, Token from WHOOSH_API_TOKEN and Enabled from
// WHOOSH_API_ENABLED (default false).
type WHOOSHAPIConfig struct {
	URL     string `yaml:"url"`
	BaseURL string `yaml:"base_url"`
	Token   string `yaml:"token"`
	Enabled bool   `yaml:"enabled"`
}
|
|
|
|
// LoadFromEnvironment loads configuration from environment variables
|
|
func LoadFromEnvironment() (*Config, error) {
|
|
cfg := &Config{
|
|
Agent: AgentConfig{
|
|
ID: getEnvOrDefault("CHORUS_AGENT_ID", ""),
|
|
Specialization: getEnvOrDefault("CHORUS_SPECIALIZATION", "general_developer"),
|
|
MaxTasks: getEnvIntOrDefault("CHORUS_MAX_TASKS", 3),
|
|
Capabilities: getEnvArrayOrDefault("CHORUS_CAPABILITIES", []string{"general_development", "task_coordination"}),
|
|
Models: getEnvArrayOrDefault("CHORUS_MODELS", []string{"meta/llama-3.1-8b-instruct"}),
|
|
Role: getEnvOrDefault("CHORUS_ROLE", ""),
|
|
Project: getEnvOrDefault("CHORUS_PROJECT", "chorus"),
|
|
Expertise: getEnvArrayOrDefault("CHORUS_EXPERTISE", []string{}),
|
|
ReportsTo: getEnvOrDefault("CHORUS_REPORTS_TO", ""),
|
|
Deliverables: getEnvArrayOrDefault("CHORUS_DELIVERABLES", []string{}),
|
|
ModelSelectionWebhook: getEnvOrDefault("CHORUS_MODEL_SELECTION_WEBHOOK", ""),
|
|
DefaultReasoningModel: getEnvOrDefault("CHORUS_DEFAULT_REASONING_MODEL", "meta/llama-3.1-8b-instruct"),
|
|
},
|
|
Network: NetworkConfig{
|
|
P2PPort: getEnvIntOrDefault("CHORUS_P2P_PORT", 9000),
|
|
APIPort: getEnvIntOrDefault("CHORUS_API_PORT", 8080),
|
|
HealthPort: getEnvIntOrDefault("CHORUS_HEALTH_PORT", 8081),
|
|
BindAddr: getEnvOrDefault("CHORUS_BIND_ADDRESS", "0.0.0.0"),
|
|
},
|
|
License: LicenseConfig{
|
|
LicenseID: getEnvOrFileContent("CHORUS_LICENSE_ID", "CHORUS_LICENSE_ID_FILE"),
|
|
ClusterID: getEnvOrDefault("CHORUS_CLUSTER_ID", "default-cluster"),
|
|
OrganizationName: getEnvOrDefault("CHORUS_ORGANIZATION_NAME", ""),
|
|
KachingURL: getEnvOrDefault("CHORUS_KACHING_URL", "https://kaching.chorus.services"),
|
|
IsActive: false, // Will be set during validation
|
|
GracePeriodHours: getEnvIntOrDefault("CHORUS_GRACE_PERIOD_HOURS", 72),
|
|
},
|
|
AI: AIConfig{
|
|
Provider: getEnvOrDefault("CHORUS_AI_PROVIDER", "resetdata"),
|
|
Ollama: OllamaConfig{
|
|
Endpoint: getEnvOrDefault("OLLAMA_ENDPOINT", "http://localhost:11434"),
|
|
Timeout: getEnvDurationOrDefault("OLLAMA_TIMEOUT", 30*time.Second),
|
|
},
|
|
ResetData: ResetDataConfig{
|
|
BaseURL: getEnvOrDefault("RESETDATA_BASE_URL", "https://models.au-syd.resetdata.ai/v1"),
|
|
APIKey: getEnvOrFileContent("RESETDATA_API_KEY", "RESETDATA_API_KEY_FILE"),
|
|
Model: getEnvOrDefault("RESETDATA_MODEL", "meta/llama-3.1-8b-instruct"),
|
|
Timeout: getEnvDurationOrDefault("RESETDATA_TIMEOUT", 30*time.Second),
|
|
},
|
|
},
|
|
Logging: LoggingConfig{
|
|
Level: getEnvOrDefault("LOG_LEVEL", "info"),
|
|
Format: getEnvOrDefault("LOG_FORMAT", "structured"),
|
|
},
|
|
V2: V2Config{
|
|
DHT: DHTConfig{
|
|
Enabled: getEnvBoolOrDefault("CHORUS_DHT_ENABLED", true),
|
|
BootstrapPeers: getEnvArrayOrDefault("CHORUS_BOOTSTRAP_PEERS", []string{}),
|
|
},
|
|
},
|
|
UCXL: UCXLConfig{
|
|
Enabled: getEnvBoolOrDefault("CHORUS_UCXL_ENABLED", true),
|
|
Server: ServerConfig{
|
|
Enabled: getEnvBoolOrDefault("CHORUS_UCXL_SERVER_ENABLED", true),
|
|
Port: getEnvIntOrDefault("CHORUS_UCXL_SERVER_PORT", 8082),
|
|
BasePath: getEnvOrDefault("CHORUS_UCXL_SERVER_BASE_PATH", ""),
|
|
},
|
|
Storage: StorageConfig{
|
|
Directory: getEnvOrDefault("CHORUS_UCXL_STORAGE_DIRECTORY", "/tmp/chorus-ucxi-storage"),
|
|
},
|
|
Resolution: ResolutionConfig{
|
|
CacheTTL: getEnvDurationOrDefault("CHORUS_UCXL_CACHE_TTL", 1*time.Hour),
|
|
},
|
|
},
|
|
Slurp: SlurpConfig{
|
|
Enabled: getEnvBoolOrDefault("CHORUS_SLURP_ENABLED", false),
|
|
},
|
|
Security: SecurityConfig{
|
|
KeyRotationDays: getEnvIntOrDefault("CHORUS_KEY_ROTATION_DAYS", 30),
|
|
AuditLogging: getEnvBoolOrDefault("CHORUS_AUDIT_LOGGING", true),
|
|
AuditPath: getEnvOrDefault("CHORUS_AUDIT_PATH", "/tmp/chorus-audit.log"),
|
|
ElectionConfig: ElectionConfig{
|
|
DiscoveryTimeout: getEnvDurationOrDefault("CHORUS_DISCOVERY_TIMEOUT", 15*time.Second),
|
|
HeartbeatTimeout: getEnvDurationOrDefault("CHORUS_HEARTBEAT_TIMEOUT", 30*time.Second),
|
|
ElectionTimeout: getEnvDurationOrDefault("CHORUS_ELECTION_TIMEOUT", 60*time.Second),
|
|
DiscoveryBackoff: getEnvDurationOrDefault("CHORUS_DISCOVERY_BACKOFF", 5*time.Second),
|
|
LeadershipScoring: &LeadershipScoring{
|
|
UptimeWeight: 0.4,
|
|
CapabilityWeight: 0.3,
|
|
ExperienceWeight: 0.2,
|
|
LoadWeight: 0.1,
|
|
},
|
|
},
|
|
},
|
|
WHOOSHAPI: WHOOSHAPIConfig{
|
|
URL: getEnvOrDefault("WHOOSH_API_URL", "http://localhost:3000"),
|
|
BaseURL: getEnvOrDefault("WHOOSH_API_BASE_URL", "http://localhost:3000"),
|
|
Token: os.Getenv("WHOOSH_API_TOKEN"),
|
|
Enabled: getEnvBoolOrDefault("WHOOSH_API_ENABLED", false),
|
|
},
|
|
}
|
|
|
|
// Validate required configuration
|
|
if err := cfg.Validate(); err != nil {
|
|
return nil, fmt.Errorf("configuration validation failed: %w", err)
|
|
}
|
|
|
|
return cfg, nil
|
|
}
|
|
|
|
// Validate ensures all required configuration is present
|
|
func (c *Config) Validate() error {
|
|
if c.License.LicenseID == "" {
|
|
return fmt.Errorf("CHORUS_LICENSE_ID is required")
|
|
}
|
|
|
|
if c.Agent.ID == "" {
|
|
// Auto-generate agent ID if not provided
|
|
hostname, _ := os.Hostname()
|
|
containerID := os.Getenv("HOSTNAME") // Docker sets this to container ID
|
|
if containerID != "" && containerID != hostname {
|
|
c.Agent.ID = fmt.Sprintf("chorus-%s", containerID[:12])
|
|
} else {
|
|
c.Agent.ID = fmt.Sprintf("chorus-%s", hostname)
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// ApplyRoleDefinition applies role-based configuration (from CHORUS)
|
|
func (c *Config) ApplyRoleDefinition(role string) error {
|
|
// This would contain the role definition logic from CHORUS
|
|
c.Agent.Role = role
|
|
return nil
|
|
}
|
|
|
|
// GetRoleAuthority returns the authority level for a role (from CHORUS)
|
|
func (c *Config) GetRoleAuthority(role string) (string, error) {
|
|
// This would contain the authority mapping from CHORUS
|
|
switch role {
|
|
case "admin":
|
|
return "master", nil
|
|
default:
|
|
return "member", nil
|
|
}
|
|
}
|
|
|
|
// Helper functions for environment variable parsing
|
|
|
|
// getEnvOrDefault returns the value of the environment variable key, or
// defaultValue when the variable is unset or empty.
func getEnvOrDefault(key, defaultValue string) string {
	fromEnv := os.Getenv(key)
	if fromEnv == "" {
		return defaultValue
	}
	return fromEnv
}
|
|
|
|
// getEnvIntOrDefault returns the environment variable key parsed as a decimal
// integer. It returns defaultValue when the variable is unset, empty, or not a
// valid integer; parse failures are not reported.
func getEnvIntOrDefault(key string, defaultValue int) int {
	raw := os.Getenv(key)
	if raw == "" {
		return defaultValue
	}
	n, err := strconv.Atoi(raw)
	if err != nil {
		return defaultValue
	}
	return n
}
|
|
|
|
// getEnvBoolOrDefault returns the environment variable key parsed with
// strconv.ParseBool. It returns defaultValue when the variable is unset,
// empty, or not a recognised boolean; parse failures are not reported.
func getEnvBoolOrDefault(key string, defaultValue bool) bool {
	raw := os.Getenv(key)
	if raw == "" {
		return defaultValue
	}
	flag, err := strconv.ParseBool(raw)
	if err != nil {
		return defaultValue
	}
	return flag
}
|
|
|
|
// getEnvDurationOrDefault returns the environment variable key parsed with
// time.ParseDuration (for example "15s" or "1h"). It returns defaultValue when
// the variable is unset, empty, or not a valid duration; parse failures are
// not reported.
func getEnvDurationOrDefault(key string, defaultValue time.Duration) time.Duration {
	raw := os.Getenv(key)
	if raw == "" {
		return defaultValue
	}
	d, err := time.ParseDuration(raw)
	if err != nil {
		return defaultValue
	}
	return d
}
|
|
|
|
// getEnvArrayOrDefault returns the environment variable key interpreted as a
// comma-separated list.
//
// Whitespace around each entry is trimmed and empty entries are dropped, so
// "a, b,,c" yields ["a" "b" "c"]. It returns defaultValue when the variable is
// unset or empty, or when no non-empty entry remains after trimming.
func getEnvArrayOrDefault(key string, defaultValue []string) []string {
	raw := os.Getenv(key)
	if raw == "" {
		return defaultValue
	}

	parts := strings.Split(raw, ",")
	items := make([]string, 0, len(parts))
	for _, part := range parts {
		// Entries are compared verbatim by consumers, so stray spaces such as
		// in "a, b" must not survive into the result.
		if part = strings.TrimSpace(part); part != "" {
			items = append(items, part)
		}
	}
	if len(items) == 0 {
		return defaultValue
	}
	return items
}
|
|
|
|
// getEnvOrFileContent resolves a value from the environment variable envKey,
// or failing that from the file whose path is held in the environment variable
// fileEnvKey (for Docker secrets support).
//
// A non-empty envKey value wins and is returned unmodified. Otherwise the file
// is read and its content returned with surrounding whitespace trimmed. It
// returns "" when neither source is set or when the file cannot be read; the
// read error is not reported.
func getEnvOrFileContent(envKey, fileEnvKey string) string {
	direct := os.Getenv(envKey)
	if direct != "" {
		return direct
	}

	path := os.Getenv(fileEnvKey)
	if path == "" {
		return ""
	}

	data, err := ioutil.ReadFile(path)
	if err != nil {
		return ""
	}
	return strings.TrimSpace(string(data))
}
|
|
|
|
// IsSetupRequired reports whether interactive setup is needed. It always
// returns false: containers are pre-configured through the environment, so
// configPath is ignored.
func IsSetupRequired(configPath string) bool {
	const setupRequired = false
	return setupRequired
}
|
|
|
|
// IsValidConfiguration validates configuration (simplified for containers)
|
|
func IsValidConfiguration(cfg *Config) bool {
|
|
return cfg.License.LicenseID != "" && cfg.License.ClusterID != ""
|
|
}
|
|
|
|
// LoadConfig loads configuration from file (for API compatibility)
|
|
func LoadConfig(configPath string) (*Config, error) {
|
|
// For containers, always load from environment
|
|
return LoadFromEnvironment()
|
|
}
|
|
|
|
// SaveConfig saves configuration to file (stub for API compatibility)
|
|
func SaveConfig(cfg *Config, configPath string) error {
|
|
// For containers, configuration is environment-based, so this is a no-op
|
|
return nil
|
|
}
|
|
|
|
// LoadRuntimeConfig loads configuration with runtime assignment support
|
|
func LoadRuntimeConfig() (*RuntimeConfig, error) {
|
|
// Load base configuration from environment
|
|
baseConfig, err := LoadFromEnvironment()
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to load base configuration: %w", err)
|
|
}
|
|
|
|
// Create runtime configuration manager
|
|
runtimeConfig := NewRuntimeConfig(baseConfig)
|
|
|
|
return runtimeConfig, nil
|
|
}
|