Harden CHORUS security and messaging stack
@@ -28,17 +28,18 @@ type Config struct {
// AgentConfig defines agent-specific settings
|
||||
type AgentConfig struct {
|
||||
ID string `yaml:"id"`
|
||||
Specialization string `yaml:"specialization"`
|
||||
MaxTasks int `yaml:"max_tasks"`
|
||||
Capabilities []string `yaml:"capabilities"`
|
||||
Models []string `yaml:"models"`
|
||||
Role string `yaml:"role"`
|
||||
Expertise []string `yaml:"expertise"`
|
||||
ReportsTo string `yaml:"reports_to"`
|
||||
Deliverables []string `yaml:"deliverables"`
|
||||
ModelSelectionWebhook string `yaml:"model_selection_webhook"`
|
||||
DefaultReasoningModel string `yaml:"default_reasoning_model"`
|
||||
ID string `yaml:"id"`
|
||||
Specialization string `yaml:"specialization"`
|
||||
MaxTasks int `yaml:"max_tasks"`
|
||||
Capabilities []string `yaml:"capabilities"`
|
||||
Models []string `yaml:"models"`
|
||||
Role string `yaml:"role"`
|
||||
Project string `yaml:"project"`
|
||||
Expertise []string `yaml:"expertise"`
|
||||
ReportsTo string `yaml:"reports_to"`
|
||||
Deliverables []string `yaml:"deliverables"`
|
||||
ModelSelectionWebhook string `yaml:"model_selection_webhook"`
|
||||
DefaultReasoningModel string `yaml:"default_reasoning_model"`
|
||||
}
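The new `Project` field is wired to a `CHORUS_PROJECT` environment variable later in this commit. A minimal sketch of how a container would surface it, assuming the `chorus/pkg/config` import path used by the test files in this diff:

```go
package main

import (
	"fmt"
	"log"
	"os"

	"chorus/pkg/config" // import path assumed from the tests in this diff
)

func main() {
	// Environment-based configuration, as the container entrypoint would set it.
	os.Setenv("CHORUS_AGENT_ID", "chorus-worker-01")
	os.Setenv("CHORUS_ROLE", "backend_developer")
	os.Setenv("CHORUS_PROJECT", "chorus") // new in this commit; defaults to "chorus" when unset

	cfg, err := config.LoadFromEnvironment()
	if err != nil {
		log.Fatalf("load config: %v", err)
	}
	fmt.Println(cfg.Agent.Project) // "chorus"
}
```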
// NetworkConfig defines network and API settings
|
||||
@@ -65,9 +66,9 @@ type LicenseConfig struct {
|
||||
|
||||
// AIConfig defines AI service settings
|
||||
type AIConfig struct {
|
||||
Provider string `yaml:"provider"`
|
||||
Ollama OllamaConfig `yaml:"ollama"`
|
||||
ResetData ResetDataConfig `yaml:"resetdata"`
|
||||
Provider string `yaml:"provider"`
|
||||
Ollama OllamaConfig `yaml:"ollama"`
|
||||
ResetData ResetDataConfig `yaml:"resetdata"`
|
||||
}
|
||||
|
||||
// OllamaConfig defines Ollama-specific settings
|
||||
@@ -78,10 +79,10 @@ type OllamaConfig struct {
|
||||
|
||||
// ResetDataConfig defines ResetData LLM service settings
|
||||
type ResetDataConfig struct {
|
||||
BaseURL string `yaml:"base_url"`
|
||||
APIKey string `yaml:"api_key"`
|
||||
Model string `yaml:"model"`
|
||||
Timeout time.Duration `yaml:"timeout"`
|
||||
BaseURL string `yaml:"base_url"`
|
||||
APIKey string `yaml:"api_key"`
|
||||
Model string `yaml:"model"`
|
||||
Timeout time.Duration `yaml:"timeout"`
|
||||
}
|
||||
|
||||
// LoggingConfig defines logging settings
|
||||
@@ -103,9 +104,9 @@ type DHTConfig struct {
|
||||
|
||||
// UCXLConfig defines UCXL protocol settings
|
||||
type UCXLConfig struct {
|
||||
Enabled bool `yaml:"enabled"`
|
||||
Server ServerConfig `yaml:"server"`
|
||||
Storage StorageConfig `yaml:"storage"`
|
||||
Enabled bool `yaml:"enabled"`
|
||||
Server ServerConfig `yaml:"server"`
|
||||
Storage StorageConfig `yaml:"storage"`
|
||||
Resolution ResolutionConfig `yaml:"resolution"`
|
||||
}
|
||||
|
||||
@@ -133,25 +134,26 @@ type SlurpConfig struct {
|
||||
|
||||
// WHOOSHAPIConfig defines WHOOSH API integration settings
|
||||
type WHOOSHAPIConfig struct {
|
||||
URL string `yaml:"url"`
|
||||
BaseURL string `yaml:"base_url"`
|
||||
Token string `yaml:"token"`
|
||||
Enabled bool `yaml:"enabled"`
|
||||
URL string `yaml:"url"`
|
||||
BaseURL string `yaml:"base_url"`
|
||||
Token string `yaml:"token"`
|
||||
Enabled bool `yaml:"enabled"`
|
||||
}
|
||||
|
||||
// LoadFromEnvironment loads configuration from environment variables
|
||||
func LoadFromEnvironment() (*Config, error) {
|
||||
cfg := &Config{
|
||||
Agent: AgentConfig{
|
||||
ID: getEnvOrDefault("CHORUS_AGENT_ID", ""),
|
||||
Specialization: getEnvOrDefault("CHORUS_SPECIALIZATION", "general_developer"),
|
||||
MaxTasks: getEnvIntOrDefault("CHORUS_MAX_TASKS", 3),
|
||||
Capabilities: getEnvArrayOrDefault("CHORUS_CAPABILITIES", []string{"general_development", "task_coordination"}),
|
||||
Models: getEnvArrayOrDefault("CHORUS_MODELS", []string{"meta/llama-3.1-8b-instruct"}),
|
||||
Role: getEnvOrDefault("CHORUS_ROLE", ""),
|
||||
Expertise: getEnvArrayOrDefault("CHORUS_EXPERTISE", []string{}),
|
||||
ReportsTo: getEnvOrDefault("CHORUS_REPORTS_TO", ""),
|
||||
Deliverables: getEnvArrayOrDefault("CHORUS_DELIVERABLES", []string{}),
|
||||
ID: getEnvOrDefault("CHORUS_AGENT_ID", ""),
|
||||
Specialization: getEnvOrDefault("CHORUS_SPECIALIZATION", "general_developer"),
|
||||
MaxTasks: getEnvIntOrDefault("CHORUS_MAX_TASKS", 3),
|
||||
Capabilities: getEnvArrayOrDefault("CHORUS_CAPABILITIES", []string{"general_development", "task_coordination"}),
|
||||
Models: getEnvArrayOrDefault("CHORUS_MODELS", []string{"meta/llama-3.1-8b-instruct"}),
|
||||
Role: getEnvOrDefault("CHORUS_ROLE", ""),
|
||||
Project: getEnvOrDefault("CHORUS_PROJECT", "chorus"),
|
||||
Expertise: getEnvArrayOrDefault("CHORUS_EXPERTISE", []string{}),
|
||||
ReportsTo: getEnvOrDefault("CHORUS_REPORTS_TO", ""),
|
||||
Deliverables: getEnvArrayOrDefault("CHORUS_DELIVERABLES", []string{}),
|
||||
ModelSelectionWebhook: getEnvOrDefault("CHORUS_MODEL_SELECTION_WEBHOOK", ""),
|
||||
DefaultReasoningModel: getEnvOrDefault("CHORUS_DEFAULT_REASONING_MODEL", "meta/llama-3.1-8b-instruct"),
|
||||
},
|
||||
@@ -214,10 +216,10 @@ func LoadFromEnvironment() (*Config, error) {
|
||||
AuditLogging: getEnvBoolOrDefault("CHORUS_AUDIT_LOGGING", true),
|
||||
AuditPath: getEnvOrDefault("CHORUS_AUDIT_PATH", "/tmp/chorus-audit.log"),
|
||||
ElectionConfig: ElectionConfig{
|
||||
DiscoveryTimeout: getEnvDurationOrDefault("CHORUS_DISCOVERY_TIMEOUT", 10*time.Second),
|
||||
HeartbeatTimeout: getEnvDurationOrDefault("CHORUS_HEARTBEAT_TIMEOUT", 30*time.Second),
|
||||
ElectionTimeout: getEnvDurationOrDefault("CHORUS_ELECTION_TIMEOUT", 60*time.Second),
|
||||
DiscoveryBackoff: getEnvDurationOrDefault("CHORUS_DISCOVERY_BACKOFF", 5*time.Second),
|
||||
DiscoveryTimeout: getEnvDurationOrDefault("CHORUS_DISCOVERY_TIMEOUT", 10*time.Second),
|
||||
HeartbeatTimeout: getEnvDurationOrDefault("CHORUS_HEARTBEAT_TIMEOUT", 30*time.Second),
|
||||
ElectionTimeout: getEnvDurationOrDefault("CHORUS_ELECTION_TIMEOUT", 60*time.Second),
|
||||
DiscoveryBackoff: getEnvDurationOrDefault("CHORUS_DISCOVERY_BACKOFF", 5*time.Second),
|
||||
LeadershipScoring: &LeadershipScoring{
|
||||
UptimeWeight: 0.4,
|
||||
CapabilityWeight: 0.3,
|
||||
@@ -247,7 +249,7 @@ func (c *Config) Validate() error {
|
||||
if c.License.LicenseID == "" {
|
||||
return fmt.Errorf("CHORUS_LICENSE_ID is required")
|
||||
}
|
||||
|
||||
|
||||
if c.Agent.ID == "" {
|
||||
// Auto-generate agent ID if not provided
|
||||
hostname, _ := os.Hostname()
|
||||
@@ -258,7 +260,7 @@ func (c *Config) Validate() error {
|
||||
c.Agent.ID = fmt.Sprintf("chorus-%s", hostname)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -329,14 +331,14 @@ func getEnvOrFileContent(envKey, fileEnvKey string) string {
|
||||
if value := os.Getenv(envKey); value != "" {
|
||||
return value
|
||||
}
|
||||
|
||||
|
||||
// Then try reading from file path specified in fileEnvKey
|
||||
if filePath := os.Getenv(fileEnvKey); filePath != "" {
|
||||
if content, err := ioutil.ReadFile(filePath); err == nil {
|
||||
return strings.TrimSpace(string(content))
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return ""
|
||||
}
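`getEnvOrFileContent` prefers the literal environment variable and only then reads the file named by a companion variable, which matches the Docker/Swarm secrets pattern. A sketch of the intended call shape inside this package; only `CHORUS_LICENSE_ID` itself appears in this diff, the `_FILE` variant below is an illustrative assumption:

```go
// Hypothetical validation fragment; CHORUS_LICENSE_ID_FILE is an assumed name.
// With Swarm, the secret would be mounted at /run/secrets/<name> and the
// *_FILE variable pointed at that path.
licenseID := getEnvOrFileContent("CHORUS_LICENSE_ID", "CHORUS_LICENSE_ID_FILE")
if licenseID == "" {
	return fmt.Errorf("CHORUS_LICENSE_ID is not configured")
}
```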
@@ -360,4 +362,4 @@ func LoadConfig(configPath string) (*Config, error) {
|
||||
func SaveConfig(cfg *Config, configPath string) error {
|
||||
// For containers, configuration is environment-based, so this is a no-op
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,27 +12,27 @@ const (
|
||||
|
||||
// SecurityConfig defines security-related configuration
|
||||
type SecurityConfig struct {
|
||||
KeyRotationDays int `yaml:"key_rotation_days"`
|
||||
AuditLogging bool `yaml:"audit_logging"`
|
||||
AuditPath string `yaml:"audit_path"`
|
||||
ElectionConfig ElectionConfig `yaml:"election"`
|
||||
KeyRotationDays int `yaml:"key_rotation_days"`
|
||||
AuditLogging bool `yaml:"audit_logging"`
|
||||
AuditPath string `yaml:"audit_path"`
|
||||
ElectionConfig ElectionConfig `yaml:"election"`
|
||||
}
|
||||
|
||||
// ElectionConfig defines election timing and behavior settings
|
||||
type ElectionConfig struct {
|
||||
DiscoveryTimeout time.Duration `yaml:"discovery_timeout"`
|
||||
HeartbeatTimeout time.Duration `yaml:"heartbeat_timeout"`
|
||||
ElectionTimeout time.Duration `yaml:"election_timeout"`
|
||||
DiscoveryBackoff time.Duration `yaml:"discovery_backoff"`
|
||||
LeadershipScoring *LeadershipScoring `yaml:"leadership_scoring,omitempty"`
|
||||
DiscoveryTimeout time.Duration `yaml:"discovery_timeout"`
|
||||
HeartbeatTimeout time.Duration `yaml:"heartbeat_timeout"`
|
||||
ElectionTimeout time.Duration `yaml:"election_timeout"`
|
||||
DiscoveryBackoff time.Duration `yaml:"discovery_backoff"`
|
||||
LeadershipScoring *LeadershipScoring `yaml:"leadership_scoring,omitempty"`
|
||||
}
|
||||
|
||||
// LeadershipScoring defines weights for election scoring
type LeadershipScoring struct {
	UptimeWeight     float64 `yaml:"uptime_weight"`
	CapabilityWeight float64 `yaml:"capability_weight"`
	ExperienceWeight float64 `yaml:"experience_weight"`
	LoadWeight       float64 `yaml:"load_weight"`
}
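The election defaults loaded from the environment earlier in this diff (10s discovery, 30s heartbeat, 60s election, 5s backoff, uptime/capability weights 0.4/0.3) map onto these structs roughly as follows. This is a sketch, not the canonical defaults function; the experience and load weights are placeholders because the hunk is cut off before them:

```go
package main

import (
	"fmt"
	"time"

	"chorus/pkg/config"
)

func main() {
	sec := config.SecurityConfig{
		KeyRotationDays: 90,
		AuditLogging:    true,
		AuditPath:       "/tmp/chorus-audit.log",
		ElectionConfig: config.ElectionConfig{
			DiscoveryTimeout: 10 * time.Second,
			HeartbeatTimeout: 30 * time.Second,
			ElectionTimeout:  60 * time.Second,
			DiscoveryBackoff: 5 * time.Second,
			LeadershipScoring: &config.LeadershipScoring{
				UptimeWeight:     0.4,
				CapabilityWeight: 0.3,
				ExperienceWeight: 0.2, // placeholder: not shown in this hunk
				LoadWeight:       0.1, // placeholder: not shown in this hunk
			},
		},
	}
	fmt.Printf("%+v\n", sec)
}
```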
// AgeKeyPair represents an Age encryption key pair
|
||||
@@ -43,14 +43,14 @@ type AgeKeyPair struct {
|
||||
|
||||
// RoleDefinition represents a role configuration
|
||||
type RoleDefinition struct {
|
||||
Name string `yaml:"name"`
|
||||
Description string `yaml:"description"`
|
||||
Capabilities []string `yaml:"capabilities"`
|
||||
AccessLevel string `yaml:"access_level"`
|
||||
AuthorityLevel string `yaml:"authority_level"`
|
||||
Keys *AgeKeyPair `yaml:"keys,omitempty"`
|
||||
AgeKeys *AgeKeyPair `yaml:"age_keys,omitempty"` // Legacy field name
|
||||
CanDecrypt []string `yaml:"can_decrypt,omitempty"` // Roles this role can decrypt
|
||||
Name string `yaml:"name"`
|
||||
Description string `yaml:"description"`
|
||||
Capabilities []string `yaml:"capabilities"`
|
||||
AccessLevel string `yaml:"access_level"`
|
||||
AuthorityLevel string `yaml:"authority_level"`
|
||||
Keys *AgeKeyPair `yaml:"keys,omitempty"`
|
||||
AgeKeys *AgeKeyPair `yaml:"age_keys,omitempty"` // Legacy field name
|
||||
CanDecrypt []string `yaml:"can_decrypt,omitempty"` // Roles this role can decrypt
|
||||
}
|
||||
|
||||
// GetPredefinedRoles returns the predefined roles for the system
|
||||
@@ -65,7 +65,7 @@ func GetPredefinedRoles() map[string]*RoleDefinition {
|
||||
CanDecrypt: []string{"project_manager", "backend_developer", "frontend_developer", "devops_engineer", "security_engineer"},
|
||||
},
|
||||
"backend_developer": {
|
||||
Name: "backend_developer",
|
||||
Name: "backend_developer",
|
||||
Description: "Backend development and API work",
|
||||
Capabilities: []string{"backend", "api", "database"},
|
||||
AccessLevel: "medium",
|
||||
@@ -90,12 +90,52 @@ func GetPredefinedRoles() map[string]*RoleDefinition {
|
||||
},
|
||||
"security_engineer": {
|
||||
Name: "security_engineer",
|
||||
Description: "Security oversight and hardening",
|
||||
Description: "Security oversight and hardening",
|
||||
Capabilities: []string{"security", "audit", "compliance"},
|
||||
AccessLevel: "high",
|
||||
AuthorityLevel: AuthorityAdmin,
|
||||
CanDecrypt: []string{"security_engineer", "project_manager", "backend_developer", "frontend_developer", "devops_engineer"},
|
||||
},
|
||||
"security_expert": {
|
||||
Name: "security_expert",
|
||||
Description: "Advanced security analysis and policy work",
|
||||
Capabilities: []string{"security", "policy", "response"},
|
||||
AccessLevel: "high",
|
||||
AuthorityLevel: AuthorityAdmin,
|
||||
CanDecrypt: []string{"security_expert", "security_engineer", "project_manager"},
|
||||
},
|
||||
"senior_software_architect": {
|
||||
Name: "senior_software_architect",
|
||||
Description: "Architecture governance and system design",
|
||||
Capabilities: []string{"architecture", "design", "coordination"},
|
||||
AccessLevel: "high",
|
||||
AuthorityLevel: AuthorityAdmin,
|
||||
CanDecrypt: []string{"senior_software_architect", "project_manager", "backend_developer", "frontend_developer"},
|
||||
},
|
||||
"qa_engineer": {
|
||||
Name: "qa_engineer",
|
||||
Description: "Quality assurance and testing",
|
||||
Capabilities: []string{"testing", "validation"},
|
||||
AccessLevel: "medium",
|
||||
AuthorityLevel: AuthorityFull,
|
||||
CanDecrypt: []string{"qa_engineer", "backend_developer", "frontend_developer"},
|
||||
},
|
||||
"readonly_user": {
|
||||
Name: "readonly_user",
|
||||
Description: "Read-only observer with audit access",
|
||||
Capabilities: []string{"observation"},
|
||||
AccessLevel: "low",
|
||||
AuthorityLevel: AuthorityReadOnly,
|
||||
CanDecrypt: []string{"readonly_user"},
|
||||
},
|
||||
"suggestion_only_role": {
|
||||
Name: "suggestion_only_role",
|
||||
Description: "Can propose suggestions but not execute",
|
||||
Capabilities: []string{"recommendation"},
|
||||
AccessLevel: "low",
|
||||
AuthorityLevel: AuthoritySuggestion,
|
||||
CanDecrypt: []string{"suggestion_only_role"},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
@@ -106,16 +146,16 @@ func (c *Config) CanDecryptRole(targetRole string) (bool, error) {
|
||||
if !exists {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
|
||||
targetRoleDef, exists := roles[targetRole]
|
||||
if !exists {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
|
||||
// Simple access level check
|
||||
currentLevel := getAccessLevelValue(currentRole.AccessLevel)
|
||||
targetLevel := getAccessLevelValue(targetRoleDef.AccessLevel)
|
||||
|
||||
|
||||
return currentLevel >= targetLevel, nil
|
||||
}
|
||||
|
||||
@@ -130,4 +170,4 @@ func getAccessLevelValue(level string) int {
|
||||
default:
|
||||
return 0
|
||||
}
|
||||
}
|
||||
}
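A short sketch of how the role matrix is consulted. It assumes `CanDecryptRole` resolves the current role from `c.Agent.Role`, which the access-level comparison above suggests but this hunk does not show explicitly:

```go
// qa_engineer and backend_developer are both "medium" access, so the
// simple level comparison reports true.
cfg := &config.Config{
	Agent: config.AgentConfig{ID: "agent1", Role: "qa_engineer"},
}

ok, err := cfg.CanDecryptRole("backend_developer")
if err != nil {
	log.Fatalf("decrypt check: %v", err)
}
fmt.Println(ok) // true
```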
pkg/dht/dht.go
@@ -6,33 +6,34 @@ import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"crypto/sha256"
|
||||
"github.com/ipfs/go-cid"
|
||||
dht "github.com/libp2p/go-libp2p-kad-dht"
|
||||
"github.com/libp2p/go-libp2p/core/host"
|
||||
"github.com/libp2p/go-libp2p/core/peer"
|
||||
"github.com/libp2p/go-libp2p/core/protocol"
|
||||
"github.com/libp2p/go-libp2p/core/routing"
|
||||
dht "github.com/libp2p/go-libp2p-kad-dht"
|
||||
"github.com/multiformats/go-multiaddr"
|
||||
"github.com/multiformats/go-multihash"
|
||||
"github.com/ipfs/go-cid"
|
||||
"crypto/sha256"
|
||||
)
|
||||
|
||||
// LibP2PDHT provides distributed hash table functionality for CHORUS peer discovery
|
||||
type LibP2PDHT struct {
|
||||
host host.Host
|
||||
kdht *dht.IpfsDHT
|
||||
ctx context.Context
|
||||
cancel context.CancelFunc
|
||||
config *Config
|
||||
|
||||
host host.Host
|
||||
kdht *dht.IpfsDHT
|
||||
ctx context.Context
|
||||
cancel context.CancelFunc
|
||||
config *Config
|
||||
startTime time.Time
|
||||
|
||||
// Bootstrap state
|
||||
bootstrapped bool
|
||||
bootstrapMutex sync.RWMutex
|
||||
|
||||
|
||||
// Peer management
|
||||
knownPeers map[peer.ID]*PeerInfo
|
||||
peersMutex sync.RWMutex
|
||||
|
||||
|
||||
// Replication management
|
||||
replicationManager *ReplicationManager
|
||||
}
|
||||
@@ -41,30 +42,32 @@ type LibP2PDHT struct {
|
||||
type Config struct {
|
||||
// Bootstrap nodes for initial DHT discovery
|
||||
BootstrapPeers []multiaddr.Multiaddr
|
||||
|
||||
|
||||
// Protocol prefix for CHORUS DHT
|
||||
ProtocolPrefix string
|
||||
|
||||
|
||||
// Bootstrap timeout
|
||||
BootstrapTimeout time.Duration
|
||||
|
||||
|
||||
// Peer discovery interval
|
||||
DiscoveryInterval time.Duration
|
||||
|
||||
|
||||
// DHT mode (client, server, auto)
|
||||
Mode dht.ModeOpt
|
||||
|
||||
|
||||
// Enable automatic bootstrap
|
||||
AutoBootstrap bool
|
||||
}
|
||||
|
||||
// PeerInfo holds information about discovered peers
|
||||
const defaultProviderResultLimit = 20
|
||||
|
||||
type PeerInfo struct {
|
||||
ID peer.ID
|
||||
Addresses []multiaddr.Multiaddr
|
||||
Agent string
|
||||
Role string
|
||||
LastSeen time.Time
|
||||
ID peer.ID
|
||||
Addresses []multiaddr.Multiaddr
|
||||
Agent string
|
||||
Role string
|
||||
LastSeen time.Time
|
||||
Capabilities []string
|
||||
}
|
||||
|
||||
@@ -74,23 +77,28 @@ func DefaultConfig() *Config {
|
||||
ProtocolPrefix: "/CHORUS",
|
||||
BootstrapTimeout: 30 * time.Second,
|
||||
DiscoveryInterval: 60 * time.Second,
|
||||
Mode: dht.ModeAuto,
|
||||
AutoBootstrap: true,
|
||||
Mode: dht.ModeAuto,
|
||||
AutoBootstrap: true,
|
||||
}
|
||||
}
|
||||
|
||||
// NewLibP2PDHT creates a new LibP2PDHT instance
|
||||
// NewDHT is a backward-compatible helper that delegates to NewLibP2PDHT.
|
||||
func NewDHT(ctx context.Context, host host.Host, opts ...Option) (*LibP2PDHT, error) {
|
||||
return NewLibP2PDHT(ctx, host, opts...)
|
||||
}
|
||||
|
||||
// NewLibP2PDHT creates a new LibP2PDHT instance
|
||||
func NewLibP2PDHT(ctx context.Context, host host.Host, opts ...Option) (*LibP2PDHT, error) {
|
||||
config := DefaultConfig()
|
||||
for _, opt := range opts {
|
||||
opt(config)
|
||||
}
|
||||
|
||||
|
||||
// Create context with cancellation
|
||||
dhtCtx, cancel := context.WithCancel(ctx)
|
||||
|
||||
|
||||
// Create Kademlia DHT
|
||||
kdht, err := dht.New(dhtCtx, host,
|
||||
kdht, err := dht.New(dhtCtx, host,
|
||||
dht.Mode(config.Mode),
|
||||
dht.ProtocolPrefix(protocol.ID(config.ProtocolPrefix)),
|
||||
)
|
||||
@@ -98,22 +106,23 @@ func NewLibP2PDHT(ctx context.Context, host host.Host, opts ...Option) (*LibP2PD
|
||||
cancel()
|
||||
return nil, fmt.Errorf("failed to create DHT: %w", err)
|
||||
}
|
||||
|
||||
|
||||
d := &LibP2PDHT{
|
||||
host: host,
|
||||
kdht: kdht,
|
||||
ctx: dhtCtx,
|
||||
cancel: cancel,
|
||||
config: config,
|
||||
startTime: time.Now(),
|
||||
knownPeers: make(map[peer.ID]*PeerInfo),
|
||||
}
|
||||
|
||||
|
||||
// Initialize replication manager
|
||||
d.replicationManager = NewReplicationManager(dhtCtx, kdht, DefaultReplicationConfig())
|
||||
|
||||
|
||||
// Start background processes
|
||||
go d.startBackgroundTasks()
|
||||
|
||||
|
||||
return d, nil
|
||||
}
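The `NewDHT` wrapper keeps older call sites compiling while `NewLibP2PDHT` remains the real constructor. A minimal wiring sketch using the functional options that appear later in this diff; listen address and bootstrap behavior are placeholders:

```go
package main

import (
	"context"
	"log"
	"time"

	"github.com/libp2p/go-libp2p"
	kaddht "github.com/libp2p/go-libp2p-kad-dht"

	"chorus/pkg/dht" // import path assumed from the test files in this diff
)

func main() {
	ctx := context.Background()

	host, err := libp2p.New(libp2p.ListenAddrStrings("/ip4/0.0.0.0/tcp/0"))
	if err != nil {
		log.Fatalf("libp2p host: %v", err)
	}
	defer host.Close()

	d, err := dht.NewDHT(ctx, host,
		dht.WithProtocolPrefix("/CHORUS"),
		dht.WithMode(kaddht.ModeAuto),
		dht.WithBootstrapTimeout(30*time.Second),
		dht.WithAutoBootstrap(false), // bootstrap explicitly below
	)
	if err != nil {
		log.Fatalf("dht: %v", err)
	}
	defer d.Close()

	if err := d.Bootstrap(); err != nil {
		log.Printf("bootstrap (expected to fail without reachable peers): %v", err)
	}
}
```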
@@ -178,25 +187,25 @@ func WithAutoBootstrap(auto bool) Option {
|
||||
func (d *LibP2PDHT) Bootstrap() error {
|
||||
d.bootstrapMutex.Lock()
|
||||
defer d.bootstrapMutex.Unlock()
|
||||
|
||||
|
||||
if d.bootstrapped {
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
// Connect to bootstrap peers
|
||||
if len(d.config.BootstrapPeers) == 0 {
|
||||
// Use default IPFS bootstrap peers if none configured
|
||||
d.config.BootstrapPeers = dht.DefaultBootstrapPeers
|
||||
}
|
||||
|
||||
|
||||
// Bootstrap the DHT
|
||||
bootstrapCtx, cancel := context.WithTimeout(d.ctx, d.config.BootstrapTimeout)
|
||||
defer cancel()
|
||||
|
||||
|
||||
if err := d.kdht.Bootstrap(bootstrapCtx); err != nil {
|
||||
return fmt.Errorf("DHT bootstrap failed: %w", err)
|
||||
}
|
||||
|
||||
|
||||
// Connect to bootstrap peers
|
||||
var connected int
|
||||
for _, peerAddr := range d.config.BootstrapPeers {
|
||||
@@ -204,7 +213,7 @@ func (d *LibP2PDHT) Bootstrap() error {
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
|
||||
connectCtx, cancel := context.WithTimeout(d.ctx, 10*time.Second)
|
||||
if err := d.host.Connect(connectCtx, *addrInfo); err != nil {
|
||||
cancel()
|
||||
@@ -213,11 +222,11 @@ func (d *LibP2PDHT) Bootstrap() error {
|
||||
cancel()
|
||||
connected++
|
||||
}
|
||||
|
||||
|
||||
if connected == 0 {
|
||||
return fmt.Errorf("failed to connect to any bootstrap peers")
|
||||
}
|
||||
|
||||
|
||||
d.bootstrapped = true
|
||||
return nil
|
||||
}
|
||||
@@ -233,13 +242,13 @@ func (d *LibP2PDHT) IsBootstrapped() bool {
|
||||
func (d *LibP2PDHT) keyToCID(key string) (cid.Cid, error) {
|
||||
// Hash the key
|
||||
hash := sha256.Sum256([]byte(key))
|
||||
|
||||
|
||||
// Create multihash
|
||||
mh, err := multihash.EncodeName(hash[:], "sha2-256")
|
||||
if err != nil {
|
||||
return cid.Undef, err
|
||||
}
|
||||
|
||||
|
||||
// Create CID
|
||||
return cid.NewCidV1(cid.Raw, mh), nil
|
||||
}
|
||||
@@ -249,13 +258,13 @@ func (d *LibP2PDHT) Provide(ctx context.Context, key string) error {
|
||||
if !d.IsBootstrapped() {
|
||||
return fmt.Errorf("DHT not bootstrapped")
|
||||
}
|
||||
|
||||
|
||||
// Convert key to CID
|
||||
keyCID, err := d.keyToCID(key)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create CID from key: %w", err)
|
||||
}
|
||||
|
||||
|
||||
return d.kdht.Provide(ctx, keyCID, true)
|
||||
}
|
||||
|
||||
@@ -264,31 +273,32 @@ func (d *LibP2PDHT) FindProviders(ctx context.Context, key string, limit int) ([
|
||||
if !d.IsBootstrapped() {
|
||||
return nil, fmt.Errorf("DHT not bootstrapped")
|
||||
}
|
||||
|
||||
|
||||
// Convert key to CID
|
||||
keyCID, err := d.keyToCID(key)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create CID from key: %w", err)
|
||||
}
|
||||
|
||||
// Find providers (FindProviders returns a channel and an error)
|
||||
providersChan, err := d.kdht.FindProviders(ctx, keyCID)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to find providers: %w", err)
|
||||
|
||||
maxProviders := limit
|
||||
if maxProviders <= 0 {
|
||||
maxProviders = defaultProviderResultLimit
|
||||
}
|
||||
|
||||
// Collect providers from channel
|
||||
providers := make([]peer.AddrInfo, 0, limit)
|
||||
// TODO: Fix libp2p FindProviders channel type mismatch
|
||||
// The channel appears to return int instead of peer.AddrInfo in this version
|
||||
_ = providersChan // Avoid unused variable error
|
||||
// for providerInfo := range providersChan {
|
||||
// providers = append(providers, providerInfo)
|
||||
// if len(providers) >= limit {
|
||||
// break
|
||||
// }
|
||||
// }
|
||||
|
||||
|
||||
providerCtx, cancel := context.WithCancel(ctx)
|
||||
defer cancel()
|
||||
|
||||
providersChan := d.kdht.FindProvidersAsync(providerCtx, keyCID, maxProviders)
|
||||
providers := make([]peer.AddrInfo, 0, maxProviders)
|
||||
|
||||
for providerInfo := range providersChan {
|
||||
providers = append(providers, providerInfo)
|
||||
if limit > 0 && len(providers) >= limit {
|
||||
cancel()
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
return providers, nil
|
||||
}
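With the switch to `FindProvidersAsync`, lookups are bounded by `limit` (falling back to `defaultProviderResultLimit` when `limit <= 0`) instead of relying on the previously broken channel handling. A usage fragment, assuming a bootstrapped `d *LibP2PDHT` and a `ctx` as in the constructor sketch above; the role key mirrors the `CHORUS:role:` scheme used elsewhere in this file:

```go
// Announce this node as a provider for a role key, then look up peers.
if err := d.Provide(ctx, "CHORUS:role:backend_developer"); err != nil {
	log.Printf("provide: %v", err) // fails with "DHT not bootstrapped" before Bootstrap()
}

if providers, err := d.FindProviders(ctx, "CHORUS:role:backend_developer", 10); err == nil {
	for _, p := range providers {
		log.Printf("provider %s (%d addrs)", p.ID, len(p.Addrs))
	}
}
```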
@@ -297,7 +307,7 @@ func (d *LibP2PDHT) PutValue(ctx context.Context, key string, value []byte) erro
|
||||
if !d.IsBootstrapped() {
|
||||
return fmt.Errorf("DHT not bootstrapped")
|
||||
}
|
||||
|
||||
|
||||
return d.kdht.PutValue(ctx, key, value)
|
||||
}
|
||||
|
||||
@@ -306,7 +316,7 @@ func (d *LibP2PDHT) GetValue(ctx context.Context, key string) ([]byte, error) {
|
||||
if !d.IsBootstrapped() {
|
||||
return nil, fmt.Errorf("DHT not bootstrapped")
|
||||
}
|
||||
|
||||
|
||||
return d.kdht.GetValue(ctx, key)
|
||||
}
|
||||
|
||||
@@ -315,7 +325,7 @@ func (d *LibP2PDHT) FindPeer(ctx context.Context, peerID peer.ID) (peer.AddrInfo
|
||||
if !d.IsBootstrapped() {
|
||||
return peer.AddrInfo{}, fmt.Errorf("DHT not bootstrapped")
|
||||
}
|
||||
|
||||
|
||||
return d.kdht.FindPeer(ctx, peerID)
|
||||
}
|
||||
|
||||
@@ -329,14 +339,30 @@ func (d *LibP2PDHT) GetConnectedPeers() []peer.ID {
|
||||
return d.kdht.Host().Network().Peers()
|
||||
}
|
||||
|
||||
// GetStats reports basic runtime statistics for the DHT
func (d *LibP2PDHT) GetStats() DHTStats {
	stats := DHTStats{
		TotalPeers: len(d.GetConnectedPeers()),
		Uptime:     time.Since(d.startTime),
	}

	if d.replicationManager != nil {
		if metrics := d.replicationManager.GetMetrics(); metrics != nil {
			stats.TotalKeys = int(metrics.TotalKeys)
		}
	}

	return stats
}
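`GetStats` is new in this commit and only surfaces what is cheaply available: connected peer count, uptime, and the replication manager's key total when present. A hedged fragment showing periodic reporting, assuming `d` and `ctx` from the earlier sketch and that `DHTStats` carries the fields used here:

```go
go func() {
	ticker := time.NewTicker(time.Minute)
	defer ticker.Stop()
	for {
		select {
		case <-ctx.Done():
			return
		case <-ticker.C:
			stats := d.GetStats()
			log.Printf("dht: peers=%d keys=%d uptime=%s",
				stats.TotalPeers, stats.TotalKeys, stats.Uptime)
		}
	}
}()
```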
// RegisterPeer registers a peer with capability information
|
||||
func (d *LibP2PDHT) RegisterPeer(peerID peer.ID, agent, role string, capabilities []string) {
|
||||
d.peersMutex.Lock()
|
||||
defer d.peersMutex.Unlock()
|
||||
|
||||
|
||||
// Get peer addresses from host
|
||||
peerInfo := d.host.Peerstore().PeerInfo(peerID)
|
||||
|
||||
|
||||
d.knownPeers[peerID] = &PeerInfo{
|
||||
ID: peerID,
|
||||
Addresses: peerInfo.Addrs,
|
||||
@@ -351,12 +377,12 @@ func (d *LibP2PDHT) RegisterPeer(peerID peer.ID, agent, role string, capabilitie
|
||||
func (d *LibP2PDHT) GetKnownPeers() map[peer.ID]*PeerInfo {
|
||||
d.peersMutex.RLock()
|
||||
defer d.peersMutex.RUnlock()
|
||||
|
||||
|
||||
result := make(map[peer.ID]*PeerInfo)
|
||||
for id, info := range d.knownPeers {
|
||||
result[id] = info
|
||||
}
|
||||
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
@@ -371,7 +397,7 @@ func (d *LibP2PDHT) FindPeersByRole(ctx context.Context, role string) ([]*PeerIn
|
||||
}
|
||||
}
|
||||
d.peersMutex.RUnlock()
|
||||
|
||||
|
||||
// Also search DHT for role-based keys
|
||||
roleKey := fmt.Sprintf("CHORUS:role:%s", role)
|
||||
providers, err := d.FindProviders(ctx, roleKey, 10)
|
||||
@@ -379,11 +405,11 @@ func (d *LibP2PDHT) FindPeersByRole(ctx context.Context, role string) ([]*PeerIn
|
||||
// Return local peers even if DHT search fails
|
||||
return localPeers, nil
|
||||
}
|
||||
|
||||
|
||||
// Convert providers to PeerInfo
|
||||
var result []*PeerInfo
|
||||
result = append(result, localPeers...)
|
||||
|
||||
|
||||
for _, provider := range providers {
|
||||
// Skip if we already have this peer
|
||||
found := false
|
||||
@@ -402,7 +428,7 @@ func (d *LibP2PDHT) FindPeersByRole(ctx context.Context, role string) ([]*PeerIn
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return result, nil
|
||||
}
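Local registrations are merged with DHT provider lookups, and local results are still returned when the DHT query fails, so role discovery degrades gracefully before bootstrap. A short fragment, again assuming `d` and `ctx` from the earlier sketch and a `peerID` obtained out-of-band:

```go
// Track a peer learned about out-of-band, then query by role.
d.RegisterPeer(peerID, "apollo", "backend_developer", []string{"go", "api"})

peers, err := d.FindPeersByRole(ctx, "backend_developer")
if err != nil {
	log.Printf("role lookup: %v", err)
}
for _, info := range peers {
	log.Printf("peer %s role=%s caps=%v", info.ID, info.Role, info.Capabilities)
}
```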
@@ -424,10 +450,10 @@ func (d *LibP2PDHT) startBackgroundTasks() {
|
||||
if d.config.AutoBootstrap {
|
||||
go d.autoBootstrap()
|
||||
}
|
||||
|
||||
|
||||
// Start periodic peer discovery
|
||||
go d.periodicDiscovery()
|
||||
|
||||
|
||||
// Start peer cleanup
|
||||
go d.peerCleanup()
|
||||
}
|
||||
@@ -436,7 +462,7 @@ func (d *LibP2PDHT) startBackgroundTasks() {
|
||||
func (d *LibP2PDHT) autoBootstrap() {
|
||||
ticker := time.NewTicker(30 * time.Second)
|
||||
defer ticker.Stop()
|
||||
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-d.ctx.Done():
|
||||
@@ -456,7 +482,7 @@ func (d *LibP2PDHT) autoBootstrap() {
|
||||
func (d *LibP2PDHT) periodicDiscovery() {
|
||||
ticker := time.NewTicker(d.config.DiscoveryInterval)
|
||||
defer ticker.Stop()
|
||||
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-d.ctx.Done():
|
||||
@@ -473,13 +499,13 @@ func (d *LibP2PDHT) periodicDiscovery() {
|
||||
func (d *LibP2PDHT) performDiscovery() {
|
||||
ctx, cancel := context.WithTimeout(d.ctx, 30*time.Second)
|
||||
defer cancel()
|
||||
|
||||
|
||||
// Look for general CHORUS peers
|
||||
providers, err := d.FindProviders(ctx, "CHORUS:peer", 10)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
// Update known peers
|
||||
d.peersMutex.Lock()
|
||||
for _, provider := range providers {
|
||||
@@ -498,7 +524,7 @@ func (d *LibP2PDHT) performDiscovery() {
|
||||
func (d *LibP2PDHT) peerCleanup() {
|
||||
ticker := time.NewTicker(5 * time.Minute)
|
||||
defer ticker.Stop()
|
||||
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-d.ctx.Done():
|
||||
@@ -513,9 +539,9 @@ func (d *LibP2PDHT) peerCleanup() {
|
||||
func (d *LibP2PDHT) cleanupStalePeers() {
|
||||
d.peersMutex.Lock()
|
||||
defer d.peersMutex.Unlock()
|
||||
|
||||
|
||||
staleThreshold := time.Now().Add(-time.Hour) // 1 hour threshold
|
||||
|
||||
|
||||
for peerID, peerInfo := range d.knownPeers {
|
||||
if peerInfo.LastSeen.Before(staleThreshold) {
|
||||
// Check if peer is still connected
|
||||
@@ -526,7 +552,7 @@ func (d *LibP2PDHT) cleanupStalePeers() {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if !connected {
|
||||
delete(d.knownPeers, peerID)
|
||||
}
|
||||
@@ -589,11 +615,11 @@ func (d *LibP2PDHT) EnableReplication(config *ReplicationConfig) error {
|
||||
if d.replicationManager != nil {
|
||||
return fmt.Errorf("replication already enabled")
|
||||
}
|
||||
|
||||
|
||||
if config == nil {
|
||||
config = DefaultReplicationConfig()
|
||||
}
|
||||
|
||||
|
||||
d.replicationManager = NewReplicationManager(d.ctx, d.kdht, config)
|
||||
return nil
|
||||
}
|
||||
@@ -603,11 +629,11 @@ func (d *LibP2PDHT) DisableReplication() error {
|
||||
if d.replicationManager == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
if err := d.replicationManager.Stop(); err != nil {
|
||||
return fmt.Errorf("failed to stop replication manager: %w", err)
|
||||
}
|
||||
|
||||
|
||||
d.replicationManager = nil
|
||||
return nil
|
||||
}
|
||||
@@ -617,13 +643,18 @@ func (d *LibP2PDHT) IsReplicationEnabled() bool {
|
||||
return d.replicationManager != nil
|
||||
}
|
||||
|
||||
// ReplicationManager returns the underlying replication manager if enabled.
func (d *LibP2PDHT) ReplicationManager() *ReplicationManager {
	return d.replicationManager
}
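Because the constructor already attaches a replication manager, `EnableReplication` is only needed after an explicit `DisableReplication`; the new `ReplicationManager()` accessor lets callers reach the manager directly. A sketch:

```go
if rm := d.ReplicationManager(); rm != nil {
	// Replication is on (the default after NewLibP2PDHT).
	_ = rm
} else if err := d.EnableReplication(nil); err != nil { // nil falls back to DefaultReplicationConfig
	log.Printf("enable replication: %v", err)
}
```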
// Close shuts down the DHT
|
||||
func (d *LibP2PDHT) Close() error {
|
||||
// Stop replication manager first
|
||||
if d.replicationManager != nil {
|
||||
d.replicationManager.Stop()
|
||||
}
|
||||
|
||||
|
||||
d.cancel()
|
||||
return d.kdht.Close()
|
||||
}
|
||||
@@ -633,10 +664,10 @@ func (d *LibP2PDHT) RefreshRoutingTable() error {
|
||||
if !d.IsBootstrapped() {
|
||||
return fmt.Errorf("DHT not bootstrapped")
|
||||
}
|
||||
|
||||
|
||||
// RefreshRoutingTable() returns a channel with errors, not a direct error
|
||||
errChan := d.kdht.RefreshRoutingTable()
|
||||
|
||||
|
||||
// Wait for the first error (if any) from the channel
|
||||
select {
|
||||
case err := <-errChan:
|
||||
@@ -654,4 +685,4 @@ func (d *LibP2PDHT) GetDHTSize() int {
|
||||
// Host returns the underlying libp2p host
|
||||
func (d *LibP2PDHT) Host() host.Host {
|
||||
return d.host
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,546 +2,155 @@ package dht
|
||||
|
||||
import (
|
||||
"context"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/libp2p/go-libp2p"
|
||||
"github.com/libp2p/go-libp2p/core/host"
|
||||
libp2p "github.com/libp2p/go-libp2p"
|
||||
dhtmode "github.com/libp2p/go-libp2p-kad-dht"
|
||||
"github.com/libp2p/go-libp2p/core/test"
|
||||
dht "github.com/libp2p/go-libp2p-kad-dht"
|
||||
"github.com/multiformats/go-multiaddr"
|
||||
)
|
||||
|
||||
type harness struct {
|
||||
ctx context.Context
|
||||
host libp2pHost
|
||||
dht *LibP2PDHT
|
||||
}
|
||||
|
||||
type libp2pHost interface {
|
||||
Close() error
|
||||
}
|
||||
|
||||
func newHarness(t *testing.T, opts ...Option) *harness {
|
||||
t.Helper()
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
|
||||
host, err := libp2p.New(libp2p.ListenAddrStrings("/ip4/127.0.0.1/tcp/0"))
|
||||
if err != nil {
|
||||
cancel()
|
||||
t.Fatalf("failed to create libp2p host: %v", err)
|
||||
}
|
||||
|
||||
options := append([]Option{WithAutoBootstrap(false)}, opts...)
|
||||
d, err := NewLibP2PDHT(ctx, host, options...)
|
||||
if err != nil {
|
||||
host.Close()
|
||||
cancel()
|
||||
t.Fatalf("failed to create DHT: %v", err)
|
||||
}
|
||||
|
||||
t.Cleanup(func() {
|
||||
d.Close()
|
||||
host.Close()
|
||||
cancel()
|
||||
})
|
||||
|
||||
return &harness{ctx: ctx, host: host, dht: d}
|
||||
}
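The harness centralizes host and DHT setup plus cleanup so each test body stays small. A sketch of an additional test in the same style (not part of this commit), relying on the bootstrap guard that `RefreshRoutingTable` enforces:

```go
func TestRefreshRoutingTableRequiresBootstrap(t *testing.T) {
	h := newHarness(t)

	if err := h.dht.RefreshRoutingTable(); err == nil {
		t.Fatal("expected RefreshRoutingTable to fail before bootstrap")
	}
}
```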
func TestDefaultConfig(t *testing.T) {
|
||||
config := DefaultConfig()
|
||||
|
||||
if config.ProtocolPrefix != "/CHORUS" {
|
||||
t.Errorf("expected protocol prefix '/CHORUS', got %s", config.ProtocolPrefix)
|
||||
cfg := DefaultConfig()
|
||||
|
||||
if cfg.ProtocolPrefix != "/CHORUS" {
|
||||
t.Fatalf("expected protocol prefix '/CHORUS', got %s", cfg.ProtocolPrefix)
|
||||
}
|
||||
|
||||
if config.BootstrapTimeout != 30*time.Second {
|
||||
t.Errorf("expected bootstrap timeout 30s, got %v", config.BootstrapTimeout)
|
||||
|
||||
if cfg.BootstrapTimeout != 30*time.Second {
|
||||
t.Fatalf("expected bootstrap timeout 30s, got %v", cfg.BootstrapTimeout)
|
||||
}
|
||||
|
||||
if config.Mode != dht.ModeAuto {
|
||||
t.Errorf("expected mode auto, got %v", config.Mode)
|
||||
|
||||
if cfg.Mode != dhtmode.ModeAuto {
|
||||
t.Fatalf("expected mode auto, got %v", cfg.Mode)
|
||||
}
|
||||
|
||||
if !config.AutoBootstrap {
|
||||
t.Error("expected auto bootstrap to be enabled")
|
||||
|
||||
if !cfg.AutoBootstrap {
|
||||
t.Fatal("expected auto bootstrap to be enabled")
|
||||
}
|
||||
}
|
||||
|
||||
func TestNewDHT(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
// Create a test host
|
||||
host, err := libp2p.New()
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create test host: %v", err)
|
||||
}
|
||||
defer host.Close()
|
||||
|
||||
// Test with default options
|
||||
d, err := NewDHT(ctx, host)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create DHT: %v", err)
|
||||
}
|
||||
defer d.Close()
|
||||
|
||||
if d.host != host {
|
||||
t.Error("host not set correctly")
|
||||
}
|
||||
|
||||
if d.config.ProtocolPrefix != "/CHORUS" {
|
||||
t.Errorf("expected protocol prefix '/CHORUS', got %s", d.config.ProtocolPrefix)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDHTWithOptions(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
host, err := libp2p.New()
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create test host: %v", err)
|
||||
}
|
||||
defer host.Close()
|
||||
|
||||
// Test with custom options
|
||||
d, err := NewDHT(ctx, host,
|
||||
func TestWithOptionsOverridesDefaults(t *testing.T) {
|
||||
h := newHarness(t,
|
||||
WithProtocolPrefix("/custom"),
|
||||
WithMode(dht.ModeClient),
|
||||
WithBootstrapTimeout(60*time.Second),
|
||||
WithDiscoveryInterval(120*time.Second),
|
||||
WithAutoBootstrap(false),
|
||||
WithDiscoveryInterval(2*time.Minute),
|
||||
WithBootstrapTimeout(45*time.Second),
|
||||
WithMode(dhtmode.ModeClient),
|
||||
WithAutoBootstrap(true),
|
||||
)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create DHT: %v", err)
|
||||
|
||||
cfg := h.dht.config
|
||||
|
||||
if cfg.ProtocolPrefix != "/custom" {
|
||||
t.Fatalf("expected protocol prefix '/custom', got %s", cfg.ProtocolPrefix)
|
||||
}
|
||||
defer d.Close()
|
||||
|
||||
if d.config.ProtocolPrefix != "/custom" {
|
||||
t.Errorf("expected protocol prefix '/custom', got %s", d.config.ProtocolPrefix)
|
||||
|
||||
if cfg.DiscoveryInterval != 2*time.Minute {
|
||||
t.Fatalf("expected discovery interval 2m, got %v", cfg.DiscoveryInterval)
|
||||
}
|
||||
|
||||
if d.config.Mode != dht.ModeClient {
|
||||
t.Errorf("expected mode client, got %v", d.config.Mode)
|
||||
|
||||
if cfg.BootstrapTimeout != 45*time.Second {
|
||||
t.Fatalf("expected bootstrap timeout 45s, got %v", cfg.BootstrapTimeout)
|
||||
}
|
||||
|
||||
if d.config.BootstrapTimeout != 60*time.Second {
|
||||
t.Errorf("expected bootstrap timeout 60s, got %v", d.config.BootstrapTimeout)
|
||||
|
||||
if cfg.Mode != dhtmode.ModeClient {
|
||||
t.Fatalf("expected mode client, got %v", cfg.Mode)
|
||||
}
|
||||
|
||||
if d.config.DiscoveryInterval != 120*time.Second {
|
||||
t.Errorf("expected discovery interval 120s, got %v", d.config.DiscoveryInterval)
|
||||
}
|
||||
|
||||
if d.config.AutoBootstrap {
|
||||
t.Error("expected auto bootstrap to be disabled")
|
||||
|
||||
if !cfg.AutoBootstrap {
|
||||
t.Fatal("expected auto bootstrap to remain enabled")
|
||||
}
|
||||
}
|
||||
|
||||
func TestWithBootstrapPeersFromStrings(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
host, err := libp2p.New()
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create test host: %v", err)
|
||||
}
|
||||
defer host.Close()
|
||||
|
||||
bootstrapAddrs := []string{
|
||||
"/ip4/127.0.0.1/tcp/4001/p2p/QmTest1",
|
||||
"/ip4/127.0.0.1/tcp/4002/p2p/QmTest2",
|
||||
}
|
||||
|
||||
d, err := NewDHT(ctx, host, WithBootstrapPeersFromStrings(bootstrapAddrs))
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create DHT: %v", err)
|
||||
}
|
||||
defer d.Close()
|
||||
|
||||
if len(d.config.BootstrapPeers) != 2 {
|
||||
t.Errorf("expected 2 bootstrap peers, got %d", len(d.config.BootstrapPeers))
|
||||
}
|
||||
}
|
||||
func TestProvideRequiresBootstrap(t *testing.T) {
|
||||
h := newHarness(t)
|
||||
|
||||
func TestWithBootstrapPeersFromStringsInvalid(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
host, err := libp2p.New()
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create test host: %v", err)
|
||||
err := h.dht.Provide(h.ctx, "key")
|
||||
if err == nil {
|
||||
t.Fatal("expected Provide to fail when not bootstrapped")
|
||||
}
|
||||
defer host.Close()
|
||||
|
||||
// Include invalid addresses - they should be filtered out
|
||||
bootstrapAddrs := []string{
|
||||
"/ip4/127.0.0.1/tcp/4001/p2p/QmTest1", // valid
|
||||
"invalid-address", // invalid
|
||||
"/ip4/127.0.0.1/tcp/4002/p2p/QmTest2", // valid
|
||||
}
|
||||
|
||||
d, err := NewDHT(ctx, host, WithBootstrapPeersFromStrings(bootstrapAddrs))
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create DHT: %v", err)
|
||||
}
|
||||
defer d.Close()
|
||||
|
||||
// Should have filtered out the invalid address
|
||||
if len(d.config.BootstrapPeers) != 2 {
|
||||
t.Errorf("expected 2 valid bootstrap peers, got %d", len(d.config.BootstrapPeers))
|
||||
}
|
||||
}
|
||||
|
||||
func TestBootstrapWithoutPeers(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
host, err := libp2p.New()
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create test host: %v", err)
|
||||
}
|
||||
defer host.Close()
|
||||
|
||||
d, err := NewDHT(ctx, host, WithAutoBootstrap(false))
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create DHT: %v", err)
|
||||
}
|
||||
defer d.Close()
|
||||
|
||||
// Bootstrap should use default IPFS peers when none configured
|
||||
err = d.Bootstrap()
|
||||
// This might fail in test environment without network access, but should not panic
|
||||
if err != nil {
|
||||
// Expected in test environment
|
||||
t.Logf("Bootstrap failed as expected in test environment: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsBootstrapped(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
host, err := libp2p.New()
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create test host: %v", err)
|
||||
}
|
||||
defer host.Close()
|
||||
|
||||
d, err := NewDHT(ctx, host, WithAutoBootstrap(false))
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create DHT: %v", err)
|
||||
}
|
||||
defer d.Close()
|
||||
|
||||
// Should not be bootstrapped initially
|
||||
if d.IsBootstrapped() {
|
||||
t.Error("DHT should not be bootstrapped initially")
|
||||
if !strings.Contains(err.Error(), "not bootstrapped") {
|
||||
t.Fatalf("expected error to indicate bootstrap requirement, got %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRegisterPeer(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
host, err := libp2p.New()
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create test host: %v", err)
|
||||
}
|
||||
defer host.Close()
|
||||
|
||||
d, err := NewDHT(ctx, host)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create DHT: %v", err)
|
||||
}
|
||||
defer d.Close()
|
||||
|
||||
h := newHarness(t)
|
||||
|
||||
peerID := test.RandPeerIDFatal(t)
|
||||
agent := "claude"
|
||||
role := "frontend"
|
||||
capabilities := []string{"react", "javascript"}
|
||||
|
||||
d.RegisterPeer(peerID, agent, role, capabilities)
|
||||
|
||||
knownPeers := d.GetKnownPeers()
|
||||
if len(knownPeers) != 1 {
|
||||
t.Errorf("expected 1 known peer, got %d", len(knownPeers))
|
||||
|
||||
h.dht.RegisterPeer(peerID, "apollo", "platform", []string{"go"})
|
||||
|
||||
peers := h.dht.GetKnownPeers()
|
||||
|
||||
info, ok := peers[peerID]
|
||||
if !ok {
|
||||
t.Fatalf("expected peer to be tracked")
|
||||
}
|
||||
|
||||
peerInfo, exists := knownPeers[peerID]
|
||||
if !exists {
|
||||
t.Error("peer not found in known peers")
|
||||
|
||||
if info.Agent != "apollo" {
|
||||
t.Fatalf("expected agent apollo, got %s", info.Agent)
|
||||
}
|
||||
|
||||
if peerInfo.Agent != agent {
|
||||
t.Errorf("expected agent %s, got %s", agent, peerInfo.Agent)
|
||||
|
||||
if info.Role != "platform" {
|
||||
t.Fatalf("expected role platform, got %s", info.Role)
|
||||
}
|
||||
|
||||
if peerInfo.Role != role {
|
||||
t.Errorf("expected role %s, got %s", role, peerInfo.Role)
|
||||
}
|
||||
|
||||
if len(peerInfo.Capabilities) != len(capabilities) {
|
||||
t.Errorf("expected %d capabilities, got %d", len(capabilities), len(peerInfo.Capabilities))
|
||||
|
||||
if len(info.Capabilities) != 1 || info.Capabilities[0] != "go" {
|
||||
t.Fatalf("expected capability go, got %v", info.Capabilities)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetConnectedPeers(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
host, err := libp2p.New()
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create test host: %v", err)
|
||||
func TestGetStatsProvidesUptime(t *testing.T) {
|
||||
h := newHarness(t)
|
||||
|
||||
stats := h.dht.GetStats()
|
||||
|
||||
if stats.TotalPeers != 0 {
|
||||
t.Fatalf("expected zero peers, got %d", stats.TotalPeers)
|
||||
}
|
||||
defer host.Close()
|
||||
|
||||
d, err := NewDHT(ctx, host)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create DHT: %v", err)
|
||||
}
|
||||
defer d.Close()
|
||||
|
||||
// Initially should have no connected peers
|
||||
peers := d.GetConnectedPeers()
|
||||
if len(peers) != 0 {
|
||||
t.Errorf("expected 0 connected peers, got %d", len(peers))
|
||||
|
||||
if stats.Uptime < 0 {
|
||||
t.Fatalf("expected non-negative uptime, got %v", stats.Uptime)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPutAndGetValue(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
host, err := libp2p.New()
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create test host: %v", err)
|
||||
}
|
||||
defer host.Close()
|
||||
|
||||
d, err := NewDHT(ctx, host, WithAutoBootstrap(false))
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create DHT: %v", err)
|
||||
}
|
||||
defer d.Close()
|
||||
|
||||
// Test without bootstrap (should fail)
|
||||
key := "test-key"
|
||||
value := []byte("test-value")
|
||||
|
||||
err = d.PutValue(ctx, key, value)
|
||||
if err == nil {
|
||||
t.Error("PutValue should fail when DHT not bootstrapped")
|
||||
}
|
||||
|
||||
_, err = d.GetValue(ctx, key)
|
||||
if err == nil {
|
||||
t.Error("GetValue should fail when DHT not bootstrapped")
|
||||
}
|
||||
}
|
||||
|
||||
func TestProvideAndFindProviders(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
host, err := libp2p.New()
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create test host: %v", err)
|
||||
}
|
||||
defer host.Close()
|
||||
|
||||
d, err := NewDHT(ctx, host, WithAutoBootstrap(false))
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create DHT: %v", err)
|
||||
}
|
||||
defer d.Close()
|
||||
|
||||
// Test without bootstrap (should fail)
|
||||
key := "test-service"
|
||||
|
||||
err = d.Provide(ctx, key)
|
||||
if err == nil {
|
||||
t.Error("Provide should fail when DHT not bootstrapped")
|
||||
}
|
||||
|
||||
_, err = d.FindProviders(ctx, key, 10)
|
||||
if err == nil {
|
||||
t.Error("FindProviders should fail when DHT not bootstrapped")
|
||||
}
|
||||
}
|
||||
|
||||
func TestFindPeer(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
host, err := libp2p.New()
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create test host: %v", err)
|
||||
}
|
||||
defer host.Close()
|
||||
|
||||
d, err := NewDHT(ctx, host, WithAutoBootstrap(false))
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create DHT: %v", err)
|
||||
}
|
||||
defer d.Close()
|
||||
|
||||
// Test without bootstrap (should fail)
|
||||
peerID := test.RandPeerIDFatal(t)
|
||||
|
||||
_, err = d.FindPeer(ctx, peerID)
|
||||
if err == nil {
|
||||
t.Error("FindPeer should fail when DHT not bootstrapped")
|
||||
}
|
||||
}
|
||||
|
||||
func TestFindPeersByRole(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
host, err := libp2p.New()
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create test host: %v", err)
|
||||
}
|
||||
defer host.Close()
|
||||
|
||||
d, err := NewDHT(ctx, host, WithAutoBootstrap(false))
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create DHT: %v", err)
|
||||
}
|
||||
defer d.Close()
|
||||
|
||||
// Register some local peers
|
||||
peerID1 := test.RandPeerIDFatal(t)
|
||||
peerID2 := test.RandPeerIDFatal(t)
|
||||
|
||||
d.RegisterPeer(peerID1, "claude", "frontend", []string{"react"})
|
||||
d.RegisterPeer(peerID2, "claude", "backend", []string{"go"})
|
||||
|
||||
// Find frontend peers
|
||||
frontendPeers, err := d.FindPeersByRole(ctx, "frontend")
|
||||
if err != nil {
|
||||
t.Fatalf("failed to find peers by role: %v", err)
|
||||
}
|
||||
|
||||
if len(frontendPeers) != 1 {
|
||||
t.Errorf("expected 1 frontend peer, got %d", len(frontendPeers))
|
||||
}
|
||||
|
||||
if frontendPeers[0].ID != peerID1 {
|
||||
t.Error("wrong peer returned for frontend role")
|
||||
}
|
||||
|
||||
// Find all peers with wildcard
|
||||
allPeers, err := d.FindPeersByRole(ctx, "*")
|
||||
if err != nil {
|
||||
t.Fatalf("failed to find all peers: %v", err)
|
||||
}
|
||||
|
||||
if len(allPeers) != 2 {
|
||||
t.Errorf("expected 2 peers with wildcard, got %d", len(allPeers))
|
||||
}
|
||||
}
|
||||
|
||||
func TestAnnounceRole(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
host, err := libp2p.New()
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create test host: %v", err)
|
||||
}
|
||||
defer host.Close()
|
||||
|
||||
d, err := NewDHT(ctx, host, WithAutoBootstrap(false))
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create DHT: %v", err)
|
||||
}
|
||||
defer d.Close()
|
||||
|
||||
// Should fail when not bootstrapped
|
||||
err = d.AnnounceRole(ctx, "frontend")
|
||||
if err == nil {
|
||||
t.Error("AnnounceRole should fail when DHT not bootstrapped")
|
||||
}
|
||||
}
|
||||
|
||||
func TestAnnounceCapability(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
host, err := libp2p.New()
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create test host: %v", err)
|
||||
}
|
||||
defer host.Close()
|
||||
|
||||
d, err := NewDHT(ctx, host, WithAutoBootstrap(false))
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create DHT: %v", err)
|
||||
}
|
||||
defer d.Close()
|
||||
|
||||
// Should fail when not bootstrapped
|
||||
err = d.AnnounceCapability(ctx, "react")
|
||||
if err == nil {
|
||||
t.Error("AnnounceCapability should fail when DHT not bootstrapped")
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetRoutingTable(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
host, err := libp2p.New()
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create test host: %v", err)
|
||||
}
|
||||
defer host.Close()
|
||||
|
||||
d, err := NewDHT(ctx, host)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create DHT: %v", err)
|
||||
}
|
||||
defer d.Close()
|
||||
|
||||
rt := d.GetRoutingTable()
|
||||
if rt == nil {
|
||||
t.Error("routing table should not be nil")
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetDHTSize(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
host, err := libp2p.New()
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create test host: %v", err)
|
||||
}
|
||||
defer host.Close()
|
||||
|
||||
d, err := NewDHT(ctx, host)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create DHT: %v", err)
|
||||
}
|
||||
defer d.Close()
|
||||
|
||||
size := d.GetDHTSize()
|
||||
// Should be 0 or small initially
|
||||
if size < 0 {
|
||||
t.Errorf("DHT size should be non-negative, got %d", size)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRefreshRoutingTable(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
host, err := libp2p.New()
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create test host: %v", err)
|
||||
}
|
||||
defer host.Close()
|
||||
|
||||
d, err := NewDHT(ctx, host, WithAutoBootstrap(false))
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create DHT: %v", err)
|
||||
}
|
||||
defer d.Close()
|
||||
|
||||
// Should fail when not bootstrapped
|
||||
err = d.RefreshRoutingTable()
|
||||
if err == nil {
|
||||
t.Error("RefreshRoutingTable should fail when DHT not bootstrapped")
|
||||
}
|
||||
}
|
||||
|
||||
func TestHost(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
host, err := libp2p.New()
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create test host: %v", err)
|
||||
}
|
||||
defer host.Close()
|
||||
|
||||
d, err := NewDHT(ctx, host)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create DHT: %v", err)
|
||||
}
|
||||
defer d.Close()
|
||||
|
||||
if d.Host() != host {
|
||||
t.Error("Host() should return the same host instance")
|
||||
}
|
||||
}
|
||||
|
||||
func TestClose(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
host, err := libp2p.New()
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create test host: %v", err)
|
||||
}
|
||||
defer host.Close()
|
||||
|
||||
d, err := NewDHT(ctx, host)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create DHT: %v", err)
|
||||
}
|
||||
|
||||
// Should close without error
|
||||
err = d.Close()
|
||||
if err != nil {
|
||||
t.Errorf("Close() failed: %v", err)
|
||||
}
|
||||
}
|
||||
@@ -2,559 +2,155 @@ package dht
|
||||
|
||||
import (
|
||||
"context"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"chorus/pkg/config"
|
||||
)
|
||||
|
||||
// TestDHTSecurityPolicyEnforcement tests security policy enforcement in DHT operations
|
||||
func TestDHTSecurityPolicyEnforcement(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
testCases := []struct {
|
||||
name string
|
||||
currentRole string
|
||||
operation string
|
||||
ucxlAddress string
|
||||
contentType string
|
||||
expectSuccess bool
|
||||
expectedError string
|
||||
}{
|
||||
// Store operation tests
|
||||
type securityTestCase struct {
|
||||
name string
|
||||
role string
|
||||
address string
|
||||
contentType string
|
||||
expectSuccess bool
|
||||
expectErrHint string
|
||||
}
|
||||
|
||||
func newTestEncryptedStorage(cfg *config.Config) *EncryptedDHTStorage {
|
||||
return &EncryptedDHTStorage{
|
||||
ctx: context.Background(),
|
||||
config: cfg,
|
||||
nodeID: "test-node",
|
||||
cache: make(map[string]*CachedEntry),
|
||||
metrics: &StorageMetrics{LastUpdate: time.Now()},
|
||||
}
|
||||
}
|
||||
|
||||
func TestCheckStoreAccessPolicy(t *testing.T) {
|
||||
cases := []securityTestCase{
|
||||
{
|
||||
name: "admin_can_store_all_content",
|
||||
currentRole: "admin",
|
||||
operation: "store",
|
||||
ucxlAddress: "agent1:admin:system:security_audit",
|
||||
name: "backend developer can store",
|
||||
role: "backend_developer",
|
||||
address: "agent1:backend_developer:api:endpoint",
|
||||
contentType: "decision",
|
||||
expectSuccess: true,
|
||||
},
|
||||
{
|
||||
name: "backend_developer_can_store_backend_content",
|
||||
currentRole: "backend_developer",
|
||||
operation: "store",
|
||||
ucxlAddress: "agent1:backend_developer:api:endpoint_design",
|
||||
contentType: "suggestion",
|
||||
name: "project manager can store",
|
||||
role: "project_manager",
|
||||
address: "agent1:project_manager:plan:milestone",
|
||||
contentType: "decision",
|
||||
expectSuccess: true,
|
||||
},
|
||||
{
|
||||
name: "readonly_role_cannot_store",
|
||||
currentRole: "readonly_user",
|
||||
operation: "store",
|
||||
ucxlAddress: "agent1:readonly_user:project:observation",
|
||||
contentType: "suggestion",
|
||||
expectSuccess: false,
|
||||
expectedError: "read-only authority",
|
||||
name: "read only user cannot store",
|
||||
role: "readonly_user",
|
||||
address: "agent1:readonly_user:note:observation",
|
||||
contentType: "note",
|
||||
expectSuccess: false,
|
||||
expectErrHint: "read-only authority",
|
||||
},
|
||||
{
|
||||
name: "unknown_role_cannot_store",
|
||||
currentRole: "invalid_role",
|
||||
operation: "store",
|
||||
ucxlAddress: "agent1:invalid_role:project:task",
|
||||
contentType: "decision",
|
||||
expectSuccess: false,
|
||||
expectedError: "unknown creator role",
|
||||
},
|
||||
|
||||
// Retrieve operation tests
|
||||
{
|
||||
name: "any_valid_role_can_retrieve",
|
||||
currentRole: "qa_engineer",
|
||||
operation: "retrieve",
|
||||
ucxlAddress: "agent1:backend_developer:api:test_data",
|
||||
expectSuccess: true,
|
||||
},
|
||||
{
|
||||
name: "unknown_role_cannot_retrieve",
|
||||
currentRole: "nonexistent_role",
|
||||
operation: "retrieve",
|
||||
ucxlAddress: "agent1:backend_developer:api:test_data",
|
||||
expectSuccess: false,
|
||||
expectedError: "unknown current role",
|
||||
},
|
||||
|
||||
// Announce operation tests
|
||||
{
|
||||
name: "coordination_role_can_announce",
|
||||
currentRole: "senior_software_architect",
|
||||
operation: "announce",
|
||||
ucxlAddress: "agent1:senior_software_architect:architecture:blueprint",
|
||||
expectSuccess: true,
|
||||
},
|
||||
{
|
||||
name: "decision_role_can_announce",
|
||||
currentRole: "security_expert",
|
||||
operation: "announce",
|
||||
ucxlAddress: "agent1:security_expert:security:policy",
|
||||
expectSuccess: true,
|
||||
},
|
||||
{
|
||||
name: "suggestion_role_cannot_announce",
|
||||
currentRole: "suggestion_only_role",
|
||||
operation: "announce",
|
||||
ucxlAddress: "agent1:suggestion_only_role:project:idea",
|
||||
expectSuccess: false,
|
||||
expectedError: "lacks authority",
|
||||
},
|
||||
{
|
||||
name: "readonly_role_cannot_announce",
|
||||
currentRole: "readonly_user",
|
||||
operation: "announce",
|
||||
ucxlAddress: "agent1:readonly_user:project:observation",
|
||||
expectSuccess: false,
|
||||
expectedError: "lacks authority",
|
||||
name: "unknown role rejected",
|
||||
role: "ghost_role",
|
||||
address: "agent1:ghost_role:context",
|
||||
contentType: "decision",
|
||||
expectSuccess: false,
|
||||
expectErrHint: "unknown creator role",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
cfg := &config.Config{Agent: config.AgentConfig{}}
|
||||
eds := newTestEncryptedStorage(cfg)
|
||||
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
// Create test configuration
|
||||
cfg := &config.Config{
|
||||
Agent: config.AgentConfig{
|
||||
ID: "test-agent",
|
||||
Role: tc.currentRole,
|
||||
},
|
||||
Security: config.SecurityConfig{
|
||||
KeyRotationDays: 90,
|
||||
AuditLogging: true,
|
||||
AuditPath: "/tmp/test-security-audit.log",
|
||||
},
|
||||
}
|
||||
|
||||
// Create mock encrypted storage
|
||||
eds := createMockEncryptedStorage(ctx, cfg)
|
||||
|
||||
var err error
|
||||
switch tc.operation {
|
||||
case "store":
|
||||
err = eds.checkStoreAccessPolicy(tc.currentRole, tc.ucxlAddress, tc.contentType)
|
||||
case "retrieve":
|
||||
err = eds.checkRetrieveAccessPolicy(tc.currentRole, tc.ucxlAddress)
|
||||
case "announce":
|
||||
err = eds.checkAnnounceAccessPolicy(tc.currentRole, tc.ucxlAddress)
|
||||
}
|
||||
|
||||
if tc.expectSuccess {
|
||||
if err != nil {
|
||||
t.Errorf("Expected %s operation to succeed for role %s, but got error: %v",
|
||||
tc.operation, tc.currentRole, err)
|
||||
}
|
||||
} else {
|
||||
if err == nil {
|
||||
t.Errorf("Expected %s operation to fail for role %s, but it succeeded",
|
||||
tc.operation, tc.currentRole)
|
||||
}
|
||||
if tc.expectedError != "" && !containsSubstring(err.Error(), tc.expectedError) {
|
||||
t.Errorf("Expected error to contain '%s', got '%s'", tc.expectedError, err.Error())
|
||||
}
|
||||
}
|
||||
err := eds.checkStoreAccessPolicy(tc.role, tc.address, tc.contentType)
|
||||
verifySecurityExpectation(t, tc.expectSuccess, tc.expectErrHint, err)
|
||||
})
|
||||
}
|
||||
}
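`verifySecurityExpectation` is referenced by these table-driven tests but its body is not shown in this hunk. A plausible shape, offered only as an illustration of the contract the tests rely on; the real implementation may differ:

```go
// Hypothetical helper: asserts success/failure plus an error-substring hint.
func verifySecurityExpectation(t *testing.T, expectSuccess bool, errHint string, err error) {
	t.Helper()

	if expectSuccess {
		if err != nil {
			t.Fatalf("expected operation to succeed, got %v", err)
		}
		return
	}
	if err == nil {
		t.Fatal("expected operation to fail, but it succeeded")
	}
	if errHint != "" && !strings.Contains(err.Error(), errHint) {
		t.Fatalf("expected error to mention %q, got %q", errHint, err.Error())
	}
}
```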
// TestDHTAuditLogging tests comprehensive audit logging for DHT operations
|
||||
func TestDHTAuditLogging(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
testCases := []struct {
|
||||
name string
|
||||
operation string
|
||||
role string
|
||||
ucxlAddress string
|
||||
success bool
|
||||
errorMsg string
|
||||
expectAudit bool
|
||||
}{
|
||||
func TestCheckRetrieveAccessPolicy(t *testing.T) {
|
||||
cases := []securityTestCase{
|
||||
{
|
||||
name: "successful_store_operation",
|
||||
operation: "store",
|
||||
role: "backend_developer",
|
||||
ucxlAddress: "agent1:backend_developer:api:user_service",
|
||||
success: true,
|
||||
expectAudit: true,
|
||||
name: "qa engineer allowed",
|
||||
role: "qa_engineer",
|
||||
address: "agent1:backend_developer:api:tests",
|
||||
expectSuccess: true,
|
||||
},
|
||||
{
|
||||
name: "failed_store_operation",
|
||||
operation: "store",
|
||||
role: "readonly_user",
|
||||
ucxlAddress: "agent1:readonly_user:project:readonly_attempt",
|
||||
success: false,
|
||||
errorMsg: "read-only authority",
|
||||
expectAudit: true,
|
||||
},
|
||||
{
|
||||
name: "successful_retrieve_operation",
|
||||
operation: "retrieve",
|
||||
role: "frontend_developer",
|
||||
ucxlAddress: "agent1:backend_developer:api:user_data",
|
||||
success: true,
|
||||
expectAudit: true,
|
||||
},
|
||||
{
|
||||
name: "successful_announce_operation",
|
||||
operation: "announce",
|
||||
role: "senior_software_architect",
|
||||
ucxlAddress: "agent1:senior_software_architect:architecture:system_design",
|
||||
success: true,
|
||||
expectAudit: true,
|
||||
},
|
||||
{
|
||||
name: "audit_disabled_no_logging",
|
||||
operation: "store",
|
||||
role: "backend_developer",
|
||||
ucxlAddress: "agent1:backend_developer:api:no_audit",
|
||||
success: true,
|
||||
expectAudit: false,
},
{
name: "unknown role rejected",
|
||||
role: "unknown",
|
||||
address: "agent1:backend_developer:api:tests",
|
||||
expectSuccess: false,
|
||||
expectErrHint: "unknown current role",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
cfg := &config.Config{Agent: config.AgentConfig{}}
|
||||
eds := newTestEncryptedStorage(cfg)
|
||||
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
// Create configuration with audit logging
|
||||
cfg := &config.Config{
|
||||
Agent: config.AgentConfig{
|
||||
ID: "test-agent",
|
||||
Role: tc.role,
|
||||
},
|
||||
Security: config.SecurityConfig{
|
||||
KeyRotationDays: 90,
|
||||
AuditLogging: tc.expectAudit,
|
||||
AuditPath: "/tmp/test-dht-audit.log",
|
||||
},
|
||||
}
|
||||
|
||||
// Create mock encrypted storage
|
||||
eds := createMockEncryptedStorage(ctx, cfg)
|
||||
|
||||
// Capture audit output
|
||||
auditCaptured := false
|
||||
|
||||
// Simulate audit operation
|
||||
switch tc.operation {
|
||||
case "store":
|
||||
// Mock the audit function call
|
||||
if tc.expectAudit && cfg.Security.AuditLogging {
|
||||
eds.auditStoreOperation(tc.ucxlAddress, tc.role, "test-content", 1024, tc.success, tc.errorMsg)
|
||||
auditCaptured = true
|
||||
}
|
||||
case "retrieve":
|
||||
if tc.expectAudit && cfg.Security.AuditLogging {
|
||||
eds.auditRetrieveOperation(tc.ucxlAddress, tc.role, tc.success, tc.errorMsg)
|
||||
auditCaptured = true
|
||||
}
|
||||
case "announce":
|
||||
if tc.expectAudit && cfg.Security.AuditLogging {
|
||||
eds.auditAnnounceOperation(tc.ucxlAddress, tc.role, tc.success, tc.errorMsg)
|
||||
auditCaptured = true
|
||||
}
|
||||
}
|
||||
|
||||
// Verify audit logging behavior
|
||||
if tc.expectAudit && !auditCaptured {
|
||||
t.Errorf("Expected audit logging for %s operation but none was captured", tc.operation)
|
||||
}
|
||||
if !tc.expectAudit && auditCaptured {
|
||||
t.Errorf("Expected no audit logging for %s operation but audit was captured", tc.operation)
|
||||
}
|
||||
err := eds.checkRetrieveAccessPolicy(tc.role, tc.address)
|
||||
verifySecurityExpectation(t, tc.expectSuccess, tc.expectErrHint, err)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestSecurityConfigIntegration tests integration with SecurityConfig
|
||||
func TestSecurityConfigIntegration(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
testConfigs := []struct {
|
||||
name string
|
||||
auditLogging bool
|
||||
auditPath string
|
||||
expectAuditWork bool
|
||||
}{
|
||||
func TestCheckAnnounceAccessPolicy(t *testing.T) {
|
||||
cases := []securityTestCase{
|
||||
{
|
||||
name: "audit_enabled_with_path",
|
||||
auditLogging: true,
|
||||
auditPath: "/tmp/test-audit-enabled.log",
|
||||
expectAuditWork: true,
},
{
name: "architect can announce",
|
||||
role: "senior_software_architect",
|
||||
address: "agent1:senior_software_architect:architecture:proposal",
|
||||
expectSuccess: true,
|
||||
},
|
||||
{
|
||||
name: "audit_disabled",
|
||||
auditLogging: false,
|
||||
auditPath: "/tmp/test-audit-disabled.log",
|
||||
expectAuditWork: false,
},
{
name: "suggestion role cannot announce",
|
||||
role: "suggestion_only_role",
|
||||
address: "agent1:suggestion_only_role:idea",
|
||||
expectSuccess: false,
|
||||
expectErrHint: "lacks authority",
|
||||
},
|
||||
{
|
||||
name: "audit_enabled_no_path",
|
||||
auditLogging: true,
|
||||
auditPath: "",
|
||||
expectAuditWork: false,
},
{
name: "unknown role rejected",
|
||||
role: "mystery",
|
||||
address: "agent1:mystery:topic",
|
||||
expectSuccess: false,
|
||||
expectErrHint: "unknown current role",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testConfigs {
|
||||
cfg := &config.Config{Agent: config.AgentConfig{}}
|
||||
eds := newTestEncryptedStorage(cfg)
|
||||
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
cfg := &config.Config{
|
||||
Agent: config.AgentConfig{
|
||||
ID: "test-agent",
|
||||
Role: "backend_developer",
|
||||
},
|
||||
Security: config.SecurityConfig{
|
||||
KeyRotationDays: 90,
|
||||
AuditLogging: tc.auditLogging,
|
||||
AuditPath: tc.auditPath,
|
||||
},
|
||||
}
|
||||
|
||||
eds := createMockEncryptedStorage(ctx, cfg)
|
||||
|
||||
// Test audit function behavior with different configurations
|
||||
auditWorked := func() bool {
|
||||
if !cfg.Security.AuditLogging || cfg.Security.AuditPath == "" {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}()
|
||||
|
||||
if auditWorked != tc.expectAuditWork {
|
||||
t.Errorf("Expected audit to work: %v, but got: %v", tc.expectAuditWork, auditWorked)
|
||||
}
|
||||
err := eds.checkAnnounceAccessPolicy(tc.role, tc.address)
|
||||
verifySecurityExpectation(t, tc.expectSuccess, tc.expectErrHint, err)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestRoleAuthorityHierarchy tests role authority hierarchy enforcement
|
||||
func TestRoleAuthorityHierarchy(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
// Test role authority levels for different operations
|
||||
authorityTests := []struct {
|
||||
role string
|
||||
authorityLevel config.AuthorityLevel
|
||||
canStore bool
|
||||
canRetrieve bool
|
||||
canAnnounce bool
|
||||
}{
|
||||
{
|
||||
role: "admin",
|
||||
authorityLevel: config.AuthorityMaster,
|
||||
canStore: true,
|
||||
canRetrieve: true,
|
||||
canAnnounce: true,
|
||||
},
|
||||
{
|
||||
role: "senior_software_architect",
|
||||
authorityLevel: config.AuthorityDecision,
|
||||
canStore: true,
|
||||
canRetrieve: true,
|
||||
canAnnounce: true,
|
||||
},
|
||||
{
|
||||
role: "security_expert",
|
||||
authorityLevel: config.AuthorityCoordination,
|
||||
canStore: true,
|
||||
canRetrieve: true,
|
||||
canAnnounce: true,
|
||||
},
|
||||
{
|
||||
role: "backend_developer",
|
||||
authorityLevel: config.AuthoritySuggestion,
|
||||
canStore: true,
|
||||
canRetrieve: true,
|
||||
canAnnounce: false,
|
||||
},
|
||||
func verifySecurityExpectation(t *testing.T, expectSuccess bool, hint string, err error) {
|
||||
t.Helper()
|
||||
|
||||
if expectSuccess {
|
||||
if err != nil {
|
||||
t.Fatalf("expected success, got error: %v", err)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
for _, tt := range authorityTests {
|
||||
t.Run(tt.role+"_authority_test", func(t *testing.T) {
|
||||
cfg := &config.Config{
|
||||
Agent: config.AgentConfig{
|
||||
ID: "test-agent",
|
||||
Role: tt.role,
|
||||
},
|
||||
Security: config.SecurityConfig{
|
||||
KeyRotationDays: 90,
|
||||
AuditLogging: true,
|
||||
AuditPath: "/tmp/test-authority.log",
|
||||
},
|
||||
}
|
||||
if err == nil {
|
||||
t.Fatal("expected error but got success")
|
||||
}
|
||||
|
||||
eds := createMockEncryptedStorage(ctx, cfg)
|
||||
|
||||
// Test store permission
|
||||
storeErr := eds.checkStoreAccessPolicy(tt.role, "test:address", "content")
|
||||
if tt.canStore && storeErr != nil {
|
||||
t.Errorf("Role %s should be able to store but got error: %v", tt.role, storeErr)
|
||||
}
|
||||
if !tt.canStore && storeErr == nil {
|
||||
t.Errorf("Role %s should not be able to store but operation succeeded", tt.role)
|
||||
}
|
||||
|
||||
// Test retrieve permission
|
||||
retrieveErr := eds.checkRetrieveAccessPolicy(tt.role, "test:address")
|
||||
if tt.canRetrieve && retrieveErr != nil {
|
||||
t.Errorf("Role %s should be able to retrieve but got error: %v", tt.role, retrieveErr)
|
||||
}
|
||||
if !tt.canRetrieve && retrieveErr == nil {
|
||||
t.Errorf("Role %s should not be able to retrieve but operation succeeded", tt.role)
|
||||
}
|
||||
|
||||
// Test announce permission
|
||||
announceErr := eds.checkAnnounceAccessPolicy(tt.role, "test:address")
|
||||
if tt.canAnnounce && announceErr != nil {
|
||||
t.Errorf("Role %s should be able to announce but got error: %v", tt.role, announceErr)
|
||||
}
|
||||
if !tt.canAnnounce && announceErr == nil {
|
||||
t.Errorf("Role %s should not be able to announce but operation succeeded", tt.role)
|
||||
}
|
||||
})
|
||||
if hint != "" && !strings.Contains(err.Error(), hint) {
|
||||
t.Fatalf("expected error to contain %q, got %q", hint, err.Error())
|
||||
}
|
||||
}
|
||||
|
||||
// TestSecurityMetrics tests security-related metrics
|
||||
func TestSecurityMetrics(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
cfg := &config.Config{
|
||||
Agent: config.AgentConfig{
|
||||
ID: "test-agent",
|
||||
Role: "backend_developer",
|
||||
},
|
||||
Security: config.SecurityConfig{
|
||||
KeyRotationDays: 90,
|
||||
AuditLogging: true,
|
||||
AuditPath: "/tmp/test-metrics.log",
|
||||
},
|
||||
}
|
||||
|
||||
eds := createMockEncryptedStorage(ctx, cfg)
|
||||
|
||||
// Simulate some operations to generate metrics
|
||||
for i := 0; i < 5; i++ {
|
||||
eds.metrics.StoredItems++
|
||||
eds.metrics.RetrievedItems++
|
||||
eds.metrics.EncryptionOps++
|
||||
eds.metrics.DecryptionOps++
|
||||
}
|
||||
|
||||
metrics := eds.GetMetrics()
|
||||
|
||||
expectedMetrics := map[string]int64{
|
||||
"stored_items": 5,
|
||||
"retrieved_items": 5,
|
||||
"encryption_ops": 5,
|
||||
"decryption_ops": 5,
|
||||
}
|
||||
|
||||
for metricName, expectedValue := range expectedMetrics {
|
||||
if actualValue, ok := metrics[metricName]; !ok {
|
||||
t.Errorf("Expected metric %s to be present in metrics", metricName)
|
||||
} else if actualValue != expectedValue {
|
||||
t.Errorf("Expected %s to be %d, got %v", metricName, expectedValue, actualValue)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Helper functions
|
||||
|
||||
func createMockEncryptedStorage(ctx context.Context, cfg *config.Config) *EncryptedDHTStorage {
|
||||
return &EncryptedDHTStorage{
|
||||
ctx: ctx,
|
||||
config: cfg,
|
||||
nodeID: "test-node-id",
|
||||
cache: make(map[string]*CachedEntry),
|
||||
metrics: &StorageMetrics{
|
||||
LastUpdate: time.Now(),
|
||||
},
|
||||
}
|
||||
}
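// The mock above is enough to exercise the policy helpers directly, without a
// running DHT. A minimal sketch (the role and address literals are illustrative,
// not taken from the production policy tables):
//
//	cfg := &config.Config{Agent: config.AgentConfig{ID: "doc-agent", Role: "backend_developer"}}
//	eds := createMockEncryptedStorage(context.Background(), cfg)
//	if err := eds.checkStoreAccessPolicy("backend_developer", "agent1:backend_developer:api:demo", "content"); err != nil {
//		// backend_developer has store authority in these tests, so an error here is unexpected
//	}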
|
||||
|
||||
func containsSubstring(str, substr string) bool {
|
||||
if len(substr) == 0 {
|
||||
return true
|
||||
}
|
||||
if len(str) < len(substr) {
|
||||
return false
|
||||
}
|
||||
for i := 0; i <= len(str)-len(substr); i++ {
|
||||
if str[i:i+len(substr)] == substr {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
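// Note: containsSubstring mirrors strings.Contains from the standard library,
// which the newer verifySecurityExpectation helper already uses; the two are
// interchangeable:
//
//	containsSubstring("read-only authority", "authority") == strings.Contains("read-only authority", "authority")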
|
||||
|
||||
// Benchmarks for security performance
|
||||
|
||||
func BenchmarkSecurityPolicyChecks(b *testing.B) {
|
||||
ctx := context.Background()
|
||||
cfg := &config.Config{
|
||||
Agent: config.AgentConfig{
|
||||
ID: "bench-agent",
|
||||
Role: "backend_developer",
|
||||
},
|
||||
Security: config.SecurityConfig{
|
||||
KeyRotationDays: 90,
|
||||
AuditLogging: true,
|
||||
AuditPath: "/tmp/bench-security.log",
|
||||
},
|
||||
}
|
||||
|
||||
eds := createMockEncryptedStorage(ctx, cfg)
|
||||
|
||||
b.ResetTimer()
|
||||
|
||||
b.Run("store_policy_check", func(b *testing.B) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
eds.checkStoreAccessPolicy("backend_developer", "test:address", "content")
|
||||
}
|
||||
})
|
||||
|
||||
b.Run("retrieve_policy_check", func(b *testing.B) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
eds.checkRetrieveAccessPolicy("backend_developer", "test:address")
|
||||
}
|
||||
})
|
||||
|
||||
b.Run("announce_policy_check", func(b *testing.B) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
eds.checkAnnounceAccessPolicy("senior_software_architect", "test:address")
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func BenchmarkAuditOperations(b *testing.B) {
|
||||
ctx := context.Background()
|
||||
cfg := &config.Config{
|
||||
Agent: config.AgentConfig{
|
||||
ID: "bench-agent",
|
||||
Role: "backend_developer",
|
||||
},
|
||||
Security: config.SecurityConfig{
|
||||
KeyRotationDays: 90,
|
||||
AuditLogging: true,
|
||||
AuditPath: "/tmp/bench-audit.log",
|
||||
},
|
||||
}
|
||||
|
||||
eds := createMockEncryptedStorage(ctx, cfg)
|
||||
|
||||
b.ResetTimer()
|
||||
|
||||
b.Run("store_audit", func(b *testing.B) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
eds.auditStoreOperation("test:address", "backend_developer", "content", 1024, true, "")
|
||||
}
|
||||
})
|
||||
|
||||
b.Run("retrieve_audit", func(b *testing.B) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
eds.auditRetrieveOperation("test:address", "backend_developer", true, "")
|
||||
}
|
||||
})
|
||||
|
||||
b.Run("announce_audit", func(b *testing.B) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
eds.auditAnnounceOperation("test:address", "backend_developer", true, "")
|
||||
}
|
||||
})
|
||||
}
|
||||
@@ -1,14 +1,117 @@
|
||||
package dht
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
|
||||
"chorus/pkg/config"
|
||||
libp2p "github.com/libp2p/go-libp2p"
|
||||
"github.com/libp2p/go-libp2p/core/host"
|
||||
"github.com/libp2p/go-libp2p/core/peer"
|
||||
"github.com/libp2p/go-libp2p/p2p/security/noise"
|
||||
"github.com/libp2p/go-libp2p/p2p/transport/tcp"
|
||||
"github.com/multiformats/go-multiaddr"
|
||||
)
|
||||
|
||||
// NewRealDHT creates a new real DHT implementation
|
||||
func NewRealDHT(config *config.HybridConfig) (DHT, error) {
|
||||
// TODO: Implement real DHT initialization
|
||||
// For now, return an error to indicate it's not yet implemented
|
||||
return nil, fmt.Errorf("real DHT implementation not yet available")
|
||||
}
|
||||
// RealDHT wraps a libp2p-based DHT to satisfy the generic DHT interface.
|
||||
type RealDHT struct {
|
||||
cancel context.CancelFunc
|
||||
host host.Host
|
||||
dht *LibP2PDHT
|
||||
}
|
||||
|
||||
// NewRealDHT creates a new real DHT implementation backed by libp2p.
|
||||
func NewRealDHT(cfg *config.HybridConfig) (DHT, error) {
|
||||
if cfg == nil {
|
||||
cfg = &config.HybridConfig{}
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
|
||||
listenAddr, err := multiaddr.NewMultiaddr("/ip4/0.0.0.0/tcp/0")
|
||||
if err != nil {
|
||||
cancel()
|
||||
return nil, fmt.Errorf("failed to create listen address: %w", err)
|
||||
}
|
||||
|
||||
host, err := libp2p.New(
|
||||
libp2p.ListenAddrs(listenAddr),
|
||||
libp2p.Security(noise.ID, noise.New),
|
||||
libp2p.Transport(tcp.NewTCPTransport),
|
||||
libp2p.DefaultMuxers,
|
||||
libp2p.EnableRelay(),
|
||||
)
|
||||
if err != nil {
|
||||
cancel()
|
||||
return nil, fmt.Errorf("failed to create libp2p host: %w", err)
|
||||
}
|
||||
|
||||
opts := []Option{
|
||||
WithProtocolPrefix("/CHORUS"),
|
||||
}
|
||||
|
||||
if nodes := cfg.GetDHTBootstrapNodes(); len(nodes) > 0 {
|
||||
opts = append(opts, WithBootstrapPeersFromStrings(nodes))
|
||||
}
|
||||
|
||||
libp2pDHT, err := NewLibP2PDHT(ctx, host, opts...)
|
||||
if err != nil {
|
||||
host.Close()
|
||||
cancel()
|
||||
return nil, fmt.Errorf("failed to initialize libp2p DHT: %w", err)
|
||||
}
|
||||
|
||||
if err := libp2pDHT.Bootstrap(); err != nil {
|
||||
libp2pDHT.Close()
|
||||
host.Close()
|
||||
cancel()
|
||||
return nil, fmt.Errorf("failed to bootstrap DHT: %w", err)
|
||||
}
|
||||
|
||||
return &RealDHT{
|
||||
cancel: cancel,
|
||||
host: host,
|
||||
dht: libp2pDHT,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// PutValue stores a value in the DHT.
|
||||
func (r *RealDHT) PutValue(ctx context.Context, key string, value []byte) error {
|
||||
return r.dht.PutValue(ctx, key, value)
|
||||
}
|
||||
|
||||
// GetValue retrieves a value from the DHT.
|
||||
func (r *RealDHT) GetValue(ctx context.Context, key string) ([]byte, error) {
|
||||
return r.dht.GetValue(ctx, key)
|
||||
}
|
||||
|
||||
// Provide announces that this node can provide the given key.
|
||||
func (r *RealDHT) Provide(ctx context.Context, key string) error {
|
||||
return r.dht.Provide(ctx, key)
|
||||
}
|
||||
|
||||
// FindProviders locates peers that can provide the specified key.
|
||||
func (r *RealDHT) FindProviders(ctx context.Context, key string, limit int) ([]peer.AddrInfo, error) {
|
||||
return r.dht.FindProviders(ctx, key, limit)
|
||||
}
|
||||
|
||||
// GetStats exposes runtime metrics for the real DHT.
|
||||
func (r *RealDHT) GetStats() DHTStats {
|
||||
return r.dht.GetStats()
|
||||
}
|
||||
|
||||
// Close releases resources associated with the DHT.
|
||||
func (r *RealDHT) Close() error {
|
||||
r.cancel()
|
||||
|
||||
var errs []error
|
||||
if err := r.dht.Close(); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
if err := r.host.Close(); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
|
||||
return errors.Join(errs...)
|
||||
}
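// Example wiring for the wrapper above (a sketch; error handling is abbreviated
// and the empty HybridConfig simply skips bootstrap peers):
//
//	dht, err := NewRealDHT(&config.HybridConfig{})
//	if err != nil {
//		return fmt.Errorf("dht init: %w", err)
//	}
//	defer dht.Close()
//
//	ctx := context.Background()
//	if err := dht.PutValue(ctx, "ucxl://example/path", []byte("payload")); err != nil {
//		fmt.Printf("put failed: %v\n", err)
//	}
//	if val, err := dht.GetValue(ctx, "ucxl://example/path"); err == nil {
//		fmt.Printf("got %d bytes\n", len(val))
//	}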
|
||||
|
||||
@@ -2,159 +2,106 @@ package dht
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
// TestReplicationManager tests basic replication manager functionality
|
||||
func TestReplicationManager(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
// Create a mock DHT for testing
|
||||
mockDHT := NewMockDHTInterface()
|
||||
|
||||
// Create replication manager
|
||||
config := DefaultReplicationConfig()
|
||||
config.ReprovideInterval = 1 * time.Second // Short interval for testing
|
||||
config.CleanupInterval = 1 * time.Second
|
||||
|
||||
rm := NewReplicationManager(ctx, mockDHT.Mock(), config)
|
||||
defer rm.Stop()
|
||||
|
||||
// Test adding content
|
||||
testKey := "test-content-key"
|
||||
testSize := int64(1024)
|
||||
testPriority := 5
|
||||
|
||||
err := rm.AddContent(testKey, testSize, testPriority)
|
||||
func newReplicationManagerForTest(t *testing.T) *ReplicationManager {
|
||||
t.Helper()
|
||||
|
||||
cfg := &ReplicationConfig{
|
||||
ReplicationFactor: 3,
|
||||
ReprovideInterval: time.Hour,
|
||||
CleanupInterval: time.Hour,
|
||||
ProviderTTL: 30 * time.Minute,
|
||||
MaxProvidersPerKey: 5,
|
||||
EnableAutoReplication: false,
|
||||
EnableReprovide: false,
|
||||
MaxConcurrentReplications: 1,
|
||||
}
|
||||
|
||||
rm := NewReplicationManager(context.Background(), nil, cfg)
|
||||
t.Cleanup(func() {
|
||||
if rm.reprovideTimer != nil {
|
||||
rm.reprovideTimer.Stop()
|
||||
}
|
||||
if rm.cleanupTimer != nil {
|
||||
rm.cleanupTimer.Stop()
|
||||
}
|
||||
rm.cancel()
|
||||
})
|
||||
return rm
|
||||
}
|
||||
|
||||
func TestAddContentRegistersKey(t *testing.T) {
|
||||
rm := newReplicationManagerForTest(t)
|
||||
|
||||
if err := rm.AddContent("ucxl://example/path", 512, 1); err != nil {
|
||||
t.Fatalf("expected AddContent to succeed, got error: %v", err)
|
||||
}
|
||||
|
||||
rm.keysMutex.RLock()
|
||||
record, ok := rm.contentKeys["ucxl://example/path"]
|
||||
rm.keysMutex.RUnlock()
|
||||
|
||||
if !ok {
|
||||
t.Fatal("expected content key to be registered")
|
||||
}
|
||||
|
||||
if record.Size != 512 {
|
||||
t.Fatalf("expected size 512, got %d", record.Size)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRemoveContentClearsTracking(t *testing.T) {
|
||||
rm := newReplicationManagerForTest(t)
|
||||
|
||||
if err := rm.AddContent("ucxl://example/path", 512, 1); err != nil {
|
||||
t.Fatalf("AddContent returned error: %v", err)
|
||||
}
|
||||
|
||||
if err := rm.RemoveContent("ucxl://example/path"); err != nil {
|
||||
t.Fatalf("RemoveContent returned error: %v", err)
|
||||
}
|
||||
|
||||
rm.keysMutex.RLock()
|
||||
_, exists := rm.contentKeys["ucxl://example/path"]
|
||||
rm.keysMutex.RUnlock()
|
||||
|
||||
if exists {
|
||||
t.Fatal("expected content key to be removed")
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetReplicationStatusReturnsCopy(t *testing.T) {
|
||||
rm := newReplicationManagerForTest(t)
|
||||
|
||||
if err := rm.AddContent("ucxl://example/path", 512, 1); err != nil {
|
||||
t.Fatalf("AddContent returned error: %v", err)
|
||||
}
|
||||
|
||||
status, err := rm.GetReplicationStatus("ucxl://example/path")
|
||||
if err != nil {
t.Fatalf("GetReplicationStatus returned error: %v", err)
|
||||
}
|
||||
|
||||
// Test getting replication status
|
||||
status, err := rm.GetReplicationStatus(testKey)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to get replication status: %v", err)
|
||||
|
||||
if status.Key != "ucxl://example/path" {
|
||||
t.Fatalf("expected status key to match, got %s", status.Key)
|
||||
}
|
||||
|
||||
if status.Key != testKey {
|
||||
t.Errorf("Expected key %s, got %s", testKey, status.Key)
|
||||
|
||||
// Mutating status should not affect internal state
|
||||
status.HealthyProviders = 99
|
||||
internal, _ := rm.GetReplicationStatus("ucxl://example/path")
|
||||
if internal.HealthyProviders == 99 {
|
||||
t.Fatal("expected GetReplicationStatus to return a copy")
|
||||
}
|
||||
|
||||
if status.Size != testSize {
|
||||
t.Errorf("Expected size %d, got %d", testSize, status.Size)
|
||||
}
|
||||
|
||||
if status.Priority != testPriority {
|
||||
t.Errorf("Expected priority %d, got %d", testPriority, status.Priority)
|
||||
}
|
||||
|
||||
// Test providing content
|
||||
err = rm.ProvideContent(testKey)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to provide content: %v", err)
|
||||
}
|
||||
|
||||
// Test metrics
|
||||
}
|
||||
|
||||
func TestGetMetricsReturnsSnapshot(t *testing.T) {
|
||||
rm := newReplicationManagerForTest(t)
|
||||
|
||||
metrics := rm.GetMetrics()
|
||||
if metrics.TotalKeys != 1 {
|
||||
t.Errorf("Expected 1 total key, got %d", metrics.TotalKeys)
|
||||
}
|
||||
|
||||
// Test finding providers
|
||||
providers, err := rm.FindProviders(ctx, testKey, 10)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to find providers: %v", err)
|
||||
}
|
||||
|
||||
t.Logf("Found %d providers for key %s", len(providers), testKey)
|
||||
|
||||
// Test removing content
|
||||
err = rm.RemoveContent(testKey)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to remove content: %v", err)
|
||||
}
|
||||
|
||||
// Verify content was removed
|
||||
metrics = rm.GetMetrics()
|
||||
if metrics.TotalKeys != 0 {
|
||||
t.Errorf("Expected 0 total keys after removal, got %d", metrics.TotalKeys)
|
||||
if metrics == rm.metrics {
|
||||
t.Fatal("expected GetMetrics to return a copy of metrics")
|
||||
}
|
||||
}
|
||||
|
||||
// TestLibP2PDHTReplication tests DHT replication functionality
|
||||
func TestLibP2PDHTReplication(t *testing.T) {
|
||||
// This would normally require a real libp2p setup
|
||||
// For now, just test the interface methods exist
|
||||
|
||||
// Mock test - in a real implementation, you'd set up actual libp2p hosts
|
||||
t.Log("DHT replication interface methods are implemented")
|
||||
|
||||
// Example of how the replication would be used:
|
||||
// 1. Add content for replication
|
||||
// 2. Content gets automatically provided to the DHT
|
||||
// 3. Other nodes can discover this node as a provider
|
||||
// 4. Periodic reproviding ensures content availability
|
||||
// 5. Replication metrics track system health
|
||||
}
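// The five-step flow described above can be exercised against the mock DHT.
// A compact sketch (keys, sizes, and priorities are illustrative):
//
//	ctx := context.Background()
//	rm := NewReplicationManager(ctx, NewMockDHTInterface().Mock(), DefaultReplicationConfig())
//	defer rm.Stop()
//
//	_ = rm.AddContent("ucxl://example/replicated", 2048, 3)                     // 1. register content
//	_ = rm.ProvideContent("ucxl://example/replicated")                          // 2. provide it to the DHT
//	providers, _ := rm.FindProviders(ctx, "ucxl://example/replicated", 10)      // 3. discover providers
//	_ = providers
//	// 4. reproviding runs in the background on config.ReprovideInterval
//	metrics := rm.GetMetrics()                                                  // 5. replication health
//	fmt.Printf("tracked keys: %d\n", metrics.TotalKeys)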
|
||||
|
||||
// TestReplicationConfig tests replication configuration
|
||||
func TestReplicationConfig(t *testing.T) {
|
||||
config := DefaultReplicationConfig()
|
||||
|
||||
// Test default values
|
||||
if config.ReplicationFactor != 3 {
|
||||
t.Errorf("Expected default replication factor 3, got %d", config.ReplicationFactor)
|
||||
}
|
||||
|
||||
if config.ReprovideInterval != 12*time.Hour {
|
||||
t.Errorf("Expected default reprovide interval 12h, got %v", config.ReprovideInterval)
|
||||
}
|
||||
|
||||
if !config.EnableAutoReplication {
|
||||
t.Error("Expected auto replication to be enabled by default")
|
||||
}
|
||||
|
||||
if !config.EnableReprovide {
|
||||
t.Error("Expected reprovide to be enabled by default")
|
||||
}
|
||||
}
|
||||
|
||||
// TestProviderInfo tests provider information tracking
|
||||
func TestProviderInfo(t *testing.T) {
|
||||
// Test distance calculation
|
||||
key := []byte("test-key")
|
||||
peerID := "test-peer-id"
|
||||
|
||||
distance := calculateDistance(key, []byte(peerID))
|
||||
|
||||
// Distance should be non-zero for different inputs
|
||||
if distance == 0 {
|
||||
t.Error("Expected non-zero distance for different inputs")
|
||||
}
|
||||
|
||||
t.Logf("Distance between key and peer: %d", distance)
|
||||
}
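// calculateDistance is not shown in this diff; a Kademlia-style implementation
// would typically XOR hashes of the two inputs and fold the prefix into an
// integer. A sketch of that assumption, for illustration only (requires
// crypto/sha256):
//
//	func xorDistance(a, b []byte) uint64 {
//		ha, hb := sha256.Sum256(a), sha256.Sum256(b)
//		var d uint64
//		for i := 0; i < 8; i++ {
//			d = (d << 8) | uint64(ha[i]^hb[i])
//		}
//		return d
//	}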
|
||||
|
||||
// TestReplicationMetrics tests metrics collection
|
||||
func TestReplicationMetrics(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
mockDHT := NewMockDHTInterface()
|
||||
rm := NewReplicationManager(ctx, mockDHT.Mock(), DefaultReplicationConfig())
|
||||
defer rm.Stop()
|
||||
|
||||
// Add some content
|
||||
for i := 0; i < 3; i++ {
|
||||
key := fmt.Sprintf("test-key-%d", i)
|
||||
rm.AddContent(key, int64(1000+i*100), i+1)
|
||||
}
|
||||
|
||||
metrics := rm.GetMetrics()
|
||||
|
||||
if metrics.TotalKeys != 3 {
|
||||
t.Errorf("Expected 3 total keys, got %d", metrics.TotalKeys)
|
||||
}
|
||||
|
||||
t.Logf("Replication metrics: %+v", metrics)
|
||||
}
|
||||
@@ -19,8 +19,8 @@ import (
|
||||
type ElectionTrigger string
|
||||
|
||||
const (
|
||||
TriggerHeartbeatTimeout ElectionTrigger = "admin_heartbeat_timeout"
|
||||
TriggerDiscoveryFailure ElectionTrigger = "no_admin_discovered"
|
||||
TriggerHeartbeatTimeout ElectionTrigger = "admin_heartbeat_timeout"
|
||||
TriggerDiscoveryFailure ElectionTrigger = "no_admin_discovered"
|
||||
TriggerSplitBrain ElectionTrigger = "split_brain_detected"
|
||||
TriggerQuorumRestored ElectionTrigger = "quorum_restored"
|
||||
TriggerManual ElectionTrigger = "manual_trigger"
|
||||
@@ -30,30 +30,35 @@ const (
|
||||
type ElectionState string
|
||||
|
||||
const (
|
||||
StateIdle ElectionState = "idle"
|
||||
StateDiscovering ElectionState = "discovering"
|
||||
StateElecting ElectionState = "electing"
|
||||
electionTopic = "CHORUS/election/v1"
|
||||
adminHeartbeatTopic = "CHORUS/admin/heartbeat/v1"
|
||||
)
|
||||
|
||||
const (
|
||||
StateIdle ElectionState = "idle"
|
||||
StateDiscovering ElectionState = "discovering"
|
||||
StateElecting ElectionState = "electing"
|
||||
StateReconstructing ElectionState = "reconstructing_keys"
|
||||
StateComplete ElectionState = "complete"
|
||||
StateComplete ElectionState = "complete"
|
||||
)
|
||||
|
||||
// AdminCandidate represents a node candidate for admin role
|
||||
type AdminCandidate struct {
|
||||
NodeID string `json:"node_id"`
|
||||
PeerID peer.ID `json:"peer_id"`
|
||||
Capabilities []string `json:"capabilities"`
|
||||
Uptime time.Duration `json:"uptime"`
|
||||
Resources ResourceMetrics `json:"resources"`
|
||||
Experience time.Duration `json:"experience"`
|
||||
Score float64 `json:"score"`
|
||||
Metadata map[string]interface{} `json:"metadata,omitempty"`
|
||||
NodeID string `json:"node_id"`
|
||||
PeerID peer.ID `json:"peer_id"`
|
||||
Capabilities []string `json:"capabilities"`
|
||||
Uptime time.Duration `json:"uptime"`
|
||||
Resources ResourceMetrics `json:"resources"`
|
||||
Experience time.Duration `json:"experience"`
|
||||
Score float64 `json:"score"`
|
||||
Metadata map[string]interface{} `json:"metadata,omitempty"`
|
||||
}
|
||||
|
||||
// ResourceMetrics holds node resource information for election scoring
|
||||
type ResourceMetrics struct {
|
||||
CPUUsage float64 `json:"cpu_usage"`
|
||||
MemoryUsage float64 `json:"memory_usage"`
|
||||
DiskUsage float64 `json:"disk_usage"`
|
||||
CPUUsage float64 `json:"cpu_usage"`
|
||||
MemoryUsage float64 `json:"memory_usage"`
|
||||
DiskUsage float64 `json:"disk_usage"`
|
||||
NetworkQuality float64 `json:"network_quality"`
|
||||
}
|
||||
|
||||
@@ -68,46 +73,46 @@ type ElectionMessage struct {
|
||||
|
||||
// ElectionManager handles admin election coordination
|
||||
type ElectionManager struct {
|
||||
ctx context.Context
|
||||
cancel context.CancelFunc
|
||||
config *config.Config
|
||||
host libp2p.Host
|
||||
pubsub *pubsub.PubSub
|
||||
nodeID string
|
||||
|
||||
ctx context.Context
|
||||
cancel context.CancelFunc
|
||||
config *config.Config
|
||||
host libp2p.Host
|
||||
pubsub *pubsub.PubSub
|
||||
nodeID string
|
||||
|
||||
// Election state
|
||||
mu sync.RWMutex
|
||||
state ElectionState
|
||||
currentTerm int
|
||||
lastHeartbeat time.Time
|
||||
currentAdmin string
|
||||
candidates map[string]*AdminCandidate
|
||||
votes map[string]string // voter -> candidate
|
||||
|
||||
mu sync.RWMutex
|
||||
state ElectionState
|
||||
currentTerm int
|
||||
lastHeartbeat time.Time
|
||||
currentAdmin string
|
||||
candidates map[string]*AdminCandidate
|
||||
votes map[string]string // voter -> candidate
|
||||
|
||||
// Timers and channels
|
||||
heartbeatTimer *time.Timer
|
||||
discoveryTimer *time.Timer
|
||||
electionTimer *time.Timer
|
||||
electionTrigger chan ElectionTrigger
|
||||
|
||||
heartbeatTimer *time.Timer
|
||||
discoveryTimer *time.Timer
|
||||
electionTimer *time.Timer
|
||||
electionTrigger chan ElectionTrigger
|
||||
|
||||
// Heartbeat management
|
||||
heartbeatManager *HeartbeatManager
|
||||
|
||||
heartbeatManager *HeartbeatManager
|
||||
|
||||
// Callbacks
|
||||
onAdminChanged func(oldAdmin, newAdmin string)
|
||||
onAdminChanged func(oldAdmin, newAdmin string)
|
||||
onElectionComplete func(winner string)
|
||||
|
||||
|
||||
startTime time.Time
|
||||
}
|
||||
|
||||
// HeartbeatManager manages admin heartbeat lifecycle
|
||||
type HeartbeatManager struct {
|
||||
mu sync.Mutex
|
||||
isRunning bool
|
||||
stopCh chan struct{}
|
||||
ticker *time.Ticker
|
||||
electionMgr *ElectionManager
|
||||
logger func(msg string, args ...interface{})
|
||||
mu sync.Mutex
|
||||
isRunning bool
|
||||
stopCh chan struct{}
|
||||
ticker *time.Ticker
|
||||
electionMgr *ElectionManager
|
||||
logger func(msg string, args ...interface{})
|
||||
}
|
||||
|
||||
// NewElectionManager creates a new election manager
|
||||
@@ -119,7 +124,7 @@ func NewElectionManager(
|
||||
nodeID string,
|
||||
) *ElectionManager {
|
||||
electionCtx, cancel := context.WithCancel(ctx)
|
||||
|
||||
|
||||
em := &ElectionManager{
|
||||
ctx: electionCtx,
|
||||
cancel: cancel,
|
||||
@@ -133,7 +138,7 @@ func NewElectionManager(
|
||||
electionTrigger: make(chan ElectionTrigger, 10),
|
||||
startTime: time.Now(),
|
||||
}
|
||||
|
||||
|
||||
// Initialize heartbeat manager
|
||||
em.heartbeatManager = &HeartbeatManager{
|
||||
electionMgr: em,
|
||||
@@ -141,29 +146,32 @@ func NewElectionManager(
|
||||
log.Printf("[HEARTBEAT] "+msg, args...)
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
return em
|
||||
}
|
||||
|
||||
// Start begins the election management system
|
||||
func (em *ElectionManager) Start() error {
|
||||
log.Printf("🗳️ Starting election manager for node %s", em.nodeID)
|
||||
|
||||
// TODO: Subscribe to election-related messages - pubsub interface needs update
|
||||
// if err := em.pubsub.Subscribe("CHORUS/election/v1", em.handleElectionMessage); err != nil {
|
||||
// return fmt.Errorf("failed to subscribe to election messages: %w", err)
|
||||
// }
|
||||
//
|
||||
// if err := em.pubsub.Subscribe("CHORUS/admin/heartbeat/v1", em.handleAdminHeartbeat); err != nil {
|
||||
// return fmt.Errorf("failed to subscribe to admin heartbeat: %w", err)
|
||||
// }
|
||||
|
||||
|
||||
if err := em.pubsub.SubscribeRawTopic(electionTopic, func(data []byte, _ peer.ID) {
|
||||
em.handleElectionMessage(data)
|
||||
}); err != nil {
|
||||
return fmt.Errorf("failed to subscribe to election messages: %w", err)
|
||||
}
|
||||
|
||||
if err := em.pubsub.SubscribeRawTopic(adminHeartbeatTopic, func(data []byte, _ peer.ID) {
|
||||
em.handleAdminHeartbeat(data)
|
||||
}); err != nil {
|
||||
return fmt.Errorf("failed to subscribe to admin heartbeat: %w", err)
|
||||
}
|
||||
|
||||
// Start discovery process
|
||||
go em.startDiscoveryLoop()
|
||||
|
||||
|
||||
// Start election coordinator
|
||||
go em.electionCoordinator()
|
||||
|
||||
|
||||
// Start heartbeat if this node is already admin at startup
|
||||
if em.IsCurrentAdmin() {
|
||||
go func() {
|
||||
@@ -174,7 +182,7 @@ func (em *ElectionManager) Start() error {
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
|
||||
log.Printf("✅ Election manager started")
|
||||
return nil
|
||||
}
|
||||
@@ -182,17 +190,17 @@ func (em *ElectionManager) Start() error {
|
||||
// Stop shuts down the election manager
|
||||
func (em *ElectionManager) Stop() {
|
||||
log.Printf("🛑 Stopping election manager")
|
||||
|
||||
|
||||
// Stop heartbeat first
|
||||
if em.heartbeatManager != nil {
|
||||
em.heartbeatManager.StopHeartbeat()
|
||||
}
|
||||
|
||||
|
||||
em.cancel()
|
||||
|
||||
|
||||
em.mu.Lock()
|
||||
defer em.mu.Unlock()
|
||||
|
||||
|
||||
if em.heartbeatTimer != nil {
|
||||
em.heartbeatTimer.Stop()
|
||||
}
|
||||
@@ -255,7 +263,7 @@ func (em *ElectionManager) GetHeartbeatStatus() map[string]interface{} {
|
||||
// startDiscoveryLoop starts the admin discovery loop
|
||||
func (em *ElectionManager) startDiscoveryLoop() {
|
||||
log.Printf("🔍 Starting admin discovery loop")
|
||||
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-em.ctx.Done():
|
||||
@@ -272,19 +280,19 @@ func (em *ElectionManager) performAdminDiscovery() {
|
||||
currentState := em.state
|
||||
lastHeartbeat := em.lastHeartbeat
|
||||
em.mu.Unlock()
|
||||
|
||||
|
||||
// Only discover if we're idle or the heartbeat is stale
|
||||
if currentState != StateIdle {
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
// Check if admin heartbeat has timed out
|
||||
if !lastHeartbeat.IsZero() && time.Since(lastHeartbeat) > em.config.Security.ElectionConfig.HeartbeatTimeout {
|
||||
log.Printf("⚰️ Admin heartbeat timeout detected (last: %v)", lastHeartbeat)
|
||||
em.TriggerElection(TriggerHeartbeatTimeout)
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
// If we haven't heard from an admin recently, try to discover one
|
||||
if lastHeartbeat.IsZero() || time.Since(lastHeartbeat) > em.config.Security.ElectionConfig.DiscoveryTimeout/2 {
|
||||
em.sendDiscoveryRequest()
|
||||
@@ -298,7 +306,7 @@ func (em *ElectionManager) sendDiscoveryRequest() {
|
||||
NodeID: em.nodeID,
|
||||
Timestamp: time.Now(),
|
||||
}
|
||||
|
||||
|
||||
if err := em.publishElectionMessage(discoveryMsg); err != nil {
|
||||
log.Printf("❌ Failed to send admin discovery request: %v", err)
|
||||
}
|
||||
@@ -307,7 +315,7 @@ func (em *ElectionManager) sendDiscoveryRequest() {
|
||||
// electionCoordinator handles the main election logic
|
||||
func (em *ElectionManager) electionCoordinator() {
|
||||
log.Printf("🎯 Election coordinator started")
|
||||
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-em.ctx.Done():
|
||||
@@ -321,17 +329,17 @@ func (em *ElectionManager) electionCoordinator() {
|
||||
// handleElectionTrigger processes election triggers
|
||||
func (em *ElectionManager) handleElectionTrigger(trigger ElectionTrigger) {
|
||||
log.Printf("🔥 Processing election trigger: %s", trigger)
|
||||
|
||||
|
||||
em.mu.Lock()
|
||||
currentState := em.state
|
||||
em.mu.Unlock()
|
||||
|
||||
|
||||
// Ignore triggers if we're already in an election
|
||||
if currentState != StateIdle {
|
||||
log.Printf("⏸️ Ignoring election trigger, current state: %s", currentState)
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
// Begin election process
|
||||
em.beginElection(trigger)
|
||||
}
|
||||
@@ -339,7 +347,7 @@ func (em *ElectionManager) handleElectionTrigger(trigger ElectionTrigger) {
|
||||
// beginElection starts a new election
|
||||
func (em *ElectionManager) beginElection(trigger ElectionTrigger) {
|
||||
log.Printf("🗳️ Beginning election due to: %s", trigger)
|
||||
|
||||
|
||||
em.mu.Lock()
|
||||
em.state = StateElecting
|
||||
em.currentTerm++
|
||||
@@ -347,12 +355,12 @@ func (em *ElectionManager) beginElection(trigger ElectionTrigger) {
|
||||
em.candidates = make(map[string]*AdminCandidate)
|
||||
em.votes = make(map[string]string)
|
||||
em.mu.Unlock()
|
||||
|
||||
|
||||
// Announce candidacy if this node can be admin
|
||||
if em.canBeAdmin() {
|
||||
em.announceCandidacy(term)
|
||||
}
|
||||
|
||||
|
||||
// Send election announcement
|
||||
electionMsg := ElectionMessage{
|
||||
Type: "election_started",
|
||||
@@ -363,11 +371,11 @@ func (em *ElectionManager) beginElection(trigger ElectionTrigger) {
|
||||
"trigger": string(trigger),
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
if err := em.publishElectionMessage(electionMsg); err != nil {
|
||||
log.Printf("❌ Failed to announce election start: %v", err)
|
||||
}
|
||||
|
||||
|
||||
// Start election timeout
|
||||
em.startElectionTimeout(term)
|
||||
}
|
||||
@@ -386,7 +394,7 @@ func (em *ElectionManager) canBeAdmin() bool {
|
||||
// announceCandidacy announces this node as an election candidate
|
||||
func (em *ElectionManager) announceCandidacy(term int) {
|
||||
uptime := time.Since(em.startTime)
|
||||
|
||||
|
||||
candidate := &AdminCandidate{
|
||||
NodeID: em.nodeID,
|
||||
PeerID: em.host.ID(),
|
||||
@@ -396,13 +404,13 @@ func (em *ElectionManager) announceCandidacy(term int) {
|
||||
Experience: uptime, // For now, use uptime as experience
|
||||
Metadata: map[string]interface{}{
|
||||
"specialization": em.config.Agent.Specialization,
|
||||
"models": em.config.Agent.Models,
|
||||
"models": em.config.Agent.Models,
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
// Calculate candidate score
|
||||
candidate.Score = em.calculateCandidateScore(candidate)
|
||||
|
||||
|
||||
candidacyMsg := ElectionMessage{
|
||||
Type: "candidacy_announcement",
|
||||
NodeID: em.nodeID,
|
||||
@@ -410,9 +418,9 @@ func (em *ElectionManager) announceCandidacy(term int) {
|
||||
Term: term,
|
||||
Data: candidate,
|
||||
}
|
||||
|
||||
|
||||
log.Printf("📢 Announcing candidacy (score: %.2f)", candidate.Score)
|
||||
|
||||
|
||||
if err := em.publishElectionMessage(candidacyMsg); err != nil {
|
||||
log.Printf("❌ Failed to announce candidacy: %v", err)
|
||||
}
|
||||
@@ -423,9 +431,9 @@ func (em *ElectionManager) getResourceMetrics() ResourceMetrics {
|
||||
// TODO: Implement actual resource collection
|
||||
// For now, return simulated values
|
||||
return ResourceMetrics{
|
||||
CPUUsage: rand.Float64() * 0.5, // 0-50% CPU
|
||||
MemoryUsage: rand.Float64() * 0.7, // 0-70% Memory
|
||||
DiskUsage: rand.Float64() * 0.6, // 0-60% Disk
|
||||
CPUUsage: rand.Float64() * 0.5, // 0-50% CPU
|
||||
MemoryUsage: rand.Float64() * 0.7, // 0-70% Memory
|
||||
DiskUsage: rand.Float64() * 0.6, // 0-60% Disk
|
||||
NetworkQuality: 0.8 + rand.Float64()*0.2, // 80-100% Network Quality
|
||||
}
|
||||
}
|
||||
@@ -435,10 +443,10 @@ func (em *ElectionManager) calculateCandidateScore(candidate *AdminCandidate) fl
|
||||
// TODO: Add LeadershipScoring to config.ElectionConfig
|
||||
// scoring := em.config.Security.ElectionConfig.LeadershipScoring
|
||||
// Default scoring weights handled inline
|
||||
|
||||
|
||||
// Normalize metrics to 0-1 range
|
||||
uptimeScore := min(1.0, candidate.Uptime.Hours()/24.0) // Up to 24 hours gets full score
|
||||
|
||||
|
||||
// Capability score - higher for admin/coordination capabilities
|
||||
capabilityScore := 0.0
|
||||
adminCapabilities := []string{"admin_election", "context_curation", "key_reconstruction", "semantic_analysis", "project_manager"}
|
||||
@@ -455,22 +463,22 @@ func (em *ElectionManager) calculateCandidateScore(candidate *AdminCandidate) fl
|
||||
}
|
||||
}
|
||||
capabilityScore = min(1.0, capabilityScore)
|
||||
|
||||
|
||||
// Resource score - lower usage is better
|
||||
resourceScore := (1.0 - candidate.Resources.CPUUsage) * 0.3 +
|
||||
(1.0 - candidate.Resources.MemoryUsage) * 0.3 +
|
||||
(1.0 - candidate.Resources.DiskUsage) * 0.2 +
|
||||
candidate.Resources.NetworkQuality * 0.2
|
||||
|
||||
resourceScore := (1.0-candidate.Resources.CPUUsage)*0.3 +
|
||||
(1.0-candidate.Resources.MemoryUsage)*0.3 +
|
||||
(1.0-candidate.Resources.DiskUsage)*0.2 +
|
||||
candidate.Resources.NetworkQuality*0.2
|
||||
|
||||
experienceScore := min(1.0, candidate.Experience.Hours()/168.0) // Up to 1 week gets full score
|
||||
|
||||
|
||||
// Weighted final score (using default weights)
|
||||
finalScore := uptimeScore*0.3 +
|
||||
capabilityScore*0.2 +
|
||||
resourceScore*0.2 +
|
||||
candidate.Resources.NetworkQuality*0.15 +
|
||||
experienceScore*0.15
|
||||
|
||||
|
||||
return finalScore
|
||||
}
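// Worked example of the default weights above (illustrative numbers): a candidate
// with 12h uptime (uptimeScore 0.5), capabilityScore 0.6, resourceScore 0.7,
// network quality 0.9, and 12h experience (experienceScore ≈ 0.07) scores
// 0.5*0.3 + 0.6*0.2 + 0.7*0.2 + 0.9*0.15 + 0.07*0.15 ≈ 0.56.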
|
||||
|
||||
@@ -478,11 +486,11 @@ func (em *ElectionManager) calculateCandidateScore(candidate *AdminCandidate) fl
|
||||
func (em *ElectionManager) startElectionTimeout(term int) {
|
||||
em.mu.Lock()
|
||||
defer em.mu.Unlock()
|
||||
|
||||
|
||||
if em.electionTimer != nil {
|
||||
em.electionTimer.Stop()
|
||||
}
|
||||
|
||||
|
||||
em.electionTimer = time.AfterFunc(em.config.Security.ElectionConfig.ElectionTimeout, func() {
|
||||
em.completeElection(term)
|
||||
})
|
||||
@@ -492,15 +500,15 @@ func (em *ElectionManager) startElectionTimeout(term int) {
|
||||
func (em *ElectionManager) completeElection(term int) {
|
||||
em.mu.Lock()
|
||||
defer em.mu.Unlock()
|
||||
|
||||
|
||||
// Verify this is still the current term
|
||||
if term != em.currentTerm {
|
||||
log.Printf("⏰ Election timeout for old term %d, ignoring", term)
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
log.Printf("⏰ Election timeout reached, tallying votes")
|
||||
|
||||
|
||||
// Find the winning candidate
|
||||
winner := em.findElectionWinner()
|
||||
if winner == nil {
|
||||
@@ -513,14 +521,14 @@ func (em *ElectionManager) completeElection(term int) {
|
||||
}()
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
log.Printf("🏆 Election winner: %s (score: %.2f)", winner.NodeID, winner.Score)
|
||||
|
||||
|
||||
// Update admin
|
||||
oldAdmin := em.currentAdmin
|
||||
em.currentAdmin = winner.NodeID
|
||||
em.state = StateComplete
|
||||
|
||||
|
||||
// Announce the winner
|
||||
winnerMsg := ElectionMessage{
|
||||
Type: "election_winner",
|
||||
@@ -529,16 +537,16 @@ func (em *ElectionManager) completeElection(term int) {
|
||||
Term: term,
|
||||
Data: winner,
|
||||
}
|
||||
|
||||
|
||||
em.mu.Unlock() // Unlock before publishing
|
||||
|
||||
|
||||
if err := em.publishElectionMessage(winnerMsg); err != nil {
|
||||
log.Printf("❌ Failed to announce election winner: %v", err)
|
||||
}
|
||||
|
||||
|
||||
// Handle heartbeat lifecycle based on admin change
|
||||
em.handleHeartbeatTransition(oldAdmin, winner.NodeID)
|
||||
|
||||
|
||||
// Trigger callbacks
|
||||
if em.onAdminChanged != nil {
|
||||
em.onAdminChanged(oldAdmin, winner.NodeID)
|
||||
@@ -546,7 +554,7 @@ func (em *ElectionManager) completeElection(term int) {
|
||||
if em.onElectionComplete != nil {
|
||||
em.onElectionComplete(winner.NodeID)
|
||||
}
|
||||
|
||||
|
||||
em.mu.Lock()
|
||||
em.state = StateIdle // Reset state for next election
|
||||
}
|
||||
@@ -556,16 +564,16 @@ func (em *ElectionManager) findElectionWinner() *AdminCandidate {
|
||||
if len(em.candidates) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
// Count votes for each candidate
|
||||
voteCounts := make(map[string]int)
|
||||
totalVotes := 0
|
||||
|
||||
|
||||
// Initialize vote counts for all candidates
|
||||
for candidateID := range em.candidates {
|
||||
voteCounts[candidateID] = 0
|
||||
}
|
||||
|
||||
|
||||
// Tally actual votes
|
||||
for _, candidateID := range em.votes {
|
||||
if _, exists := em.candidates[candidateID]; exists {
|
||||
@@ -573,12 +581,12 @@ func (em *ElectionManager) findElectionWinner() *AdminCandidate {
|
||||
totalVotes++
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// If no votes cast, fall back to highest scoring candidate
|
||||
if totalVotes == 0 {
|
||||
var winner *AdminCandidate
|
||||
highestScore := -1.0
|
||||
|
||||
|
||||
for _, candidate := range em.candidates {
|
||||
if candidate.Score > highestScore {
|
||||
highestScore = candidate.Score
|
||||
@@ -587,12 +595,12 @@ func (em *ElectionManager) findElectionWinner() *AdminCandidate {
|
||||
}
|
||||
return winner
|
||||
}
|
||||
|
||||
|
||||
// Find candidate with most votes
|
||||
var winner *AdminCandidate
|
||||
maxVotes := -1
|
||||
highestScore := -1.0
|
||||
|
||||
|
||||
for candidateID, voteCount := range voteCounts {
|
||||
candidate := em.candidates[candidateID]
|
||||
if voteCount > maxVotes || (voteCount == maxVotes && candidate.Score > highestScore) {
|
||||
@@ -601,10 +609,10 @@ func (em *ElectionManager) findElectionWinner() *AdminCandidate {
|
||||
winner = candidate
|
||||
}
|
||||
}
|
||||
|
||||
log.Printf("🗳️ Election results: %d total votes, winner: %s with %d votes (score: %.2f)",
|
||||
|
||||
log.Printf("🗳️ Election results: %d total votes, winner: %s with %d votes (score: %.2f)",
|
||||
totalVotes, winner.NodeID, maxVotes, winner.Score)
|
||||
|
||||
|
||||
return winner
|
||||
}
|
||||
|
||||
@@ -615,12 +623,12 @@ func (em *ElectionManager) handleElectionMessage(data []byte) {
|
||||
log.Printf("❌ Failed to unmarshal election message: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
// Ignore messages from ourselves
|
||||
if msg.NodeID == em.nodeID {
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
switch msg.Type {
|
||||
case "admin_discovery_request":
|
||||
em.handleAdminDiscoveryRequest(msg)
|
||||
@@ -643,7 +651,7 @@ func (em *ElectionManager) handleAdminDiscoveryRequest(msg ElectionMessage) {
|
||||
currentAdmin := em.currentAdmin
|
||||
state := em.state
|
||||
em.mu.RUnlock()
|
||||
|
||||
|
||||
// Only respond if we know who the current admin is and we're idle
|
||||
if currentAdmin != "" && state == StateIdle {
|
||||
responseMsg := ElectionMessage{
|
||||
@@ -654,7 +662,7 @@ func (em *ElectionManager) handleAdminDiscoveryRequest(msg ElectionMessage) {
|
||||
"current_admin": currentAdmin,
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
if err := em.publishElectionMessage(responseMsg); err != nil {
|
||||
log.Printf("❌ Failed to send admin discovery response: %v", err)
|
||||
}
|
||||
@@ -679,7 +687,7 @@ func (em *ElectionManager) handleAdminDiscoveryResponse(msg ElectionMessage) {
|
||||
func (em *ElectionManager) handleElectionStarted(msg ElectionMessage) {
|
||||
em.mu.Lock()
|
||||
defer em.mu.Unlock()
|
||||
|
||||
|
||||
// If we receive an election start with a higher term, join the election
|
||||
if msg.Term > em.currentTerm {
|
||||
log.Printf("🔄 Joining election with term %d", msg.Term)
|
||||
@@ -687,7 +695,7 @@ func (em *ElectionManager) handleElectionStarted(msg ElectionMessage) {
|
||||
em.state = StateElecting
|
||||
em.candidates = make(map[string]*AdminCandidate)
|
||||
em.votes = make(map[string]string)
|
||||
|
||||
|
||||
// Announce candidacy if eligible
|
||||
if em.canBeAdmin() {
|
||||
go em.announceCandidacy(msg.Term)
|
||||
@@ -699,25 +707,25 @@ func (em *ElectionManager) handleElectionStarted(msg ElectionMessage) {
|
||||
func (em *ElectionManager) handleCandidacyAnnouncement(msg ElectionMessage) {
|
||||
em.mu.Lock()
|
||||
defer em.mu.Unlock()
|
||||
|
||||
|
||||
// Only process if it's for the current term
|
||||
if msg.Term != em.currentTerm {
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
// Convert data to candidate struct
|
||||
candidateData, err := json.Marshal(msg.Data)
|
||||
if err != nil {
|
||||
log.Printf("❌ Failed to marshal candidate data: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
var candidate AdminCandidate
|
||||
if err := json.Unmarshal(candidateData, &candidate); err != nil {
|
||||
log.Printf("❌ Failed to unmarshal candidate: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
log.Printf("📝 Received candidacy from %s (score: %.2f)", candidate.NodeID, candidate.Score)
|
||||
em.candidates[candidate.NodeID] = &candidate
|
||||
}
|
||||
@@ -726,31 +734,31 @@ func (em *ElectionManager) handleCandidacyAnnouncement(msg ElectionMessage) {
|
||||
func (em *ElectionManager) handleElectionVote(msg ElectionMessage) {
|
||||
em.mu.Lock()
|
||||
defer em.mu.Unlock()
|
||||
|
||||
|
||||
// Extract vote data
|
||||
voteData, ok := msg.Data.(map[string]interface{})
|
||||
if !ok {
|
||||
log.Printf("❌ Invalid vote data format from %s", msg.NodeID)
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
candidateID, ok := voteData["candidate"].(string)
|
||||
if !ok {
|
||||
log.Printf("❌ Invalid candidate ID in vote from %s", msg.NodeID)
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
// Validate candidate exists
|
||||
if _, exists := em.candidates[candidateID]; !exists {
|
||||
log.Printf("❌ Vote for unknown candidate %s from %s", candidateID, msg.NodeID)
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
// Prevent duplicate voting
|
||||
if existingVote, exists := em.votes[msg.NodeID]; exists {
|
||||
log.Printf("⚠️ Node %s already voted for %s, updating to %s", msg.NodeID, existingVote, candidateID)
|
||||
}
|
||||
|
||||
|
||||
// Record the vote
|
||||
em.votes[msg.NodeID] = candidateID
|
||||
log.Printf("🗳️ Recorded vote from %s for candidate %s", msg.NodeID, candidateID)
|
||||
@@ -763,24 +771,24 @@ func (em *ElectionManager) handleElectionWinner(msg ElectionMessage) {
|
||||
log.Printf("❌ Failed to marshal winner data: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
var winner AdminCandidate
|
||||
if err := json.Unmarshal(candidateData, &winner); err != nil {
|
||||
log.Printf("❌ Failed to unmarshal winner: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
em.mu.Lock()
|
||||
oldAdmin := em.currentAdmin
|
||||
em.currentAdmin = winner.NodeID
|
||||
em.state = StateIdle
|
||||
em.mu.Unlock()
|
||||
|
||||
|
||||
log.Printf("👑 New admin elected: %s", winner.NodeID)
|
||||
|
||||
|
||||
// Handle heartbeat lifecycle based on admin change
|
||||
em.handleHeartbeatTransition(oldAdmin, winner.NodeID)
|
||||
|
||||
|
||||
// Trigger callback
|
||||
if em.onAdminChanged != nil {
|
||||
em.onAdminChanged(oldAdmin, winner.NodeID)
|
||||
@@ -796,7 +804,7 @@ func (em *ElectionManager) handleHeartbeatTransition(oldAdmin, newAdmin string)
|
||||
log.Printf("⚠️ Error stopping heartbeat: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// If we gained admin role, start heartbeat
|
||||
if newAdmin == em.nodeID && oldAdmin != em.nodeID {
|
||||
log.Printf("🔄 Gained admin role, starting heartbeat")
|
||||
@@ -816,15 +824,15 @@ func (em *ElectionManager) handleAdminHeartbeat(data []byte) {
|
||||
NodeID string `json:"node_id"`
|
||||
Timestamp time.Time `json:"timestamp"`
|
||||
}
|
||||
|
||||
|
||||
if err := json.Unmarshal(data, &heartbeat); err != nil {
|
||||
log.Printf("❌ Failed to unmarshal heartbeat: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
em.mu.Lock()
|
||||
defer em.mu.Unlock()
|
||||
|
||||
|
||||
// Update admin and heartbeat timestamp
|
||||
if em.currentAdmin == "" || em.currentAdmin == heartbeat.NodeID {
|
||||
em.currentAdmin = heartbeat.NodeID
|
||||
@@ -838,11 +846,8 @@ func (em *ElectionManager) publishElectionMessage(msg ElectionMessage) error {
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to marshal election message: %w", err)
|
||||
}
|
||||
|
||||
// TODO: Fix pubsub interface
|
||||
// return em.pubsub.Publish("CHORUS/election/v1", data)
|
||||
_ = data // Avoid unused variable
|
||||
return nil
|
||||
|
||||
return em.pubsub.PublishRaw(electionTopic, data)
|
||||
}
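// The raw-topic publish above pairs with the SubscribeRawTopic registrations in
// Start(): both sides use the shared electionTopic and adminHeartbeatTopic
// constants, so the JSON payload marshalled here is exactly what
// handleElectionMessage unmarshals on the receiving side.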
|
||||
|
||||
// SendAdminHeartbeat sends admin heartbeat (only if this node is admin)
|
||||
@@ -850,7 +855,7 @@ func (em *ElectionManager) SendAdminHeartbeat() error {
|
||||
if !em.IsCurrentAdmin() {
|
||||
return fmt.Errorf("not current admin")
|
||||
}
|
||||
|
||||
|
||||
heartbeat := struct {
|
||||
NodeID string `json:"node_id"`
|
||||
Timestamp time.Time `json:"timestamp"`
|
||||
@@ -858,16 +863,13 @@ func (em *ElectionManager) SendAdminHeartbeat() error {
|
||||
NodeID: em.nodeID,
|
||||
Timestamp: time.Now(),
|
||||
}
|
||||
|
||||
|
||||
data, err := json.Marshal(heartbeat)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to marshal heartbeat: %w", err)
|
||||
}
|
||||
|
||||
// TODO: Fix pubsub interface
|
||||
// return em.pubsub.Publish("CHORUS/admin/heartbeat/v1", data)
|
||||
_ = data // Avoid unused variable
|
||||
return nil
|
||||
|
||||
return em.pubsub.PublishRaw(adminHeartbeatTopic, data)
|
||||
}
|
||||
|
||||
// min returns the minimum of two float64 values
|
||||
@@ -894,26 +896,26 @@ func NewHeartbeatManager(electionMgr *ElectionManager) *HeartbeatManager {
|
||||
func (hm *HeartbeatManager) StartHeartbeat() error {
|
||||
hm.mu.Lock()
|
||||
defer hm.mu.Unlock()
|
||||
|
||||
|
||||
if hm.isRunning {
|
||||
hm.logger("Heartbeat already running")
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
if !hm.electionMgr.IsCurrentAdmin() {
|
||||
return fmt.Errorf("not admin, cannot start heartbeat")
|
||||
}
|
||||
|
||||
|
||||
hm.logger("Starting admin heartbeat transmission")
|
||||
|
||||
|
||||
hm.stopCh = make(chan struct{})
|
||||
interval := hm.electionMgr.config.Security.ElectionConfig.HeartbeatTimeout / 2
|
||||
hm.ticker = time.NewTicker(interval)
|
||||
hm.isRunning = true
|
||||
|
||||
|
||||
// Start heartbeat goroutine
|
||||
go hm.heartbeatLoop()
|
||||
|
||||
|
||||
hm.logger("Admin heartbeat started (interval: %v)", interval)
|
||||
return nil
|
||||
}
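// Design note: sending at half the heartbeat timeout leaves headroom for one
// delayed or dropped beat before the follower-side check in
// performAdminDiscovery fires TriggerHeartbeatTimeout, at the cost of doubling
// the heartbeat message rate.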
|
||||
@@ -922,22 +924,22 @@ func (hm *HeartbeatManager) StartHeartbeat() error {
|
||||
func (hm *HeartbeatManager) StopHeartbeat() error {
|
||||
hm.mu.Lock()
|
||||
defer hm.mu.Unlock()
|
||||
|
||||
|
||||
if !hm.isRunning {
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
hm.logger("Stopping admin heartbeat transmission")
|
||||
|
||||
|
||||
// Signal stop
|
||||
close(hm.stopCh)
|
||||
|
||||
|
||||
// Stop ticker
|
||||
if hm.ticker != nil {
|
||||
hm.ticker.Stop()
|
||||
hm.ticker = nil
|
||||
}
|
||||
|
||||
|
||||
hm.isRunning = false
|
||||
hm.logger("Admin heartbeat stopped")
|
||||
return nil
|
||||
@@ -958,7 +960,7 @@ func (hm *HeartbeatManager) heartbeatLoop() {
|
||||
hm.mu.Unlock()
|
||||
hm.logger("Heartbeat loop terminated")
|
||||
}()
|
||||
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-hm.ticker.C:
|
||||
@@ -971,11 +973,11 @@ func (hm *HeartbeatManager) heartbeatLoop() {
|
||||
hm.logger("No longer admin, stopping heartbeat")
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
case <-hm.stopCh:
|
||||
hm.logger("Heartbeat stop signal received")
|
||||
return
|
||||
|
||||
|
||||
case <-hm.electionMgr.ctx.Done():
|
||||
hm.logger("Election manager context cancelled")
|
||||
return
|
||||
@@ -987,19 +989,19 @@ func (hm *HeartbeatManager) heartbeatLoop() {
|
||||
func (hm *HeartbeatManager) GetHeartbeatStatus() map[string]interface{} {
|
||||
hm.mu.Lock()
|
||||
defer hm.mu.Unlock()
|
||||
|
||||
|
||||
status := map[string]interface{}{
|
||||
"running": hm.isRunning,
|
||||
"is_admin": hm.electionMgr.IsCurrentAdmin(),
|
||||
"last_sent": time.Now(), // TODO: Track actual last sent time
|
||||
"running": hm.isRunning,
|
||||
"is_admin": hm.electionMgr.IsCurrentAdmin(),
|
||||
"last_sent": time.Now(), // TODO: Track actual last sent time
|
||||
}
|
||||
|
||||
|
||||
if hm.isRunning && hm.ticker != nil {
|
||||
// Calculate next heartbeat time (approximate)
|
||||
interval := hm.electionMgr.config.Security.ElectionConfig.HeartbeatTimeout / 2
|
||||
status["interval"] = interval.String()
|
||||
status["next_heartbeat"] = time.Now().Add(interval)
|
||||
}
|
||||
|
||||
|
||||
return status
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,451 +2,185 @@ package election
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"chorus/pkg/config"
|
||||
pubsubpkg "chorus/pubsub"
|
||||
libp2p "github.com/libp2p/go-libp2p"
|
||||
)
|
||||
|
||||
func TestElectionManager_NewElectionManager(t *testing.T) {
|
||||
// newTestElectionManager wires a real libp2p host and PubSub instance so the
|
||||
// election manager exercises the same code paths used in production.
|
||||
func newTestElectionManager(t *testing.T) *ElectionManager {
|
||||
t.Helper()
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
|
||||
host, err := libp2p.New(libp2p.ListenAddrStrings("/ip4/127.0.0.1/tcp/0"))
|
||||
if err != nil {
|
||||
cancel()
|
||||
t.Fatalf("failed to create libp2p host: %v", err)
|
||||
}
|
||||
|
||||
ps, err := pubsubpkg.NewPubSub(ctx, host, "", "")
|
||||
if err != nil {
|
||||
host.Close()
|
||||
cancel()
|
||||
t.Fatalf("failed to create pubsub: %v", err)
|
||||
}
|
||||
|
||||
cfg := &config.Config{
|
||||
Agent: config.AgentConfig{
|
||||
ID: "test-node",
|
||||
ID: host.ID().String(),
|
||||
Role: "context_admin",
|
||||
Capabilities: []string{"admin_election", "context_curation"},
|
||||
Models: []string{"meta/llama-3.1-8b-instruct"},
|
||||
Specialization: "coordination",
|
||||
},
|
||||
Security: config.SecurityConfig{},
|
||||
}
|
||||
|
||||
em := NewElectionManager(cfg)
|
||||
if em == nil {
|
||||
t.Fatal("Expected NewElectionManager to return non-nil manager")
|
||||
}
|
||||
em := NewElectionManager(ctx, cfg, host, ps, host.ID().String())
|
||||
|
||||
if em.nodeID != "test-node" {
|
||||
t.Errorf("Expected nodeID to be 'test-node', got %s", em.nodeID)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
em.Stop()
|
||||
ps.Close()
|
||||
host.Close()
|
||||
cancel()
|
||||
})
|
||||
|
||||
return em
|
||||
}
|
||||
|
||||
func TestNewElectionManagerInitialState(t *testing.T) {
|
||||
em := newTestElectionManager(t)
|
||||
|
||||
if em.state != StateIdle {
|
||||
t.Errorf("Expected initial state to be StateIdle, got %v", em.state)
|
||||
t.Fatalf("expected initial state %q, got %q", StateIdle, em.state)
|
||||
}
|
||||
|
||||
if em.currentTerm != 0 {
|
||||
t.Fatalf("expected initial term 0, got %d", em.currentTerm)
|
||||
}
|
||||
|
||||
if em.nodeID == "" {
|
||||
t.Fatal("expected nodeID to be populated")
|
||||
}
|
||||
}
|
||||
|
||||
func TestElectionManager_StartElection(t *testing.T) {
|
||||
cfg := &config.Config{
|
||||
Agent: config.AgentConfig{
|
||||
ID: "test-node",
|
||||
},
|
||||
func TestElectionManagerCanBeAdmin(t *testing.T) {
|
||||
em := newTestElectionManager(t)
|
||||
|
||||
if !em.canBeAdmin() {
|
||||
t.Fatal("expected node to qualify for admin election")
|
||||
}
|
||||
|
||||
em := NewElectionManager(cfg)
|
||||
|
||||
// Start election
|
||||
err := em.StartElection()
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to start election: %v", err)
|
||||
}
|
||||
|
||||
// Verify state changed
|
||||
if em.state != StateCandidate {
|
||||
t.Errorf("Expected state to be StateCandidate after starting election, got %v", em.state)
|
||||
}
|
||||
|
||||
// Verify we added ourselves as a candidate
|
||||
em.mu.RLock()
|
||||
candidate, exists := em.candidates[em.nodeID]
|
||||
em.mu.RUnlock()
|
||||
|
||||
if !exists {
|
||||
t.Error("Expected to find ourselves as a candidate after starting election")
|
||||
}
|
||||
|
||||
if candidate.NodeID != em.nodeID {
|
||||
t.Errorf("Expected candidate NodeID to be %s, got %s", em.nodeID, candidate.NodeID)
|
||||
em.config.Agent.Capabilities = []string{"runtime_support"}
|
||||
if em.canBeAdmin() {
|
||||
t.Fatal("expected node without admin capabilities to be ineligible")
|
||||
}
|
||||
}
|
||||
|
||||
func TestElectionManager_Vote(t *testing.T) {
|
||||
cfg := &config.Config{
|
||||
Agent: config.AgentConfig{
|
||||
ID: "test-node",
|
||||
},
|
||||
}
|
||||
|
||||
em := NewElectionManager(cfg)
|
||||
|
||||
// Add a candidate first
|
||||
candidate := &AdminCandidate{
|
||||
NodeID: "candidate-1",
|
||||
Term: 1,
|
||||
Score: 0.8,
|
||||
Capabilities: []string{"admin"},
|
||||
LastSeen: time.Now(),
|
||||
}
|
||||
|
||||
em.mu.Lock()
|
||||
em.candidates["candidate-1"] = candidate
|
||||
em.mu.Unlock()
|
||||
|
||||
// Vote for the candidate
|
||||
err := em.Vote("candidate-1")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to vote: %v", err)
|
||||
}
|
||||
|
||||
// Verify vote was recorded
|
||||
em.mu.RLock()
|
||||
vote, exists := em.votes[em.nodeID]
|
||||
em.mu.RUnlock()
|
||||
|
||||
if !exists {
|
||||
t.Error("Expected to find our vote after voting")
|
||||
}
|
||||
|
||||
if vote != "candidate-1" {
|
||||
t.Errorf("Expected vote to be for 'candidate-1', got %s", vote)
|
||||
}
|
||||
}
|
||||
|
||||
func TestElectionManager_VoteInvalidCandidate(t *testing.T) {
|
||||
cfg := &config.Config{
|
||||
Agent: config.AgentConfig{
|
||||
ID: "test-node",
|
||||
},
|
||||
}
|
||||
|
||||
em := NewElectionManager(cfg)
|
||||
|
||||
// Try to vote for non-existent candidate
|
||||
err := em.Vote("non-existent")
|
||||
if err == nil {
|
||||
t.Error("Expected error when voting for non-existent candidate")
|
||||
}
|
||||
}
|
||||
|
||||
func TestElectionManager_AddCandidate(t *testing.T) {
|
||||
cfg := &config.Config{
|
||||
Agent: config.AgentConfig{
|
||||
ID: "test-node",
|
||||
},
|
||||
}
|
||||
|
||||
em := NewElectionManager(cfg)
|
||||
|
||||
candidate := &AdminCandidate{
|
||||
NodeID: "new-candidate",
|
||||
Term: 1,
|
||||
Score: 0.7,
|
||||
Capabilities: []string{"admin", "leader"},
|
||||
LastSeen: time.Now(),
|
||||
}
|
||||
|
||||
err := em.AddCandidate(candidate)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to add candidate: %v", err)
|
||||
}
|
||||
|
||||
// Verify candidate was added
|
||||
em.mu.RLock()
|
||||
stored, exists := em.candidates["new-candidate"]
|
||||
em.mu.RUnlock()
|
||||
|
||||
if !exists {
|
||||
t.Error("Expected to find added candidate")
|
||||
}
|
||||
|
||||
if stored.NodeID != "new-candidate" {
|
||||
t.Errorf("Expected stored candidate NodeID to be 'new-candidate', got %s", stored.NodeID)
|
||||
}
|
||||
|
||||
if stored.Score != 0.7 {
|
||||
t.Errorf("Expected stored candidate score to be 0.7, got %f", stored.Score)
|
||||
}
|
||||
}
|
||||
|
||||
func TestElectionManager_FindElectionWinner(t *testing.T) {
|
||||
cfg := &config.Config{
|
||||
Agent: config.AgentConfig{
|
||||
ID: "test-node",
|
||||
},
|
||||
}
|
||||
|
||||
em := NewElectionManager(cfg)
|
||||
|
||||
// Add candidates with different scores
|
||||
candidates := []*AdminCandidate{
|
||||
{
|
||||
NodeID: "candidate-1",
|
||||
Term: 1,
|
||||
Score: 0.6,
|
||||
Capabilities: []string{"admin"},
|
||||
LastSeen: time.Now(),
|
||||
},
|
||||
{
|
||||
NodeID: "candidate-2",
|
||||
Term: 1,
|
||||
Score: 0.8,
|
||||
Capabilities: []string{"admin", "leader"},
|
||||
LastSeen: time.Now(),
|
||||
},
|
||||
{
|
||||
NodeID: "candidate-3",
|
||||
Term: 1,
|
||||
Score: 0.7,
|
||||
Capabilities: []string{"admin"},
|
||||
LastSeen: time.Now(),
|
||||
},
|
||||
}
|
||||
func TestFindElectionWinnerPrefersVotesThenScore(t *testing.T) {
|
||||
em := newTestElectionManager(t)
|
||||
|
||||
em.mu.Lock()
|
||||
for _, candidate := range candidates {
|
||||
em.candidates[candidate.NodeID] = candidate
|
||||
em.candidates = map[string]*AdminCandidate{
|
||||
"candidate-1": {
|
||||
NodeID: "candidate-1",
|
||||
PeerID: em.host.ID(),
|
||||
Score: 0.65,
|
||||
},
|
||||
"candidate-2": {
|
||||
NodeID: "candidate-2",
|
||||
PeerID: em.host.ID(),
|
||||
Score: 0.80,
|
||||
},
|
||||
}
|
||||
em.votes = map[string]string{
|
||||
"voter-a": "candidate-1",
|
||||
"voter-b": "candidate-2",
|
||||
"voter-c": "candidate-2",
|
||||
}
|
||||
|
||||
// Add some votes
|
||||
em.votes["voter-1"] = "candidate-2"
|
||||
em.votes["voter-2"] = "candidate-2"
|
||||
em.votes["voter-3"] = "candidate-1"
|
||||
em.mu.Unlock()
|
||||
|
||||
// Find winner
|
||||
winner := em.findElectionWinner()
|
||||
|
||||
if winner == nil {
|
||||
t.Fatal("Expected findElectionWinner to return a winner")
|
||||
t.Fatal("expected a winner to be selected")
|
||||
}
|
||||
|
||||
// candidate-2 should win with most votes (2 votes)
|
||||
if winner.NodeID != "candidate-2" {
|
||||
t.Errorf("Expected winner to be 'candidate-2', got %s", winner.NodeID)
|
||||
t.Fatalf("expected candidate-2 to win, got %s", winner.NodeID)
|
||||
}
|
||||
}
|
||||
|
||||
func TestElectionManager_FindElectionWinnerNoVotes(t *testing.T) {
|
||||
cfg := &config.Config{
|
||||
Agent: config.AgentConfig{
|
||||
ID: "test-node",
|
||||
},
|
||||
}
|
||||
|
||||
em := NewElectionManager(cfg)
|
||||
|
||||
// Add candidates but no votes - should fall back to highest score
|
||||
candidates := []*AdminCandidate{
|
||||
{
|
||||
NodeID: "candidate-1",
|
||||
Term: 1,
|
||||
Score: 0.6,
|
||||
Capabilities: []string{"admin"},
|
||||
LastSeen: time.Now(),
|
||||
},
|
||||
{
|
||||
NodeID: "candidate-2",
|
||||
Term: 1,
|
||||
Score: 0.9, // Highest score
|
||||
Capabilities: []string{"admin", "leader"},
|
||||
LastSeen: time.Now(),
|
||||
},
|
||||
}
|
||||
func TestHandleElectionMessageAddsCandidate(t *testing.T) {
|
||||
em := newTestElectionManager(t)
|
||||
|
||||
em.mu.Lock()
|
||||
for _, candidate := range candidates {
|
||||
em.candidates[candidate.NodeID] = candidate
|
||||
}
|
||||
em.currentTerm = 3
|
||||
em.state = StateElecting
|
||||
em.mu.Unlock()
|
||||
|
||||
// Find winner without any votes
|
||||
winner := em.findElectionWinner()
|
||||
|
||||
if winner == nil {
|
||||
t.Fatal("Expected findElectionWinner to return a winner")
|
||||
}
|
||||
|
||||
// candidate-2 should win with highest score
|
||||
if winner.NodeID != "candidate-2" {
|
||||
t.Errorf("Expected winner to be 'candidate-2' (highest score), got %s", winner.NodeID)
|
||||
}
|
||||
}
|
||||
|
||||
func TestElectionManager_HandleElectionVote(t *testing.T) {
|
||||
cfg := &config.Config{
|
||||
Agent: config.AgentConfig{
|
||||
ID: "test-node",
|
||||
},
|
||||
}
|
||||
|
||||
em := NewElectionManager(cfg)
|
||||
|
||||
// Add a candidate first
|
||||
candidate := &AdminCandidate{
|
||||
NodeID: "candidate-1",
|
||||
Term: 1,
|
||||
Score: 0.8,
|
||||
Capabilities: []string{"admin"},
|
||||
LastSeen: time.Now(),
|
||||
NodeID: "peer-2",
|
||||
PeerID: em.host.ID(),
|
||||
Capabilities: []string{"admin_election"},
|
||||
Uptime: time.Second,
|
||||
Score: 0.75,
|
||||
}
|
||||
|
||||
payload, err := json.Marshal(candidate)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to marshal candidate: %v", err)
|
||||
}
|
||||
|
||||
var data map[string]interface{}
|
||||
if err := json.Unmarshal(payload, &data); err != nil {
|
||||
t.Fatalf("failed to unmarshal candidate payload: %v", err)
|
||||
}
|
||||
|
||||
em.mu.Lock()
|
||||
em.candidates["candidate-1"] = candidate
|
||||
em.mu.Unlock()
|
||||
|
||||
// Create vote message
|
||||
msg := ElectionMessage{
|
||||
Type: MessageTypeVote,
|
||||
NodeID: "voter-1",
|
||||
Data: map[string]interface{}{
|
||||
"candidate": "candidate-1",
|
||||
},
|
||||
Type: "candidacy_announcement",
|
||||
NodeID: "peer-2",
|
||||
Timestamp: time.Now(),
|
||||
Term: 3,
|
||||
Data: data,
|
||||
}
|
||||
|
||||
// Handle the vote
|
||||
em.handleElectionVote(msg)
|
||||
serialized, err := json.Marshal(msg)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to marshal election message: %v", err)
|
||||
}
|
||||
|
||||
em.handleElectionMessage(serialized)
|
||||
|
||||
// Verify vote was recorded
|
||||
em.mu.RLock()
|
||||
vote, exists := em.votes["voter-1"]
|
||||
_, exists := em.candidates["peer-2"]
|
||||
em.mu.RUnlock()
|
||||
|
||||
if !exists {
|
||||
t.Error("Expected vote to be recorded after handling vote message")
|
||||
}
|
||||
|
||||
if vote != "candidate-1" {
|
||||
t.Errorf("Expected recorded vote to be for 'candidate-1', got %s", vote)
|
||||
t.Fatal("expected candidacy announcement to register candidate")
|
||||
}
|
||||
}
|
||||
|
||||
func TestElectionManager_HandleElectionVoteInvalidData(t *testing.T) {
|
||||
cfg := &config.Config{
|
||||
Agent: config.AgentConfig{
|
||||
ID: "test-node",
|
||||
},
|
||||
func TestSendAdminHeartbeatRequiresLeadership(t *testing.T) {
|
||||
em := newTestElectionManager(t)
|
||||
|
||||
if err := em.SendAdminHeartbeat(); err == nil {
|
||||
t.Fatal("expected error when non-admin sends heartbeat")
|
||||
}
|
||||
|
||||
em := NewElectionManager(cfg)
|
||||
|
||||
// Create vote message with invalid data
|
||||
msg := ElectionMessage{
|
||||
Type: MessageTypeVote,
|
||||
NodeID: "voter-1",
|
||||
Data: "invalid-data", // Should be map[string]interface{}
|
||||
if err := em.Start(); err != nil {
|
||||
t.Fatalf("failed to start election manager: %v", err)
|
||||
}
|
||||
|
||||
// Handle the vote - should not crash
|
||||
em.handleElectionVote(msg)
|
||||
|
||||
// Verify no vote was recorded
|
||||
em.mu.RLock()
|
||||
_, exists := em.votes["voter-1"]
|
||||
em.mu.RUnlock()
|
||||
|
||||
if exists {
|
||||
t.Error("Expected no vote to be recorded with invalid data")
|
||||
}
|
||||
}
|
||||
|
||||
func TestElectionManager_CompleteElection(t *testing.T) {
|
||||
cfg := &config.Config{
|
||||
Agent: config.AgentConfig{
|
||||
ID: "test-node",
|
||||
},
|
||||
}
|
||||
|
||||
em := NewElectionManager(cfg)
|
||||
|
||||
// Set up election state
|
||||
em.mu.Lock()
|
||||
em.state = StateCandidate
|
||||
em.currentTerm = 1
|
||||
em.currentAdmin = em.nodeID
|
||||
em.mu.Unlock()
|
||||
|
||||
// Add a candidate
|
||||
candidate := &AdminCandidate{
|
||||
NodeID: "winner",
|
||||
Term: 1,
|
||||
Score: 0.9,
|
||||
Capabilities: []string{"admin", "leader"},
|
||||
LastSeen: time.Now(),
|
||||
}
|
||||
|
||||
em.mu.Lock()
|
||||
em.candidates["winner"] = candidate
|
||||
em.mu.Unlock()
|
||||
|
||||
// Complete election
|
||||
em.CompleteElection()
|
||||
|
||||
// Verify state reset
|
||||
em.mu.RLock()
|
||||
state := em.state
|
||||
em.mu.RUnlock()
|
||||
|
||||
if state != StateIdle {
|
||||
t.Errorf("Expected state to be StateIdle after completing election, got %v", state)
|
||||
if err := em.SendAdminHeartbeat(); err != nil {
|
||||
t.Fatalf("expected heartbeat to succeed for current admin, got error: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestElectionManager_Concurrency(t *testing.T) {
|
||||
cfg := &config.Config{
|
||||
Agent: config.AgentConfig{
|
||||
ID: "test-node",
|
||||
},
|
||||
}
|
||||
|
||||
em := NewElectionManager(cfg)
|
||||
|
||||
// Test concurrent access to vote and candidate operations
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
|
||||
defer cancel()
|
||||
|
||||
// Add a candidate
|
||||
candidate := &AdminCandidate{
|
||||
NodeID: "candidate-1",
|
||||
Term: 1,
|
||||
Score: 0.8,
|
||||
Capabilities: []string{"admin"},
|
||||
LastSeen: time.Now(),
|
||||
}
|
||||
|
||||
err := em.AddCandidate(candidate)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to add candidate: %v", err)
|
||||
}
|
||||
|
||||
// Run concurrent operations
|
||||
done := make(chan bool, 2)
|
||||
|
||||
// Concurrent voting
|
||||
go func() {
|
||||
defer func() { done <- true }()
|
||||
for i := 0; i < 10; i++ {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
default:
|
||||
em.Vote("candidate-1") // Ignore errors in concurrent test
|
||||
time.Sleep(10 * time.Millisecond)
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
// Concurrent state checking
|
||||
go func() {
|
||||
defer func() { done <- true }()
|
||||
for i := 0; i < 10; i++ {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
default:
|
||||
em.findElectionWinner() // Just check for races
|
||||
time.Sleep(10 * time.Millisecond)
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
// Wait for completion
|
||||
for i := 0; i < 2; i++ {
|
||||
select {
|
||||
case <-done:
|
||||
case <-ctx.Done():
|
||||
t.Fatal("Concurrent test timed out")
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -2,27 +2,26 @@ package metrics
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log"
|
||||
"net/http"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||
"github.com/prometheus/client_golang/prometheus/promauto"
|
||||
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||
)
|
||||
|
||||
// CHORUSMetrics provides comprehensive Prometheus metrics for the CHORUS system
|
||||
type CHORUSMetrics struct {
|
||||
registry *prometheus.Registry
|
||||
httpServer *http.Server
|
||||
|
||||
registry *prometheus.Registry
|
||||
httpServer *http.Server
|
||||
|
||||
// System metrics
|
||||
systemInfo *prometheus.GaugeVec
|
||||
uptime prometheus.Gauge
|
||||
buildInfo *prometheus.GaugeVec
|
||||
|
||||
systemInfo *prometheus.GaugeVec
|
||||
uptime prometheus.Gauge
|
||||
buildInfo *prometheus.GaugeVec
|
||||
|
||||
// P2P metrics
|
||||
p2pConnectedPeers prometheus.Gauge
|
||||
p2pMessagesSent *prometheus.CounterVec
|
||||
@@ -30,95 +29,98 @@ type CHORUSMetrics struct {
|
||||
p2pMessageLatency *prometheus.HistogramVec
|
||||
p2pConnectionDuration *prometheus.HistogramVec
|
||||
p2pPeerScore *prometheus.GaugeVec
|
||||
|
||||
|
||||
// DHT metrics
|
||||
dhtPutOperations *prometheus.CounterVec
|
||||
dhtGetOperations *prometheus.CounterVec
|
||||
dhtOperationLatency *prometheus.HistogramVec
|
||||
dhtProviderRecords prometheus.Gauge
|
||||
dhtReplicationFactor *prometheus.GaugeVec
|
||||
dhtContentKeys prometheus.Gauge
|
||||
dhtCacheHits *prometheus.CounterVec
|
||||
dhtCacheMisses *prometheus.CounterVec
|
||||
|
||||
dhtPutOperations *prometheus.CounterVec
|
||||
dhtGetOperations *prometheus.CounterVec
|
||||
dhtOperationLatency *prometheus.HistogramVec
|
||||
dhtProviderRecords prometheus.Gauge
|
||||
dhtReplicationFactor *prometheus.GaugeVec
|
||||
dhtContentKeys prometheus.Gauge
|
||||
dhtCacheHits *prometheus.CounterVec
|
||||
dhtCacheMisses *prometheus.CounterVec
|
||||
|
||||
// PubSub metrics
|
||||
pubsubTopics prometheus.Gauge
|
||||
pubsubSubscribers *prometheus.GaugeVec
|
||||
pubsubMessages *prometheus.CounterVec
|
||||
pubsubMessageLatency *prometheus.HistogramVec
|
||||
pubsubMessageSize *prometheus.HistogramVec
|
||||
|
||||
pubsubTopics prometheus.Gauge
|
||||
pubsubSubscribers *prometheus.GaugeVec
|
||||
pubsubMessages *prometheus.CounterVec
|
||||
pubsubMessageLatency *prometheus.HistogramVec
|
||||
pubsubMessageSize *prometheus.HistogramVec
|
||||
|
||||
// Election metrics
|
||||
electionTerm prometheus.Gauge
|
||||
electionState *prometheus.GaugeVec
|
||||
heartbeatsSent prometheus.Counter
|
||||
heartbeatsReceived prometheus.Counter
|
||||
leadershipChanges prometheus.Counter
|
||||
leaderUptime prometheus.Gauge
|
||||
electionLatency prometheus.Histogram
|
||||
|
||||
electionTerm prometheus.Gauge
|
||||
electionState *prometheus.GaugeVec
|
||||
heartbeatsSent prometheus.Counter
|
||||
heartbeatsReceived prometheus.Counter
|
||||
leadershipChanges prometheus.Counter
|
||||
leaderUptime prometheus.Gauge
|
||||
electionLatency prometheus.Histogram
|
||||
|
||||
// Health metrics
|
||||
healthChecksPassed *prometheus.CounterVec
|
||||
healthChecksFailed *prometheus.CounterVec
|
||||
healthCheckDuration *prometheus.HistogramVec
|
||||
systemHealthScore prometheus.Gauge
|
||||
componentHealthScore *prometheus.GaugeVec
|
||||
|
||||
healthChecksPassed *prometheus.CounterVec
|
||||
healthChecksFailed *prometheus.CounterVec
|
||||
healthCheckDuration *prometheus.HistogramVec
|
||||
systemHealthScore prometheus.Gauge
|
||||
componentHealthScore *prometheus.GaugeVec
|
||||
|
||||
// Task metrics
|
||||
tasksActive prometheus.Gauge
|
||||
tasksQueued prometheus.Gauge
|
||||
tasksCompleted *prometheus.CounterVec
|
||||
taskDuration *prometheus.HistogramVec
|
||||
taskQueueWaitTime prometheus.Histogram
|
||||
|
||||
tasksActive prometheus.Gauge
|
||||
tasksQueued prometheus.Gauge
|
||||
tasksCompleted *prometheus.CounterVec
|
||||
taskDuration *prometheus.HistogramVec
|
||||
taskQueueWaitTime prometheus.Histogram
|
||||
|
||||
// SLURP metrics (context generation)
|
||||
slurpGenerated *prometheus.CounterVec
|
||||
slurpGenerationTime prometheus.Histogram
|
||||
slurpQueueLength prometheus.Gauge
|
||||
slurpActiveJobs prometheus.Gauge
|
||||
slurpLeadershipEvents prometheus.Counter
|
||||
|
||||
|
||||
// SHHH sentinel metrics
|
||||
shhhFindings *prometheus.CounterVec
|
||||
|
||||
// UCXI metrics (protocol resolution)
|
||||
ucxiRequests *prometheus.CounterVec
|
||||
ucxiResolutionLatency prometheus.Histogram
|
||||
ucxiCacheHits prometheus.Counter
|
||||
ucxiCacheMisses prometheus.Counter
|
||||
ucxiContentSize prometheus.Histogram
|
||||
|
||||
|
||||
// Resource metrics
|
||||
cpuUsage prometheus.Gauge
|
||||
memoryUsage prometheus.Gauge
|
||||
diskUsage *prometheus.GaugeVec
|
||||
networkBytesIn prometheus.Counter
|
||||
networkBytesOut prometheus.Counter
|
||||
goroutines prometheus.Gauge
|
||||
|
||||
cpuUsage prometheus.Gauge
|
||||
memoryUsage prometheus.Gauge
|
||||
diskUsage *prometheus.GaugeVec
|
||||
networkBytesIn prometheus.Counter
|
||||
networkBytesOut prometheus.Counter
|
||||
goroutines prometheus.Gauge
|
||||
|
||||
// Error metrics
|
||||
errors *prometheus.CounterVec
|
||||
panics prometheus.Counter
|
||||
|
||||
startTime time.Time
|
||||
mu sync.RWMutex
|
||||
errors *prometheus.CounterVec
|
||||
panics prometheus.Counter
|
||||
|
||||
startTime time.Time
|
||||
mu sync.RWMutex
|
||||
}
|
||||
|
||||
// MetricsConfig configures the metrics system
|
||||
type MetricsConfig struct {
|
||||
// HTTP server config
|
||||
ListenAddr string
|
||||
MetricsPath string
|
||||
|
||||
ListenAddr string
|
||||
MetricsPath string
|
||||
|
||||
// Histogram buckets
|
||||
LatencyBuckets []float64
|
||||
SizeBuckets []float64
|
||||
|
||||
|
||||
// Labels
|
||||
NodeID string
|
||||
Version string
|
||||
Environment string
|
||||
Cluster string
|
||||
|
||||
NodeID string
|
||||
Version string
|
||||
Environment string
|
||||
Cluster string
|
||||
|
||||
// Collection intervals
|
||||
SystemMetricsInterval time.Duration
|
||||
SystemMetricsInterval time.Duration
|
||||
ResourceMetricsInterval time.Duration
|
||||
}
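// A minimal wiring sketch for the configuration above, assuming the
// DefaultMetricsConfig, NewCHORUSMetrics, StartServer, and CollectMetrics
// helpers defined later in this file; the listen address is an example value.
//
//	cfg := DefaultMetricsConfig()
//	cfg.ListenAddr = ":9090"
//	m := NewCHORUSMetrics(cfg)
//	if err := m.StartServer(cfg); err != nil {
//		log.Fatalf("metrics server: %v", err)
//	}
//	m.CollectMetrics(cfg) // start the background collection tickers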
|
||||
|
||||
@@ -143,20 +145,20 @@ func NewCHORUSMetrics(config *MetricsConfig) *CHORUSMetrics {
|
||||
if config == nil {
|
||||
config = DefaultMetricsConfig()
|
||||
}
|
||||
|
||||
|
||||
registry := prometheus.NewRegistry()
|
||||
|
||||
|
||||
metrics := &CHORUSMetrics{
|
||||
registry: registry,
|
||||
startTime: time.Now(),
|
||||
}
|
||||
|
||||
|
||||
// Initialize all metrics
|
||||
metrics.initializeMetrics(config)
|
||||
|
||||
|
||||
// Register with custom registry
|
||||
metrics.registerMetrics()
|
||||
|
||||
|
||||
return metrics
|
||||
}
|
||||
|
||||
@@ -170,14 +172,14 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) {
|
||||
},
|
||||
[]string{"node_id", "version", "go_version", "cluster", "environment"},
|
||||
)
|
||||
|
||||
|
||||
m.uptime = promauto.NewGauge(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "chorus_uptime_seconds",
|
||||
Help: "System uptime in seconds",
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
// P2P metrics
|
||||
m.p2pConnectedPeers = promauto.NewGauge(
|
||||
prometheus.GaugeOpts{
|
||||
@@ -185,7 +187,7 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) {
|
||||
Help: "Number of connected P2P peers",
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
m.p2pMessagesSent = promauto.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Name: "chorus_p2p_messages_sent_total",
|
||||
@@ -193,7 +195,7 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) {
|
||||
},
|
||||
[]string{"message_type", "peer_id"},
|
||||
)
|
||||
|
||||
|
||||
m.p2pMessagesReceived = promauto.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Name: "chorus_p2p_messages_received_total",
|
||||
@@ -201,7 +203,7 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) {
|
||||
},
|
||||
[]string{"message_type", "peer_id"},
|
||||
)
|
||||
|
||||
|
||||
m.p2pMessageLatency = promauto.NewHistogramVec(
|
||||
prometheus.HistogramOpts{
|
||||
Name: "chorus_p2p_message_latency_seconds",
|
||||
@@ -210,7 +212,7 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) {
|
||||
},
|
||||
[]string{"message_type"},
|
||||
)
|
||||
|
||||
|
||||
// DHT metrics
|
||||
m.dhtPutOperations = promauto.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
@@ -219,7 +221,7 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) {
|
||||
},
|
||||
[]string{"status"},
|
||||
)
|
||||
|
||||
|
||||
m.dhtGetOperations = promauto.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Name: "chorus_dht_get_operations_total",
|
||||
@@ -227,7 +229,7 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) {
|
||||
},
|
||||
[]string{"status"},
|
||||
)
|
||||
|
||||
|
||||
m.dhtOperationLatency = promauto.NewHistogramVec(
|
||||
prometheus.HistogramOpts{
|
||||
Name: "chorus_dht_operation_latency_seconds",
|
||||
@@ -236,21 +238,21 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) {
|
||||
},
|
||||
[]string{"operation", "status"},
|
||||
)
|
||||
|
||||
|
||||
m.dhtProviderRecords = promauto.NewGauge(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "chorus_dht_provider_records",
|
||||
Help: "Number of DHT provider records",
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
m.dhtContentKeys = promauto.NewGauge(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "chorus_dht_content_keys",
|
||||
Help: "Number of DHT content keys",
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
m.dhtReplicationFactor = promauto.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "chorus_dht_replication_factor",
|
||||
@@ -258,7 +260,7 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) {
|
||||
},
|
||||
[]string{"key_hash"},
|
||||
)
|
||||
|
||||
|
||||
// PubSub metrics
|
||||
m.pubsubTopics = promauto.NewGauge(
|
||||
prometheus.GaugeOpts{
|
||||
@@ -266,7 +268,7 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) {
|
||||
Help: "Number of active PubSub topics",
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
m.pubsubMessages = promauto.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Name: "chorus_pubsub_messages_total",
|
||||
@@ -274,7 +276,7 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) {
|
||||
},
|
||||
[]string{"topic", "direction", "message_type"},
|
||||
)
|
||||
|
||||
|
||||
m.pubsubMessageLatency = promauto.NewHistogramVec(
|
||||
prometheus.HistogramOpts{
|
||||
Name: "chorus_pubsub_message_latency_seconds",
|
||||
@@ -283,7 +285,7 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) {
|
||||
},
|
||||
[]string{"topic"},
|
||||
)
|
||||
|
||||
|
||||
// Election metrics
|
||||
m.electionTerm = promauto.NewGauge(
|
||||
prometheus.GaugeOpts{
|
||||
@@ -291,7 +293,7 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) {
|
||||
Help: "Current election term",
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
m.electionState = promauto.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "chorus_election_state",
|
||||
@@ -299,28 +301,28 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) {
|
||||
},
|
||||
[]string{"state"},
|
||||
)
|
||||
|
||||
|
||||
m.heartbeatsSent = promauto.NewCounter(
|
||||
prometheus.CounterOpts{
|
||||
Name: "chorus_heartbeats_sent_total",
|
||||
Help: "Total number of heartbeats sent",
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
m.heartbeatsReceived = promauto.NewCounter(
|
||||
prometheus.CounterOpts{
|
||||
Name: "chorus_heartbeats_received_total",
|
||||
Help: "Total number of heartbeats received",
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
m.leadershipChanges = promauto.NewCounter(
|
||||
prometheus.CounterOpts{
|
||||
Name: "chorus_leadership_changes_total",
|
||||
Help: "Total number of leadership changes",
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
// Health metrics
|
||||
m.healthChecksPassed = promauto.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
@@ -329,7 +331,7 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) {
|
||||
},
|
||||
[]string{"check_name"},
|
||||
)
|
||||
|
||||
|
||||
m.healthChecksFailed = promauto.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Name: "chorus_health_checks_failed_total",
|
||||
@@ -337,14 +339,14 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) {
|
||||
},
|
||||
[]string{"check_name", "reason"},
|
||||
)
|
||||
|
||||
|
||||
m.systemHealthScore = promauto.NewGauge(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "chorus_system_health_score",
|
||||
Help: "Overall system health score (0-1)",
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
m.componentHealthScore = promauto.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "chorus_component_health_score",
|
||||
@@ -352,7 +354,7 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) {
|
||||
},
|
||||
[]string{"component"},
|
||||
)
|
||||
|
||||
|
||||
// Task metrics
|
||||
m.tasksActive = promauto.NewGauge(
|
||||
prometheus.GaugeOpts{
|
||||
@@ -360,14 +362,14 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) {
|
||||
Help: "Number of active tasks",
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
m.tasksQueued = promauto.NewGauge(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "chorus_tasks_queued",
|
||||
Help: "Number of queued tasks",
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
m.tasksCompleted = promauto.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Name: "chorus_tasks_completed_total",
|
||||
@@ -375,7 +377,7 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) {
|
||||
},
|
||||
[]string{"status", "task_type"},
|
||||
)
|
||||
|
||||
|
||||
m.taskDuration = promauto.NewHistogramVec(
|
||||
prometheus.HistogramOpts{
|
||||
Name: "chorus_task_duration_seconds",
|
||||
@@ -384,7 +386,7 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) {
|
||||
},
|
||||
[]string{"task_type", "status"},
|
||||
)
|
||||
|
||||
|
||||
// SLURP metrics
|
||||
m.slurpGenerated = promauto.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
@@ -393,7 +395,7 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) {
|
||||
},
|
||||
[]string{"role", "status"},
|
||||
)
|
||||
|
||||
|
||||
m.slurpGenerationTime = promauto.NewHistogram(
|
||||
prometheus.HistogramOpts{
|
||||
Name: "chorus_slurp_generation_time_seconds",
|
||||
@@ -401,14 +403,23 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) {
|
||||
Buckets: []float64{0.1, 0.5, 1.0, 2.0, 5.0, 10.0, 30.0, 60.0, 120.0},
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
m.slurpQueueLength = promauto.NewGauge(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "chorus_slurp_queue_length",
|
||||
Help: "Length of SLURP generation queue",
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
// SHHH metrics
|
||||
m.shhhFindings = promauto.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Name: "chorus_shhh_findings_total",
|
||||
Help: "Total number of SHHH redaction findings",
|
||||
},
|
||||
[]string{"rule", "severity"},
|
||||
)
|
||||
|
||||
// UCXI metrics
|
||||
m.ucxiRequests = promauto.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
@@ -417,7 +428,7 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) {
|
||||
},
|
||||
[]string{"method", "status"},
|
||||
)
|
||||
|
||||
|
||||
m.ucxiResolutionLatency = promauto.NewHistogram(
|
||||
prometheus.HistogramOpts{
|
||||
Name: "chorus_ucxi_resolution_latency_seconds",
|
||||
@@ -425,7 +436,7 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) {
|
||||
Buckets: config.LatencyBuckets,
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
// Resource metrics
|
||||
m.cpuUsage = promauto.NewGauge(
|
||||
prometheus.GaugeOpts{
|
||||
@@ -433,14 +444,14 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) {
|
||||
Help: "CPU usage ratio (0-1)",
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
m.memoryUsage = promauto.NewGauge(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "chorus_memory_usage_bytes",
|
||||
Help: "Memory usage in bytes",
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
m.diskUsage = promauto.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "chorus_disk_usage_ratio",
|
||||
@@ -448,14 +459,14 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) {
|
||||
},
|
||||
[]string{"mount_point"},
|
||||
)
|
||||
|
||||
|
||||
m.goroutines = promauto.NewGauge(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "chorus_goroutines",
|
||||
Help: "Number of goroutines",
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
// Error metrics
|
||||
m.errors = promauto.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
@@ -464,7 +475,7 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) {
|
||||
},
|
||||
[]string{"component", "error_type"},
|
||||
)
|
||||
|
||||
|
||||
m.panics = promauto.NewCounter(
|
||||
prometheus.CounterOpts{
|
||||
Name: "chorus_panics_total",
|
||||
@@ -482,31 +493,31 @@ func (m *CHORUSMetrics) registerMetrics() {
|
||||
// StartServer starts the Prometheus metrics HTTP server
|
||||
func (m *CHORUSMetrics) StartServer(config *MetricsConfig) error {
|
||||
mux := http.NewServeMux()
|
||||
|
||||
|
||||
// Use custom registry
|
||||
handler := promhttp.HandlerFor(m.registry, promhttp.HandlerOpts{
|
||||
EnableOpenMetrics: true,
|
||||
})
|
||||
mux.Handle(config.MetricsPath, handler)
|
||||
|
||||
|
||||
// Health endpoint
|
||||
mux.HandleFunc("/health", func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
w.Write([]byte("OK"))
|
||||
})
|
||||
|
||||
|
||||
m.httpServer = &http.Server{
|
||||
Addr: config.ListenAddr,
|
||||
Handler: mux,
|
||||
}
|
||||
|
||||
|
||||
go func() {
|
||||
log.Printf("Starting metrics server on %s%s", config.ListenAddr, config.MetricsPath)
|
||||
if err := m.httpServer.ListenAndServe(); err != nil && err != http.ErrServerClosed {
|
||||
log.Printf("Metrics server error: %v", err)
|
||||
}
|
||||
}()
|
||||
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -656,6 +667,15 @@ func (m *CHORUSMetrics) SetSLURPQueueLength(length int) {
|
||||
m.slurpQueueLength.Set(float64(length))
|
||||
}
|
||||
|
||||
// SHHH Metrics Methods
|
||||
|
||||
func (m *CHORUSMetrics) IncrementSHHHFindings(rule, severity string, count int) {
|
||||
if m == nil || m.shhhFindings == nil || count <= 0 {
|
||||
return
|
||||
}
|
||||
m.shhhFindings.WithLabelValues(rule, severity).Add(float64(count))
|
||||
}
|
||||
|
||||
// UCXI Metrics Methods
|
||||
|
||||
func (m *CHORUSMetrics) IncrementUCXIRequests(method, status string) {
|
||||
@@ -708,21 +728,21 @@ func (m *CHORUSMetrics) UpdateUptime() {
|
||||
func (m *CHORUSMetrics) CollectMetrics(config *MetricsConfig) {
|
||||
systemTicker := time.NewTicker(config.SystemMetricsInterval)
|
||||
resourceTicker := time.NewTicker(config.ResourceMetricsInterval)
|
||||
|
||||
|
||||
go func() {
|
||||
defer systemTicker.Stop()
|
||||
defer resourceTicker.Stop()
|
||||
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-systemTicker.C:
|
||||
m.UpdateUptime()
|
||||
// Collect other system metrics
|
||||
|
||||
|
||||
case <-resourceTicker.C:
|
||||
// Collect resource metrics (would integrate with actual system monitoring)
|
||||
// m.collectResourceMetrics()
|
||||
}
|
||||
}
|
||||
}()
|
||||
}
|
||||
}
|
||||
|
||||
pkg/shhh/doc.go (new file, 11 lines)
@@ -0,0 +1,11 @@
|
||||
// Package shhh provides the CHORUS secrets sentinel responsible for detecting
|
||||
// and redacting sensitive values before they leave the runtime. The sentinel
|
||||
// focuses on predictable failure modes (log emission, telemetry fan-out,
|
||||
// request forwarding) and offers a composable API for registering additional
|
||||
// redaction rules, emitting audit events, and tracking operational metrics.
|
||||
//
|
||||
// The initial implementation focuses on high-signal secrets (API keys,
|
||||
// bearer/OAuth tokens, private keys) so the runtime can start integrating
|
||||
// SHHH into COOEE and WHOOSH logging immediately while the broader roadmap
|
||||
// items (automated redaction replay, policy-driven rules) continue landing.
|
||||
package shhh
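// A minimal caller-side sketch of the API described above, assuming the
// Sentinel, Config, and RedactText definitions added in sentinel.go; the log
// line and label value are illustrative only.
//
//	sentinel, err := shhh.NewSentinel(shhh.Config{})
//	if err != nil {
//		return err
//	}
//	clean, findings := sentinel.RedactText(context.Background(),
//		"Authorization: Bearer abc123token",
//		map[string]string{"source": "http.request.headers"})
//	// clean == "Authorization: Bearer [REDACTED]"; findings carries one
//	// bearer-token hit that can be forwarded to metrics or an audit sink.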
|
||||
pkg/shhh/rule.go (new file, 130 lines)
@@ -0,0 +1,130 @@
|
||||
package shhh
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"encoding/base64"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strings"
|
||||
)
|
||||
|
||||
type compiledRule struct {
|
||||
name string
|
||||
regex *regexp.Regexp
|
||||
replacement string
|
||||
severity Severity
|
||||
tags []string
|
||||
}
|
||||
|
||||
type matchRecord struct {
|
||||
value string
|
||||
}
|
||||
|
||||
func (r *compiledRule) apply(in string) (string, []matchRecord) {
|
||||
indices := r.regex.FindAllStringSubmatchIndex(in, -1)
|
||||
if len(indices) == 0 {
|
||||
return in, nil
|
||||
}
|
||||
|
||||
var builder strings.Builder
|
||||
builder.Grow(len(in))
|
||||
|
||||
matches := make([]matchRecord, 0, len(indices))
|
||||
last := 0
|
||||
for _, loc := range indices {
|
||||
start, end := loc[0], loc[1]
|
||||
builder.WriteString(in[last:start])
|
||||
replaced := r.regex.ExpandString(nil, r.replacement, in, loc)
|
||||
builder.Write(replaced)
|
||||
matches = append(matches, matchRecord{value: in[start:end]})
|
||||
last = end
|
||||
}
|
||||
builder.WriteString(in[last:])
|
||||
|
||||
return builder.String(), matches
|
||||
}
|
||||
|
||||
func buildDefaultRuleConfigs(placeholder string) []RuleConfig {
|
||||
if placeholder == "" {
|
||||
placeholder = "[REDACTED]"
|
||||
}
|
||||
return []RuleConfig{
|
||||
{
|
||||
Name: "bearer-token",
|
||||
Pattern: `(?i)(authorization\s*:\s*bearer\s+)([A-Za-z0-9\-._~+/]+=*)`,
|
||||
ReplacementTemplate: "$1" + placeholder,
|
||||
Severity: SeverityMedium,
|
||||
Tags: []string{"token", "http"},
|
||||
},
|
||||
{
|
||||
Name: "api-key",
|
||||
Pattern: `(?i)((?:api[_-]?key|token|secret|password)\s*[:=]\s*["']?)([A-Za-z0-9\-._~+/]{8,})(["']?)`,
|
||||
ReplacementTemplate: "$1" + placeholder + "$3",
|
||||
Severity: SeverityHigh,
|
||||
Tags: []string{"credentials"},
|
||||
},
|
||||
{
|
||||
Name: "openai-secret",
|
||||
Pattern: `(sk-[A-Za-z0-9]{20,})`,
|
||||
ReplacementTemplate: placeholder,
|
||||
Severity: SeverityHigh,
|
||||
Tags: []string{"llm", "api"},
|
||||
},
|
||||
{
|
||||
Name: "oauth-refresh-token",
|
||||
Pattern: `(?i)(refresh_token"?\s*[:=]\s*["']?)([A-Za-z0-9\-._~+/]{8,})(["']?)`,
|
||||
ReplacementTemplate: "$1" + placeholder + "$3",
|
||||
Severity: SeverityMedium,
|
||||
Tags: []string{"oauth"},
|
||||
},
|
||||
{
|
||||
Name: "private-key-block",
|
||||
Pattern: `(?s)(-----BEGIN [^-]+ PRIVATE KEY-----)[^-]+(-----END [^-]+ PRIVATE KEY-----)`,
|
||||
ReplacementTemplate: "$1\n" + placeholder + "\n$2",
|
||||
Severity: SeverityHigh,
|
||||
Tags: []string{"pem", "key"},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func compileRules(cfg Config, placeholder string) ([]*compiledRule, error) {
|
||||
configs := make([]RuleConfig, 0)
|
||||
if !cfg.DisableDefaultRules {
|
||||
configs = append(configs, buildDefaultRuleConfigs(placeholder)...)
|
||||
}
|
||||
configs = append(configs, cfg.CustomRules...)
|
||||
|
||||
rules := make([]*compiledRule, 0, len(configs))
|
||||
for _, rc := range configs {
|
||||
if rc.Name == "" || rc.Pattern == "" {
|
||||
continue
|
||||
}
|
||||
replacement := rc.ReplacementTemplate
|
||||
if replacement == "" {
|
||||
replacement = placeholder
|
||||
}
|
||||
re, err := regexp.Compile(rc.Pattern)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
compiled := &compiledRule{
|
||||
name: rc.Name,
|
||||
replacement: replacement,
|
||||
regex: re,
|
||||
severity: rc.Severity,
|
||||
tags: append([]string(nil), rc.Tags...),
|
||||
}
|
||||
rules = append(rules, compiled)
|
||||
}
|
||||
|
||||
sort.SliceStable(rules, func(i, j int) bool {
|
||||
return rules[i].name < rules[j].name
|
||||
})
|
||||
|
||||
return rules, nil
|
||||
}
|
||||
|
||||
func hashSecret(value string) string {
|
||||
sum := sha256.Sum256([]byte(value))
|
||||
return base64.RawStdEncoding.EncodeToString(sum[:])
|
||||
}
|
||||
pkg/shhh/sentinel.go (new file, 407 lines)
@@ -0,0 +1,407 @@
|
||||
package shhh
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"sort"
|
||||
"sync"
|
||||
)
|
||||
|
||||
// Option configures the sentinel during construction.
|
||||
type Option func(*Sentinel)
|
||||
|
||||
// FindingObserver receives aggregated findings for each redaction operation.
|
||||
type FindingObserver func(context.Context, []Finding)
|
||||
|
||||
// WithAuditSink attaches an audit sink for per-redaction events.
|
||||
func WithAuditSink(sink AuditSink) Option {
|
||||
return func(s *Sentinel) {
|
||||
s.audit = sink
|
||||
}
|
||||
}
|
||||
|
||||
// WithStats allows callers to supply a shared stats collector.
|
||||
func WithStats(stats *Stats) Option {
|
||||
return func(s *Sentinel) {
|
||||
s.stats = stats
|
||||
}
|
||||
}
|
||||
|
||||
// WithFindingObserver registers an observer that is invoked whenever redaction
|
||||
// produces findings.
|
||||
func WithFindingObserver(observer FindingObserver) Option {
|
||||
return func(s *Sentinel) {
|
||||
if observer == nil {
|
||||
return
|
||||
}
|
||||
s.observers = append(s.observers, observer)
|
||||
}
|
||||
}
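// Sketch of feeding aggregated findings into the Prometheus counters exposed
// by pkg/metrics; the metrics variable is an assumed *metrics.CHORUSMetrics,
// and the Severity-to-string conversion mirrors the label used there.
//
//	observer := func(_ context.Context, findings []Finding) {
//		for _, f := range findings {
//			metrics.IncrementSHHHFindings(f.Rule, string(f.Severity), f.Count)
//		}
//	}
//	sentinel, err := NewSentinel(Config{}, WithFindingObserver(observer))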
|
||||
|
||||
// Sentinel performs secret detection/redaction across text payloads.
|
||||
type Sentinel struct {
|
||||
mu sync.RWMutex
|
||||
enabled bool
|
||||
placeholder string
|
||||
rules []*compiledRule
|
||||
audit AuditSink
|
||||
stats *Stats
|
||||
observers []FindingObserver
|
||||
}
|
||||
|
||||
// NewSentinel creates a new secrets sentinel using the provided configuration.
|
||||
func NewSentinel(cfg Config, opts ...Option) (*Sentinel, error) {
|
||||
placeholder := cfg.RedactionPlaceholder
|
||||
if placeholder == "" {
|
||||
placeholder = "[REDACTED]"
|
||||
}
|
||||
|
||||
s := &Sentinel{
|
||||
enabled: !cfg.Disabled,
|
||||
placeholder: placeholder,
|
||||
stats: NewStats(),
|
||||
}
|
||||
for _, opt := range opts {
|
||||
opt(s)
|
||||
}
|
||||
if s.stats == nil {
|
||||
s.stats = NewStats()
|
||||
}
|
||||
|
||||
rules, err := compileRules(cfg, placeholder)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("compile SHHH rules: %w", err)
|
||||
}
|
||||
if len(rules) == 0 {
|
||||
return nil, errors.New("no SHHH rules configured")
|
||||
}
|
||||
s.rules = rules
|
||||
|
||||
return s, nil
|
||||
}
|
||||
|
||||
// Enabled reports whether the sentinel is actively redacting.
|
||||
func (s *Sentinel) Enabled() bool {
|
||||
s.mu.RLock()
|
||||
defer s.mu.RUnlock()
|
||||
return s.enabled
|
||||
}
|
||||
|
||||
// Toggle enables or disables the sentinel at runtime.
|
||||
func (s *Sentinel) Toggle(enabled bool) {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
s.enabled = enabled
|
||||
}
|
||||
|
||||
// SetAuditSink updates the audit sink at runtime.
|
||||
func (s *Sentinel) SetAuditSink(sink AuditSink) {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
s.audit = sink
|
||||
}
|
||||
|
||||
// AddFindingObserver registers an observer after construction.
|
||||
func (s *Sentinel) AddFindingObserver(observer FindingObserver) {
|
||||
if observer == nil {
|
||||
return
|
||||
}
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
s.observers = append(s.observers, observer)
|
||||
}
|
||||
|
||||
// StatsSnapshot returns a snapshot of the current counters.
|
||||
func (s *Sentinel) StatsSnapshot() StatsSnapshot {
|
||||
s.mu.RLock()
|
||||
stats := s.stats
|
||||
s.mu.RUnlock()
|
||||
if stats == nil {
|
||||
return StatsSnapshot{}
|
||||
}
|
||||
return stats.Snapshot()
|
||||
}
|
||||
|
||||
// RedactText scans the provided text and redacts any findings.
|
||||
func (s *Sentinel) RedactText(ctx context.Context, text string, labels map[string]string) (string, []Finding) {
|
||||
s.mu.RLock()
|
||||
enabled := s.enabled
|
||||
rules := s.rules
|
||||
stats := s.stats
|
||||
audit := s.audit
|
||||
s.mu.RUnlock()
|
||||
|
||||
if !enabled || len(rules) == 0 {
|
||||
return text, nil
|
||||
}
|
||||
if stats != nil {
|
||||
stats.IncScan()
|
||||
}
|
||||
|
||||
aggregates := make(map[string]*findingAggregate)
|
||||
current := text
|
||||
path := derivePath(labels)
|
||||
|
||||
for _, rule := range rules {
|
||||
redacted, matches := rule.apply(current)
|
||||
if len(matches) == 0 {
|
||||
continue
|
||||
}
|
||||
current = redacted
|
||||
if stats != nil {
|
||||
stats.AddFindings(rule.name, len(matches))
|
||||
}
|
||||
recordAggregate(aggregates, rule, path, len(matches))
|
||||
|
||||
if audit != nil {
|
||||
metadata := cloneLabels(labels)
|
||||
for _, match := range matches {
|
||||
event := AuditEvent{
|
||||
Rule: rule.name,
|
||||
Severity: rule.severity,
|
||||
Tags: append([]string(nil), rule.tags...),
|
||||
Path: path,
|
||||
Hash: hashSecret(match.value),
|
||||
Metadata: metadata,
|
||||
}
|
||||
audit.RecordRedaction(ctx, event)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
findings := flattenAggregates(aggregates)
|
||||
s.notifyObservers(ctx, findings)
|
||||
return current, findings
|
||||
}
|
||||
|
||||
// RedactMap walks the map and redacts in-place. It returns the collected findings.
|
||||
func (s *Sentinel) RedactMap(ctx context.Context, payload map[string]any) []Finding {
|
||||
return s.RedactMapWithLabels(ctx, payload, nil)
|
||||
}
|
||||
|
||||
// RedactMapWithLabels allows callers to specify base labels that will be merged
|
||||
// into metadata for nested structures.
|
||||
func (s *Sentinel) RedactMapWithLabels(ctx context.Context, payload map[string]any, baseLabels map[string]string) []Finding {
|
||||
if payload == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
aggregates := make(map[string]*findingAggregate)
|
||||
s.redactValue(ctx, payload, "", baseLabels, aggregates)
|
||||
findings := flattenAggregates(aggregates)
|
||||
s.notifyObservers(ctx, findings)
|
||||
return findings
|
||||
}
|
||||
|
||||
func (s *Sentinel) redactValue(ctx context.Context, value any, path string, baseLabels map[string]string, agg map[string]*findingAggregate) {
|
||||
switch v := value.(type) {
|
||||
case map[string]interface{}:
|
||||
for key, val := range v {
|
||||
childPath := joinPath(path, key)
|
||||
switch typed := val.(type) {
|
||||
case string:
|
||||
labels := mergeLabels(baseLabels, childPath)
|
||||
redacted, findings := s.RedactText(ctx, typed, labels)
|
||||
if redacted != typed {
|
||||
v[key] = redacted
|
||||
}
|
||||
mergeAggregates(agg, findings)
|
||||
case fmt.Stringer:
|
||||
labels := mergeLabels(baseLabels, childPath)
|
||||
text := typed.String()
|
||||
redacted, findings := s.RedactText(ctx, text, labels)
|
||||
if redacted != text {
|
||||
v[key] = redacted
|
||||
}
|
||||
mergeAggregates(agg, findings)
|
||||
default:
|
||||
s.redactValue(ctx, typed, childPath, baseLabels, agg)
|
||||
}
|
||||
}
|
||||
case []interface{}:
|
||||
for idx, item := range v {
|
||||
childPath := indexPath(path, idx)
|
||||
switch typed := item.(type) {
|
||||
case string:
|
||||
labels := mergeLabels(baseLabels, childPath)
|
||||
redacted, findings := s.RedactText(ctx, typed, labels)
|
||||
if redacted != typed {
|
||||
v[idx] = redacted
|
||||
}
|
||||
mergeAggregates(agg, findings)
|
||||
case fmt.Stringer:
|
||||
labels := mergeLabels(baseLabels, childPath)
|
||||
text := typed.String()
|
||||
redacted, findings := s.RedactText(ctx, text, labels)
|
||||
if redacted != text {
|
||||
v[idx] = redacted
|
||||
}
|
||||
mergeAggregates(agg, findings)
|
||||
default:
|
||||
s.redactValue(ctx, typed, childPath, baseLabels, agg)
|
||||
}
|
||||
}
|
||||
case []string:
|
||||
for idx, item := range v {
|
||||
childPath := indexPath(path, idx)
|
||||
labels := mergeLabels(baseLabels, childPath)
|
||||
redacted, findings := s.RedactText(ctx, item, labels)
|
||||
if redacted != item {
|
||||
v[idx] = redacted
|
||||
}
|
||||
mergeAggregates(agg, findings)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Sentinel) notifyObservers(ctx context.Context, findings []Finding) {
|
||||
if len(findings) == 0 {
|
||||
return
|
||||
}
|
||||
findingsCopy := append([]Finding(nil), findings...)
|
||||
s.mu.RLock()
|
||||
observers := append([]FindingObserver(nil), s.observers...)
|
||||
s.mu.RUnlock()
|
||||
for _, observer := range observers {
|
||||
observer(ctx, findingsCopy)
|
||||
}
|
||||
}
|
||||
|
||||
func mergeAggregates(dest map[string]*findingAggregate, findings []Finding) {
|
||||
for i := range findings {
|
||||
f := findings[i]
|
||||
agg := dest[f.Rule]
|
||||
if agg == nil {
|
||||
agg = &findingAggregate{
|
||||
rule: f.Rule,
|
||||
severity: f.Severity,
|
||||
tags: append([]string(nil), f.Tags...),
|
||||
locations: make(map[string]int),
|
||||
}
|
||||
dest[f.Rule] = agg
|
||||
}
|
||||
agg.count += f.Count
|
||||
for _, loc := range f.Locations {
|
||||
agg.locations[loc.Path] += loc.Count
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func recordAggregate(dest map[string]*findingAggregate, rule *compiledRule, path string, count int) {
|
||||
agg := dest[rule.name]
|
||||
if agg == nil {
|
||||
agg = &findingAggregate{
|
||||
rule: rule.name,
|
||||
severity: rule.severity,
|
||||
tags: append([]string(nil), rule.tags...),
|
||||
locations: make(map[string]int),
|
||||
}
|
||||
dest[rule.name] = agg
|
||||
}
|
||||
agg.count += count
|
||||
if path != "" {
|
||||
agg.locations[path] += count
|
||||
}
|
||||
}
|
||||
|
||||
func flattenAggregates(agg map[string]*findingAggregate) []Finding {
|
||||
if len(agg) == 0 {
|
||||
return nil
|
||||
}
|
||||
keys := make([]string, 0, len(agg))
|
||||
for key := range agg {
|
||||
keys = append(keys, key)
|
||||
}
|
||||
sort.Strings(keys)
|
||||
|
||||
findings := make([]Finding, 0, len(agg))
|
||||
for _, key := range keys {
|
||||
entry := agg[key]
|
||||
locations := make([]Location, 0, len(entry.locations))
|
||||
if len(entry.locations) > 0 {
|
||||
paths := make([]string, 0, len(entry.locations))
|
||||
for path := range entry.locations {
|
||||
paths = append(paths, path)
|
||||
}
|
||||
sort.Strings(paths)
|
||||
for _, path := range paths {
|
||||
locations = append(locations, Location{Path: path, Count: entry.locations[path]})
|
||||
}
|
||||
}
|
||||
findings = append(findings, Finding{
|
||||
Rule: entry.rule,
|
||||
Severity: entry.severity,
|
||||
Tags: append([]string(nil), entry.tags...),
|
||||
Count: entry.count,
|
||||
Locations: locations,
|
||||
})
|
||||
}
|
||||
return findings
|
||||
}
|
||||
|
||||
func derivePath(labels map[string]string) string {
|
||||
if labels == nil {
|
||||
return ""
|
||||
}
|
||||
if path := labels["path"]; path != "" {
|
||||
return path
|
||||
}
|
||||
if path := labels["source"]; path != "" {
|
||||
return path
|
||||
}
|
||||
if path := labels["field"]; path != "" {
|
||||
return path
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func cloneLabels(labels map[string]string) map[string]string {
|
||||
if len(labels) == 0 {
|
||||
return nil
|
||||
}
|
||||
clone := make(map[string]string, len(labels))
|
||||
for k, v := range labels {
|
||||
clone[k] = v
|
||||
}
|
||||
return clone
|
||||
}
|
||||
|
||||
func joinPath(prefix, key string) string {
|
||||
if prefix == "" {
|
||||
return key
|
||||
}
|
||||
if key == "" {
|
||||
return prefix
|
||||
}
|
||||
return prefix + "." + key
|
||||
}
|
||||
|
||||
func indexPath(prefix string, idx int) string {
|
||||
if prefix == "" {
|
||||
return fmt.Sprintf("[%d]", idx)
|
||||
}
|
||||
return fmt.Sprintf("%s[%d]", prefix, idx)
|
||||
}
|
||||
|
||||
func mergeLabels(base map[string]string, path string) map[string]string {
|
||||
if base == nil && path == "" {
|
||||
return nil
|
||||
}
|
||||
labels := cloneLabels(base)
|
||||
if labels == nil {
|
||||
labels = make(map[string]string, 1)
|
||||
}
|
||||
if path != "" {
|
||||
labels["path"] = path
|
||||
}
|
||||
return labels
|
||||
}
|
||||
|
||||
type findingAggregate struct {
|
||||
rule string
|
||||
severity Severity
|
||||
tags []string
|
||||
count int
|
||||
locations map[string]int
|
||||
}
|
||||
pkg/shhh/sentinel_test.go (new file, 95 lines)
@@ -0,0 +1,95 @@
|
||||
package shhh
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
type recordingSink struct {
|
||||
events []AuditEvent
|
||||
}
|
||||
|
||||
func (r *recordingSink) RecordRedaction(_ context.Context, event AuditEvent) {
|
||||
r.events = append(r.events, event)
|
||||
}
|
||||
|
||||
func TestRedactText_DefaultRules(t *testing.T) {
|
||||
sentinel, err := NewSentinel(Config{})
|
||||
require.NoError(t, err)
|
||||
|
||||
input := "Authorization: Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.secret"
|
||||
redacted, findings := sentinel.RedactText(context.Background(), input, map[string]string{"source": "http.request.headers.authorization"})
|
||||
|
||||
require.Equal(t, "Authorization: Bearer [REDACTED]", redacted)
|
||||
require.Len(t, findings, 1)
|
||||
require.Equal(t, "bearer-token", findings[0].Rule)
|
||||
require.Equal(t, 1, findings[0].Count)
|
||||
require.NotEmpty(t, findings[0].Locations)
|
||||
|
||||
snapshot := sentinel.StatsSnapshot()
|
||||
require.Equal(t, uint64(1), snapshot.TotalScans)
|
||||
require.Equal(t, uint64(1), snapshot.TotalFindings)
|
||||
require.Equal(t, uint64(1), snapshot.PerRuleFindings["bearer-token"])
|
||||
}
|
||||
|
||||
func TestRedactMap_NestedStructures(t *testing.T) {
|
||||
sentinel, err := NewSentinel(Config{})
|
||||
require.NoError(t, err)
|
||||
|
||||
payload := map[string]any{
|
||||
"config": map[string]any{
|
||||
"api_key": "API_KEY=1234567890ABCDEFG",
|
||||
},
|
||||
"tokens": []any{
|
||||
"sk-test1234567890ABCDEF",
|
||||
map[string]any{"refresh": "refresh_token=abcdef12345"},
|
||||
},
|
||||
}
|
||||
|
||||
findings := sentinel.RedactMap(context.Background(), payload)
|
||||
require.NotEmpty(t, findings)
|
||||
|
||||
config := payload["config"].(map[string]any)
|
||||
require.Equal(t, "API_KEY=[REDACTED]", config["api_key"])
|
||||
|
||||
tokens := payload["tokens"].([]any)
|
||||
require.Equal(t, "[REDACTED]", tokens[0])
|
||||
|
||||
inner := tokens[1].(map[string]any)
|
||||
require.Equal(t, "refresh_token=[REDACTED]", inner["refresh"])
|
||||
|
||||
total := 0
|
||||
for _, finding := range findings {
|
||||
total += finding.Count
|
||||
}
|
||||
require.Equal(t, 3, total)
|
||||
}
|
||||
|
||||
func TestAuditSinkReceivesEvents(t *testing.T) {
|
||||
sink := &recordingSink{}
|
||||
cfg := Config{
|
||||
DisableDefaultRules: true,
|
||||
CustomRules: []RuleConfig{
|
||||
{
|
||||
Name: "custom-secret",
|
||||
Pattern: `(secret\s*=\s*)([A-Za-z0-9]{6,})`,
|
||||
ReplacementTemplate: "$1[REDACTED]",
|
||||
Severity: SeverityHigh,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
sentinel, err := NewSentinel(cfg, WithAuditSink(sink))
|
||||
require.NoError(t, err)
|
||||
|
||||
_, findings := sentinel.RedactText(context.Background(), "secret=mysecretvalue", map[string]string{"source": "test"})
|
||||
require.Len(t, findings, 1)
|
||||
require.Equal(t, 1, findings[0].Count)
|
||||
|
||||
require.Len(t, sink.events, 1)
|
||||
require.Equal(t, "custom-secret", sink.events[0].Rule)
|
||||
require.NotEmpty(t, sink.events[0].Hash)
|
||||
require.Equal(t, "test", sink.events[0].Path)
|
||||
}
|
||||
pkg/shhh/stats.go (new file, 60 lines)
@@ -0,0 +1,60 @@
|
||||
package shhh
|
||||
|
||||
import (
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
)
|
||||
|
||||
// Stats tracks aggregate counts for the sentinel.
|
||||
type Stats struct {
|
||||
totalScans atomic.Uint64
|
||||
totalFindings atomic.Uint64
|
||||
perRule sync.Map // string -> *atomic.Uint64
|
||||
}
|
||||
|
||||
// NewStats constructs a Stats collector.
|
||||
func NewStats() *Stats {
|
||||
return &Stats{}
|
||||
}
|
||||
|
||||
// IncScan increments the total scan counter.
|
||||
func (s *Stats) IncScan() {
|
||||
if s == nil {
|
||||
return
|
||||
}
|
||||
s.totalScans.Add(1)
|
||||
}
|
||||
|
||||
// AddFindings records findings for a rule.
|
||||
func (s *Stats) AddFindings(rule string, count int) {
|
||||
if s == nil || count <= 0 {
|
||||
return
|
||||
}
|
||||
s.totalFindings.Add(uint64(count))
|
||||
counterAny, _ := s.perRule.LoadOrStore(rule, new(atomic.Uint64))
|
||||
counter := counterAny.(*atomic.Uint64)
|
||||
counter.Add(uint64(count))
|
||||
}
|
||||
|
||||
// Snapshot returns a point-in-time view of the counters.
|
||||
func (s *Stats) Snapshot() StatsSnapshot {
|
||||
if s == nil {
|
||||
return StatsSnapshot{}
|
||||
}
|
||||
snapshot := StatsSnapshot{
|
||||
TotalScans: s.totalScans.Load(),
|
||||
TotalFindings: s.totalFindings.Load(),
|
||||
PerRuleFindings: make(map[string]uint64),
|
||||
}
|
||||
s.perRule.Range(func(key, value any) bool {
|
||||
name, ok := key.(string)
|
||||
if !ok {
|
||||
return true
|
||||
}
|
||||
if counter, ok := value.(*atomic.Uint64); ok {
|
||||
snapshot.PerRuleFindings[name] = counter.Load()
|
||||
}
|
||||
return true
|
||||
})
|
||||
return snapshot
|
||||
}
|
||||
pkg/shhh/types.go (new file, 73 lines)
@@ -0,0 +1,73 @@
|
||||
package shhh
|
||||
|
||||
import "context"
|
||||
|
||||
// Severity represents the criticality associated with a redaction finding.
|
||||
type Severity string
|
||||
|
||||
const (
|
||||
// SeverityLow indicates low-impact findings (e.g. non-production credentials).
|
||||
SeverityLow Severity = "low"
|
||||
// SeverityMedium indicates medium-impact findings (e.g. access tokens).
|
||||
SeverityMedium Severity = "medium"
|
||||
// SeverityHigh indicates high-impact findings (e.g. private keys).
|
||||
SeverityHigh Severity = "high"
|
||||
)
|
||||
|
||||
// RuleConfig defines a redaction rule that SHHH should enforce.
|
||||
type RuleConfig struct {
|
||||
Name string `json:"name"`
|
||||
Pattern string `json:"pattern"`
|
||||
ReplacementTemplate string `json:"replacement_template"`
|
||||
Severity Severity `json:"severity"`
|
||||
Tags []string `json:"tags"`
|
||||
}
|
||||
|
||||
// Config controls sentinel behaviour.
|
||||
type Config struct {
|
||||
// Disabled toggles redaction off entirely.
|
||||
Disabled bool `json:"disabled"`
|
||||
// RedactionPlaceholder overrides the default placeholder value.
|
||||
RedactionPlaceholder string `json:"redaction_placeholder"`
|
||||
// DisableDefaultRules disables the built-in curated rule set.
|
||||
DisableDefaultRules bool `json:"disable_default_rules"`
|
||||
// CustomRules allows callers to append bespoke redaction patterns.
|
||||
CustomRules []RuleConfig `json:"custom_rules"`
|
||||
}
|
||||
|
||||
// Finding represents a single rule firing during redaction.
|
||||
type Finding struct {
|
||||
Rule string `json:"rule"`
|
||||
Severity Severity `json:"severity"`
|
||||
Tags []string `json:"tags,omitempty"`
|
||||
Count int `json:"count"`
|
||||
Locations []Location `json:"locations,omitempty"`
|
||||
}
|
||||
|
||||
// Location describes where a secret was found.
|
||||
type Location struct {
|
||||
Path string `json:"path"`
|
||||
Count int `json:"count"`
|
||||
}
|
||||
|
||||
// StatsSnapshot exposes aggregate counters for observability.
|
||||
type StatsSnapshot struct {
|
||||
TotalScans uint64 `json:"total_scans"`
|
||||
TotalFindings uint64 `json:"total_findings"`
|
||||
PerRuleFindings map[string]uint64 `json:"per_rule_findings"`
|
||||
}
|
||||
|
||||
// AuditEvent captures a single redaction occurrence for downstream sinks.
|
||||
type AuditEvent struct {
|
||||
Rule string `json:"rule"`
|
||||
Severity Severity `json:"severity"`
|
||||
Tags []string `json:"tags,omitempty"`
|
||||
Path string `json:"path,omitempty"`
|
||||
Hash string `json:"hash"`
|
||||
Metadata map[string]string `json:"metadata,omitempty"`
|
||||
}
|
||||
|
||||
// AuditSink receives redaction events for long term storage / replay.
|
||||
type AuditSink interface {
|
||||
RecordRedaction(ctx context.Context, event AuditEvent)
|
||||
}
|
||||
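
Taken together with stats.go, these types are enough to sketch how a caller wires SHHH up with a bespoke rule and a custom audit sink. The sketch below mirrors the usage in the package test earlier in this commit (NewSentinel, WithAuditSink, RedactText); the logging sink, the regex pattern, and passing Config by value are assumptions, not part of the diff.

package shhh

import (
	"context"
	"log"
)

// logSink is a hypothetical AuditSink that records only rule names, severity
// and hashes, never the matched secret itself.
type logSink struct{}

func (logSink) RecordRedaction(_ context.Context, ev AuditEvent) {
	log.Printf("redacted rule=%s severity=%s hash=%s path=%s", ev.Rule, ev.Severity, ev.Hash, ev.Path)
}

// exampleWiring registers one custom rule and routes audit events to logSink.
func exampleWiring() error {
	cfg := Config{
		CustomRules: []RuleConfig{{
			Name:                "custom-secret",
			Pattern:             `(secret=)\S+`, // illustrative pattern
			ReplacementTemplate: "$1[REDACTED]",
			Severity:            SeverityHigh,
		}},
	}

	sentinel, err := NewSentinel(cfg, WithAuditSink(logSink{}))
	if err != nil {
		return err
	}

	_, findings := sentinel.RedactText(context.Background(), "secret=mysecretvalue", map[string]string{"source": "example"})
	log.Printf("findings: %d", len(findings))
	return nil
}
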
@@ -13,11 +13,11 @@ import (

// DecisionPublisher handles publishing task completion decisions to encrypted DHT storage
type DecisionPublisher struct {
	ctx           context.Context
	config        *config.Config
	dhtStorage    storage.UCXLStorage
	nodeID        string
	agentName     string
	ctx        context.Context
	config     *config.Config
	dhtStorage storage.UCXLStorage
	nodeID     string
	agentName  string
}

// NewDecisionPublisher creates a new decision publisher

@@ -39,28 +39,28 @@ func NewDecisionPublisher(

// TaskDecision represents a decision made by an agent upon task completion
type TaskDecision struct {
	Agent          string                 `json:"agent"`
	Role           string                 `json:"role"`
	Project        string                 `json:"project"`
	Task           string                 `json:"task"`
	Decision       string                 `json:"decision"`
	Context        map[string]interface{} `json:"context"`
	Timestamp      time.Time              `json:"timestamp"`
	Success        bool                   `json:"success"`
	ErrorMessage   string                 `json:"error_message,omitempty"`
	FilesModified  []string               `json:"files_modified,omitempty"`
	LinesChanged   int                    `json:"lines_changed,omitempty"`
	TestResults    *TestResults           `json:"test_results,omitempty"`
	Dependencies   []string               `json:"dependencies,omitempty"`
	NextSteps      []string               `json:"next_steps,omitempty"`
	Agent         string                 `json:"agent"`
	Role          string                 `json:"role"`
	Project       string                 `json:"project"`
	Task          string                 `json:"task"`
	Decision      string                 `json:"decision"`
	Context       map[string]interface{} `json:"context"`
	Timestamp     time.Time              `json:"timestamp"`
	Success       bool                   `json:"success"`
	ErrorMessage  string                 `json:"error_message,omitempty"`
	FilesModified []string               `json:"files_modified,omitempty"`
	LinesChanged  int                    `json:"lines_changed,omitempty"`
	TestResults   *TestResults           `json:"test_results,omitempty"`
	Dependencies  []string               `json:"dependencies,omitempty"`
	NextSteps     []string               `json:"next_steps,omitempty"`
}

// TestResults captures test execution results
type TestResults struct {
	Passed   int     `json:"passed"`
	Failed   int     `json:"failed"`
	Skipped  int     `json:"skipped"`
	Coverage float64 `json:"coverage,omitempty"`
	Passed      int      `json:"passed"`
	Failed      int      `json:"failed"`
	Skipped     int      `json:"skipped"`
	Coverage    float64  `json:"coverage,omitempty"`
	FailedTests []string `json:"failed_tests,omitempty"`
}

@@ -74,7 +74,11 @@ func (dp *DecisionPublisher) PublishTaskDecision(decision *TaskDecision) error {
		decision.Role = dp.config.Agent.Role
	}
	if decision.Project == "" {
		decision.Project = "default-project" // TODO: Add project field to config
		if project := dp.config.Agent.Project; project != "" {
			decision.Project = project
		} else {
			decision.Project = "chorus"
		}
	}
	if decision.Timestamp.IsZero() {
		decision.Timestamp = time.Now()

@@ -173,16 +177,16 @@ func (dp *DecisionPublisher) PublishArchitecturalDecision(
	nextSteps []string,
) error {
	taskDecision := &TaskDecision{
		Task:     taskName,
		Decision: decision,
		Success:  true,
		Task:      taskName,
		Decision:  decision,
		Success:   true,
		NextSteps: nextSteps,
		Context: map[string]interface{}{
			"decision_type": "architecture",
			"rationale":     rationale,
			"alternatives":  alternatives,
			"implications":  implications,
			"node_id":       dp.nodeID,
			"decision_type": "architecture",
			"rationale": rationale,
			"alternatives": alternatives,
			"implications": implications,
			"node_id": dp.nodeID,
		},
	}

@@ -291,7 +295,7 @@ func (dp *DecisionPublisher) SubscribeToDecisions(
) error {
	// This is a placeholder for future pubsub implementation
	// For now, we'll implement a simple polling mechanism

	go func() {
		ticker := time.NewTicker(30 * time.Second)
		defer ticker.Stop()

@@ -341,10 +345,10 @@ func (dp *DecisionPublisher) PublishSystemStatus(
		Decision: status,
		Success:  dp.allHealthChecksPass(healthChecks),
		Context: map[string]interface{}{
			"decision_type": "system",
			"metrics":       metrics,
			"health_checks": healthChecks,
			"node_id":       dp.nodeID,
			"decision_type": "system",
			"metrics": metrics,
			"health_checks": healthChecks,
			"node_id": dp.nodeID,
		},
	}

@@ -364,13 +368,17 @@ func (dp *DecisionPublisher) allHealthChecksPass(healthChecks map[string]bool) b
// GetPublisherMetrics returns metrics about the decision publisher
func (dp *DecisionPublisher) GetPublisherMetrics() map[string]interface{} {
	dhtMetrics := dp.dhtStorage.GetMetrics()

	return map[string]interface{}{
		"node_id":      dp.nodeID,
		"agent_name":   dp.agentName,
		"current_role": dp.config.Agent.Role,
		"project":      "default-project", // TODO: Add project field to config
		"dht_metrics":  dhtMetrics,
		"last_publish": time.Now(), // This would be tracked in a real implementation
	}
	project := dp.config.Agent.Project
	if project == "" {
		project = "chorus"
	}

	return map[string]interface{}{
		"node_id":      dp.nodeID,
		"agent_name":   dp.agentName,
		"current_role": dp.config.Agent.Role,
		"project":      project,
		"dht_metrics":  dhtMetrics,
		"last_publish": time.Now(), // This would be tracked in a real implementation
	}
}
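
As a quick illustration of the new project fallback from a caller's perspective, a small hedged sketch; it assumes code in the same package as DecisionPublisher, with dp constructed elsewhere via NewDecisionPublisher (whose parameter list is not shown in these hunks), and the task text is invented.

// publishExample leaves Role, Project and Timestamp empty on purpose:
// PublishTaskDecision fills Role from config.Agent.Role, Project from
// config.Agent.Project (falling back to "chorus" when unset), and stamps
// Timestamp with time.Now(), as shown in the hunks above.
func publishExample(dp *DecisionPublisher) error {
	return dp.PublishTaskDecision(&TaskDecision{
		Task:     "harden-messaging-stack", // illustrative
		Decision: "enable per-message signing",
		Success:  true,
	})
}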