Harden CHORUS security and messaging stack
This commit is contained in:
@@ -19,8 +19,8 @@ import (
|
||||
type ElectionTrigger string
|
||||
|
||||
const (
|
||||
TriggerHeartbeatTimeout ElectionTrigger = "admin_heartbeat_timeout"
|
||||
TriggerDiscoveryFailure ElectionTrigger = "no_admin_discovered"
|
||||
TriggerHeartbeatTimeout ElectionTrigger = "admin_heartbeat_timeout"
|
||||
TriggerDiscoveryFailure ElectionTrigger = "no_admin_discovered"
|
||||
TriggerSplitBrain ElectionTrigger = "split_brain_detected"
|
||||
TriggerQuorumRestored ElectionTrigger = "quorum_restored"
|
||||
TriggerManual ElectionTrigger = "manual_trigger"
|
||||
@@ -30,30 +30,35 @@ const (
|
||||
// ElectionState names a phase of the admin election state machine.
type ElectionState string

// Pubsub topic names shared by the subscribe and publish paths so the two
// sides cannot drift apart.
const (
	electionTopic       = "CHORUS/election/v1"
	adminHeartbeatTopic = "CHORUS/admin/heartbeat/v1"
)

// Election states. The string values are part of the observable state and
// must not be changed casually.
const (
	StateIdle           ElectionState = "idle"
	StateDiscovering    ElectionState = "discovering"
	StateElecting       ElectionState = "electing"
	StateReconstructing ElectionState = "reconstructing_keys"
	StateComplete       ElectionState = "complete"
)
|
||||
|
||||
// AdminCandidate represents a node candidate for admin role
|
||||
type AdminCandidate struct {
|
||||
NodeID string `json:"node_id"`
|
||||
PeerID peer.ID `json:"peer_id"`
|
||||
Capabilities []string `json:"capabilities"`
|
||||
Uptime time.Duration `json:"uptime"`
|
||||
Resources ResourceMetrics `json:"resources"`
|
||||
Experience time.Duration `json:"experience"`
|
||||
Score float64 `json:"score"`
|
||||
Metadata map[string]interface{} `json:"metadata,omitempty"`
|
||||
NodeID string `json:"node_id"`
|
||||
PeerID peer.ID `json:"peer_id"`
|
||||
Capabilities []string `json:"capabilities"`
|
||||
Uptime time.Duration `json:"uptime"`
|
||||
Resources ResourceMetrics `json:"resources"`
|
||||
Experience time.Duration `json:"experience"`
|
||||
Score float64 `json:"score"`
|
||||
Metadata map[string]interface{} `json:"metadata,omitempty"`
|
||||
}
|
||||
|
||||
// ResourceMetrics holds node resource information for election scoring.
//
// Candidate scoring treats CPU, memory and disk usage as lower-is-better
// (it uses 1.0 minus the value) and NetworkQuality as higher-is-better.
type ResourceMetrics struct {
	CPUUsage       float64 `json:"cpu_usage"`
	MemoryUsage    float64 `json:"memory_usage"`
	DiskUsage      float64 `json:"disk_usage"`
	NetworkQuality float64 `json:"network_quality"`
}
|
||||
|
||||
@@ -68,46 +73,46 @@ type ElectionMessage struct {
|
||||
|
||||
// ElectionManager handles admin election coordination
|
||||
type ElectionManager struct {
|
||||
ctx context.Context
|
||||
cancel context.CancelFunc
|
||||
config *config.Config
|
||||
host libp2p.Host
|
||||
pubsub *pubsub.PubSub
|
||||
nodeID string
|
||||
|
||||
ctx context.Context
|
||||
cancel context.CancelFunc
|
||||
config *config.Config
|
||||
host libp2p.Host
|
||||
pubsub *pubsub.PubSub
|
||||
nodeID string
|
||||
|
||||
// Election state
|
||||
mu sync.RWMutex
|
||||
state ElectionState
|
||||
currentTerm int
|
||||
lastHeartbeat time.Time
|
||||
currentAdmin string
|
||||
candidates map[string]*AdminCandidate
|
||||
votes map[string]string // voter -> candidate
|
||||
|
||||
mu sync.RWMutex
|
||||
state ElectionState
|
||||
currentTerm int
|
||||
lastHeartbeat time.Time
|
||||
currentAdmin string
|
||||
candidates map[string]*AdminCandidate
|
||||
votes map[string]string // voter -> candidate
|
||||
|
||||
// Timers and channels
|
||||
heartbeatTimer *time.Timer
|
||||
discoveryTimer *time.Timer
|
||||
electionTimer *time.Timer
|
||||
electionTrigger chan ElectionTrigger
|
||||
|
||||
heartbeatTimer *time.Timer
|
||||
discoveryTimer *time.Timer
|
||||
electionTimer *time.Timer
|
||||
electionTrigger chan ElectionTrigger
|
||||
|
||||
// Heartbeat management
|
||||
heartbeatManager *HeartbeatManager
|
||||
|
||||
heartbeatManager *HeartbeatManager
|
||||
|
||||
// Callbacks
|
||||
onAdminChanged func(oldAdmin, newAdmin string)
|
||||
onAdminChanged func(oldAdmin, newAdmin string)
|
||||
onElectionComplete func(winner string)
|
||||
|
||||
|
||||
startTime time.Time
|
||||
}
|
||||
|
||||
// HeartbeatManager manages admin heartbeat lifecycle
|
||||
type HeartbeatManager struct {
|
||||
mu sync.Mutex
|
||||
isRunning bool
|
||||
stopCh chan struct{}
|
||||
ticker *time.Ticker
|
||||
electionMgr *ElectionManager
|
||||
logger func(msg string, args ...interface{})
|
||||
mu sync.Mutex
|
||||
isRunning bool
|
||||
stopCh chan struct{}
|
||||
ticker *time.Ticker
|
||||
electionMgr *ElectionManager
|
||||
logger func(msg string, args ...interface{})
|
||||
}
|
||||
|
||||
// NewElectionManager creates a new election manager
|
||||
@@ -119,7 +124,7 @@ func NewElectionManager(
|
||||
nodeID string,
|
||||
) *ElectionManager {
|
||||
electionCtx, cancel := context.WithCancel(ctx)
|
||||
|
||||
|
||||
em := &ElectionManager{
|
||||
ctx: electionCtx,
|
||||
cancel: cancel,
|
||||
@@ -133,7 +138,7 @@ func NewElectionManager(
|
||||
electionTrigger: make(chan ElectionTrigger, 10),
|
||||
startTime: time.Now(),
|
||||
}
|
||||
|
||||
|
||||
// Initialize heartbeat manager
|
||||
em.heartbeatManager = &HeartbeatManager{
|
||||
electionMgr: em,
|
||||
@@ -141,29 +146,32 @@ func NewElectionManager(
|
||||
log.Printf("[HEARTBEAT] "+msg, args...)
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
return em
|
||||
}
|
||||
|
||||
// Start begins the election management system
|
||||
func (em *ElectionManager) Start() error {
|
||||
log.Printf("🗳️ Starting election manager for node %s", em.nodeID)
|
||||
|
||||
// TODO: Subscribe to election-related messages - pubsub interface needs update
|
||||
// if err := em.pubsub.Subscribe("CHORUS/election/v1", em.handleElectionMessage); err != nil {
|
||||
// return fmt.Errorf("failed to subscribe to election messages: %w", err)
|
||||
// }
|
||||
//
|
||||
// if err := em.pubsub.Subscribe("CHORUS/admin/heartbeat/v1", em.handleAdminHeartbeat); err != nil {
|
||||
// return fmt.Errorf("failed to subscribe to admin heartbeat: %w", err)
|
||||
// }
|
||||
|
||||
|
||||
if err := em.pubsub.SubscribeRawTopic(electionTopic, func(data []byte, _ peer.ID) {
|
||||
em.handleElectionMessage(data)
|
||||
}); err != nil {
|
||||
return fmt.Errorf("failed to subscribe to election messages: %w", err)
|
||||
}
|
||||
|
||||
if err := em.pubsub.SubscribeRawTopic(adminHeartbeatTopic, func(data []byte, _ peer.ID) {
|
||||
em.handleAdminHeartbeat(data)
|
||||
}); err != nil {
|
||||
return fmt.Errorf("failed to subscribe to admin heartbeat: %w", err)
|
||||
}
|
||||
|
||||
// Start discovery process
|
||||
go em.startDiscoveryLoop()
|
||||
|
||||
|
||||
// Start election coordinator
|
||||
go em.electionCoordinator()
|
||||
|
||||
|
||||
// Start heartbeat if this node is already admin at startup
|
||||
if em.IsCurrentAdmin() {
|
||||
go func() {
|
||||
@@ -174,7 +182,7 @@ func (em *ElectionManager) Start() error {
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
|
||||
log.Printf("✅ Election manager started")
|
||||
return nil
|
||||
}
|
||||
@@ -182,17 +190,17 @@ func (em *ElectionManager) Start() error {
|
||||
// Stop shuts down the election manager
|
||||
func (em *ElectionManager) Stop() {
|
||||
log.Printf("🛑 Stopping election manager")
|
||||
|
||||
|
||||
// Stop heartbeat first
|
||||
if em.heartbeatManager != nil {
|
||||
em.heartbeatManager.StopHeartbeat()
|
||||
}
|
||||
|
||||
|
||||
em.cancel()
|
||||
|
||||
|
||||
em.mu.Lock()
|
||||
defer em.mu.Unlock()
|
||||
|
||||
|
||||
if em.heartbeatTimer != nil {
|
||||
em.heartbeatTimer.Stop()
|
||||
}
|
||||
@@ -255,7 +263,7 @@ func (em *ElectionManager) GetHeartbeatStatus() map[string]interface{} {
|
||||
// startDiscoveryLoop starts the admin discovery loop
|
||||
func (em *ElectionManager) startDiscoveryLoop() {
|
||||
log.Printf("🔍 Starting admin discovery loop")
|
||||
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-em.ctx.Done():
|
||||
@@ -272,19 +280,19 @@ func (em *ElectionManager) performAdminDiscovery() {
|
||||
currentState := em.state
|
||||
lastHeartbeat := em.lastHeartbeat
|
||||
em.mu.Unlock()
|
||||
|
||||
|
||||
// Only discover if we're idle or the heartbeat is stale
|
||||
if currentState != StateIdle {
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
// Check if admin heartbeat has timed out
|
||||
if !lastHeartbeat.IsZero() && time.Since(lastHeartbeat) > em.config.Security.ElectionConfig.HeartbeatTimeout {
|
||||
log.Printf("⚰️ Admin heartbeat timeout detected (last: %v)", lastHeartbeat)
|
||||
em.TriggerElection(TriggerHeartbeatTimeout)
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
// If we haven't heard from an admin recently, try to discover one
|
||||
if lastHeartbeat.IsZero() || time.Since(lastHeartbeat) > em.config.Security.ElectionConfig.DiscoveryTimeout/2 {
|
||||
em.sendDiscoveryRequest()
|
||||
@@ -298,7 +306,7 @@ func (em *ElectionManager) sendDiscoveryRequest() {
|
||||
NodeID: em.nodeID,
|
||||
Timestamp: time.Now(),
|
||||
}
|
||||
|
||||
|
||||
if err := em.publishElectionMessage(discoveryMsg); err != nil {
|
||||
log.Printf("❌ Failed to send admin discovery request: %v", err)
|
||||
}
|
||||
@@ -307,7 +315,7 @@ func (em *ElectionManager) sendDiscoveryRequest() {
|
||||
// electionCoordinator handles the main election logic
|
||||
func (em *ElectionManager) electionCoordinator() {
|
||||
log.Printf("🎯 Election coordinator started")
|
||||
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-em.ctx.Done():
|
||||
@@ -321,17 +329,17 @@ func (em *ElectionManager) electionCoordinator() {
|
||||
// handleElectionTrigger processes election triggers
|
||||
func (em *ElectionManager) handleElectionTrigger(trigger ElectionTrigger) {
|
||||
log.Printf("🔥 Processing election trigger: %s", trigger)
|
||||
|
||||
|
||||
em.mu.Lock()
|
||||
currentState := em.state
|
||||
em.mu.Unlock()
|
||||
|
||||
|
||||
// Ignore triggers if we're already in an election
|
||||
if currentState != StateIdle {
|
||||
log.Printf("⏸️ Ignoring election trigger, current state: %s", currentState)
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
// Begin election process
|
||||
em.beginElection(trigger)
|
||||
}
|
||||
@@ -339,7 +347,7 @@ func (em *ElectionManager) handleElectionTrigger(trigger ElectionTrigger) {
|
||||
// beginElection starts a new election
|
||||
func (em *ElectionManager) beginElection(trigger ElectionTrigger) {
|
||||
log.Printf("🗳️ Beginning election due to: %s", trigger)
|
||||
|
||||
|
||||
em.mu.Lock()
|
||||
em.state = StateElecting
|
||||
em.currentTerm++
|
||||
@@ -347,12 +355,12 @@ func (em *ElectionManager) beginElection(trigger ElectionTrigger) {
|
||||
em.candidates = make(map[string]*AdminCandidate)
|
||||
em.votes = make(map[string]string)
|
||||
em.mu.Unlock()
|
||||
|
||||
|
||||
// Announce candidacy if this node can be admin
|
||||
if em.canBeAdmin() {
|
||||
em.announceCandidacy(term)
|
||||
}
|
||||
|
||||
|
||||
// Send election announcement
|
||||
electionMsg := ElectionMessage{
|
||||
Type: "election_started",
|
||||
@@ -363,11 +371,11 @@ func (em *ElectionManager) beginElection(trigger ElectionTrigger) {
|
||||
"trigger": string(trigger),
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
if err := em.publishElectionMessage(electionMsg); err != nil {
|
||||
log.Printf("❌ Failed to announce election start: %v", err)
|
||||
}
|
||||
|
||||
|
||||
// Start election timeout
|
||||
em.startElectionTimeout(term)
|
||||
}
|
||||
@@ -386,7 +394,7 @@ func (em *ElectionManager) canBeAdmin() bool {
|
||||
// announceCandidacy announces this node as an election candidate
|
||||
func (em *ElectionManager) announceCandidacy(term int) {
|
||||
uptime := time.Since(em.startTime)
|
||||
|
||||
|
||||
candidate := &AdminCandidate{
|
||||
NodeID: em.nodeID,
|
||||
PeerID: em.host.ID(),
|
||||
@@ -396,13 +404,13 @@ func (em *ElectionManager) announceCandidacy(term int) {
|
||||
Experience: uptime, // For now, use uptime as experience
|
||||
Metadata: map[string]interface{}{
|
||||
"specialization": em.config.Agent.Specialization,
|
||||
"models": em.config.Agent.Models,
|
||||
"models": em.config.Agent.Models,
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
// Calculate candidate score
|
||||
candidate.Score = em.calculateCandidateScore(candidate)
|
||||
|
||||
|
||||
candidacyMsg := ElectionMessage{
|
||||
Type: "candidacy_announcement",
|
||||
NodeID: em.nodeID,
|
||||
@@ -410,9 +418,9 @@ func (em *ElectionManager) announceCandidacy(term int) {
|
||||
Term: term,
|
||||
Data: candidate,
|
||||
}
|
||||
|
||||
|
||||
log.Printf("📢 Announcing candidacy (score: %.2f)", candidate.Score)
|
||||
|
||||
|
||||
if err := em.publishElectionMessage(candidacyMsg); err != nil {
|
||||
log.Printf("❌ Failed to announce candidacy: %v", err)
|
||||
}
|
||||
@@ -423,9 +431,9 @@ func (em *ElectionManager) getResourceMetrics() ResourceMetrics {
|
||||
// TODO: Implement actual resource collection
|
||||
// For now, return simulated values
|
||||
return ResourceMetrics{
|
||||
CPUUsage: rand.Float64() * 0.5, // 0-50% CPU
|
||||
MemoryUsage: rand.Float64() * 0.7, // 0-70% Memory
|
||||
DiskUsage: rand.Float64() * 0.6, // 0-60% Disk
|
||||
CPUUsage: rand.Float64() * 0.5, // 0-50% CPU
|
||||
MemoryUsage: rand.Float64() * 0.7, // 0-70% Memory
|
||||
DiskUsage: rand.Float64() * 0.6, // 0-60% Disk
|
||||
NetworkQuality: 0.8 + rand.Float64()*0.2, // 80-100% Network Quality
|
||||
}
|
||||
}
|
||||
@@ -435,10 +443,10 @@ func (em *ElectionManager) calculateCandidateScore(candidate *AdminCandidate) fl
|
||||
// TODO: Add LeadershipScoring to config.ElectionConfig
|
||||
// scoring := em.config.Security.ElectionConfig.LeadershipScoring
|
||||
// Default scoring weights handled inline
|
||||
|
||||
|
||||
// Normalize metrics to 0-1 range
|
||||
uptimeScore := min(1.0, candidate.Uptime.Hours()/24.0) // Up to 24 hours gets full score
|
||||
|
||||
|
||||
// Capability score - higher for admin/coordination capabilities
|
||||
capabilityScore := 0.0
|
||||
adminCapabilities := []string{"admin_election", "context_curation", "key_reconstruction", "semantic_analysis", "project_manager"}
|
||||
@@ -455,22 +463,22 @@ func (em *ElectionManager) calculateCandidateScore(candidate *AdminCandidate) fl
|
||||
}
|
||||
}
|
||||
capabilityScore = min(1.0, capabilityScore)
|
||||
|
||||
|
||||
// Resource score - lower usage is better
|
||||
resourceScore := (1.0 - candidate.Resources.CPUUsage) * 0.3 +
|
||||
(1.0 - candidate.Resources.MemoryUsage) * 0.3 +
|
||||
(1.0 - candidate.Resources.DiskUsage) * 0.2 +
|
||||
candidate.Resources.NetworkQuality * 0.2
|
||||
|
||||
resourceScore := (1.0-candidate.Resources.CPUUsage)*0.3 +
|
||||
(1.0-candidate.Resources.MemoryUsage)*0.3 +
|
||||
(1.0-candidate.Resources.DiskUsage)*0.2 +
|
||||
candidate.Resources.NetworkQuality*0.2
|
||||
|
||||
experienceScore := min(1.0, candidate.Experience.Hours()/168.0) // Up to 1 week gets full score
|
||||
|
||||
|
||||
// Weighted final score (using default weights)
|
||||
finalScore := uptimeScore*0.3 +
|
||||
capabilityScore*0.2 +
|
||||
resourceScore*0.2 +
|
||||
candidate.Resources.NetworkQuality*0.15 +
|
||||
experienceScore*0.15
|
||||
|
||||
|
||||
return finalScore
|
||||
}
|
||||
|
||||
@@ -478,11 +486,11 @@ func (em *ElectionManager) calculateCandidateScore(candidate *AdminCandidate) fl
|
||||
func (em *ElectionManager) startElectionTimeout(term int) {
|
||||
em.mu.Lock()
|
||||
defer em.mu.Unlock()
|
||||
|
||||
|
||||
if em.electionTimer != nil {
|
||||
em.electionTimer.Stop()
|
||||
}
|
||||
|
||||
|
||||
em.electionTimer = time.AfterFunc(em.config.Security.ElectionConfig.ElectionTimeout, func() {
|
||||
em.completeElection(term)
|
||||
})
|
||||
@@ -492,15 +500,15 @@ func (em *ElectionManager) startElectionTimeout(term int) {
|
||||
func (em *ElectionManager) completeElection(term int) {
|
||||
em.mu.Lock()
|
||||
defer em.mu.Unlock()
|
||||
|
||||
|
||||
// Verify this is still the current term
|
||||
if term != em.currentTerm {
|
||||
log.Printf("⏰ Election timeout for old term %d, ignoring", term)
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
log.Printf("⏰ Election timeout reached, tallying votes")
|
||||
|
||||
|
||||
// Find the winning candidate
|
||||
winner := em.findElectionWinner()
|
||||
if winner == nil {
|
||||
@@ -513,14 +521,14 @@ func (em *ElectionManager) completeElection(term int) {
|
||||
}()
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
log.Printf("🏆 Election winner: %s (score: %.2f)", winner.NodeID, winner.Score)
|
||||
|
||||
|
||||
// Update admin
|
||||
oldAdmin := em.currentAdmin
|
||||
em.currentAdmin = winner.NodeID
|
||||
em.state = StateComplete
|
||||
|
||||
|
||||
// Announce the winner
|
||||
winnerMsg := ElectionMessage{
|
||||
Type: "election_winner",
|
||||
@@ -529,16 +537,16 @@ func (em *ElectionManager) completeElection(term int) {
|
||||
Term: term,
|
||||
Data: winner,
|
||||
}
|
||||
|
||||
|
||||
em.mu.Unlock() // Unlock before publishing
|
||||
|
||||
|
||||
if err := em.publishElectionMessage(winnerMsg); err != nil {
|
||||
log.Printf("❌ Failed to announce election winner: %v", err)
|
||||
}
|
||||
|
||||
|
||||
// Handle heartbeat lifecycle based on admin change
|
||||
em.handleHeartbeatTransition(oldAdmin, winner.NodeID)
|
||||
|
||||
|
||||
// Trigger callbacks
|
||||
if em.onAdminChanged != nil {
|
||||
em.onAdminChanged(oldAdmin, winner.NodeID)
|
||||
@@ -546,7 +554,7 @@ func (em *ElectionManager) completeElection(term int) {
|
||||
if em.onElectionComplete != nil {
|
||||
em.onElectionComplete(winner.NodeID)
|
||||
}
|
||||
|
||||
|
||||
em.mu.Lock()
|
||||
em.state = StateIdle // Reset state for next election
|
||||
}
|
||||
@@ -556,16 +564,16 @@ func (em *ElectionManager) findElectionWinner() *AdminCandidate {
|
||||
if len(em.candidates) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
// Count votes for each candidate
|
||||
voteCounts := make(map[string]int)
|
||||
totalVotes := 0
|
||||
|
||||
|
||||
// Initialize vote counts for all candidates
|
||||
for candidateID := range em.candidates {
|
||||
voteCounts[candidateID] = 0
|
||||
}
|
||||
|
||||
|
||||
// Tally actual votes
|
||||
for _, candidateID := range em.votes {
|
||||
if _, exists := em.candidates[candidateID]; exists {
|
||||
@@ -573,12 +581,12 @@ func (em *ElectionManager) findElectionWinner() *AdminCandidate {
|
||||
totalVotes++
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// If no votes cast, fall back to highest scoring candidate
|
||||
if totalVotes == 0 {
|
||||
var winner *AdminCandidate
|
||||
highestScore := -1.0
|
||||
|
||||
|
||||
for _, candidate := range em.candidates {
|
||||
if candidate.Score > highestScore {
|
||||
highestScore = candidate.Score
|
||||
@@ -587,12 +595,12 @@ func (em *ElectionManager) findElectionWinner() *AdminCandidate {
|
||||
}
|
||||
return winner
|
||||
}
|
||||
|
||||
|
||||
// Find candidate with most votes
|
||||
var winner *AdminCandidate
|
||||
maxVotes := -1
|
||||
highestScore := -1.0
|
||||
|
||||
|
||||
for candidateID, voteCount := range voteCounts {
|
||||
candidate := em.candidates[candidateID]
|
||||
if voteCount > maxVotes || (voteCount == maxVotes && candidate.Score > highestScore) {
|
||||
@@ -601,10 +609,10 @@ func (em *ElectionManager) findElectionWinner() *AdminCandidate {
|
||||
winner = candidate
|
||||
}
|
||||
}
|
||||
|
||||
log.Printf("🗳️ Election results: %d total votes, winner: %s with %d votes (score: %.2f)",
|
||||
|
||||
log.Printf("🗳️ Election results: %d total votes, winner: %s with %d votes (score: %.2f)",
|
||||
totalVotes, winner.NodeID, maxVotes, winner.Score)
|
||||
|
||||
|
||||
return winner
|
||||
}
|
||||
|
||||
@@ -615,12 +623,12 @@ func (em *ElectionManager) handleElectionMessage(data []byte) {
|
||||
log.Printf("❌ Failed to unmarshal election message: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
// Ignore messages from ourselves
|
||||
if msg.NodeID == em.nodeID {
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
switch msg.Type {
|
||||
case "admin_discovery_request":
|
||||
em.handleAdminDiscoveryRequest(msg)
|
||||
@@ -643,7 +651,7 @@ func (em *ElectionManager) handleAdminDiscoveryRequest(msg ElectionMessage) {
|
||||
currentAdmin := em.currentAdmin
|
||||
state := em.state
|
||||
em.mu.RUnlock()
|
||||
|
||||
|
||||
// Only respond if we know who the current admin is and we're idle
|
||||
if currentAdmin != "" && state == StateIdle {
|
||||
responseMsg := ElectionMessage{
|
||||
@@ -654,7 +662,7 @@ func (em *ElectionManager) handleAdminDiscoveryRequest(msg ElectionMessage) {
|
||||
"current_admin": currentAdmin,
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
if err := em.publishElectionMessage(responseMsg); err != nil {
|
||||
log.Printf("❌ Failed to send admin discovery response: %v", err)
|
||||
}
|
||||
@@ -679,7 +687,7 @@ func (em *ElectionManager) handleAdminDiscoveryResponse(msg ElectionMessage) {
|
||||
func (em *ElectionManager) handleElectionStarted(msg ElectionMessage) {
|
||||
em.mu.Lock()
|
||||
defer em.mu.Unlock()
|
||||
|
||||
|
||||
// If we receive an election start with a higher term, join the election
|
||||
if msg.Term > em.currentTerm {
|
||||
log.Printf("🔄 Joining election with term %d", msg.Term)
|
||||
@@ -687,7 +695,7 @@ func (em *ElectionManager) handleElectionStarted(msg ElectionMessage) {
|
||||
em.state = StateElecting
|
||||
em.candidates = make(map[string]*AdminCandidate)
|
||||
em.votes = make(map[string]string)
|
||||
|
||||
|
||||
// Announce candidacy if eligible
|
||||
if em.canBeAdmin() {
|
||||
go em.announceCandidacy(msg.Term)
|
||||
@@ -699,25 +707,25 @@ func (em *ElectionManager) handleElectionStarted(msg ElectionMessage) {
|
||||
func (em *ElectionManager) handleCandidacyAnnouncement(msg ElectionMessage) {
|
||||
em.mu.Lock()
|
||||
defer em.mu.Unlock()
|
||||
|
||||
|
||||
// Only process if it's for the current term
|
||||
if msg.Term != em.currentTerm {
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
// Convert data to candidate struct
|
||||
candidateData, err := json.Marshal(msg.Data)
|
||||
if err != nil {
|
||||
log.Printf("❌ Failed to marshal candidate data: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
var candidate AdminCandidate
|
||||
if err := json.Unmarshal(candidateData, &candidate); err != nil {
|
||||
log.Printf("❌ Failed to unmarshal candidate: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
log.Printf("📝 Received candidacy from %s (score: %.2f)", candidate.NodeID, candidate.Score)
|
||||
em.candidates[candidate.NodeID] = &candidate
|
||||
}
|
||||
@@ -726,31 +734,31 @@ func (em *ElectionManager) handleCandidacyAnnouncement(msg ElectionMessage) {
|
||||
func (em *ElectionManager) handleElectionVote(msg ElectionMessage) {
|
||||
em.mu.Lock()
|
||||
defer em.mu.Unlock()
|
||||
|
||||
|
||||
// Extract vote data
|
||||
voteData, ok := msg.Data.(map[string]interface{})
|
||||
if !ok {
|
||||
log.Printf("❌ Invalid vote data format from %s", msg.NodeID)
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
candidateID, ok := voteData["candidate"].(string)
|
||||
if !ok {
|
||||
log.Printf("❌ Invalid candidate ID in vote from %s", msg.NodeID)
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
// Validate candidate exists
|
||||
if _, exists := em.candidates[candidateID]; !exists {
|
||||
log.Printf("❌ Vote for unknown candidate %s from %s", candidateID, msg.NodeID)
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
// Prevent duplicate voting
|
||||
if existingVote, exists := em.votes[msg.NodeID]; exists {
|
||||
log.Printf("⚠️ Node %s already voted for %s, updating to %s", msg.NodeID, existingVote, candidateID)
|
||||
}
|
||||
|
||||
|
||||
// Record the vote
|
||||
em.votes[msg.NodeID] = candidateID
|
||||
log.Printf("🗳️ Recorded vote from %s for candidate %s", msg.NodeID, candidateID)
|
||||
@@ -763,24 +771,24 @@ func (em *ElectionManager) handleElectionWinner(msg ElectionMessage) {
|
||||
log.Printf("❌ Failed to marshal winner data: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
var winner AdminCandidate
|
||||
if err := json.Unmarshal(candidateData, &winner); err != nil {
|
||||
log.Printf("❌ Failed to unmarshal winner: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
em.mu.Lock()
|
||||
oldAdmin := em.currentAdmin
|
||||
em.currentAdmin = winner.NodeID
|
||||
em.state = StateIdle
|
||||
em.mu.Unlock()
|
||||
|
||||
|
||||
log.Printf("👑 New admin elected: %s", winner.NodeID)
|
||||
|
||||
|
||||
// Handle heartbeat lifecycle based on admin change
|
||||
em.handleHeartbeatTransition(oldAdmin, winner.NodeID)
|
||||
|
||||
|
||||
// Trigger callback
|
||||
if em.onAdminChanged != nil {
|
||||
em.onAdminChanged(oldAdmin, winner.NodeID)
|
||||
@@ -796,7 +804,7 @@ func (em *ElectionManager) handleHeartbeatTransition(oldAdmin, newAdmin string)
|
||||
log.Printf("⚠️ Error stopping heartbeat: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// If we gained admin role, start heartbeat
|
||||
if newAdmin == em.nodeID && oldAdmin != em.nodeID {
|
||||
log.Printf("🔄 Gained admin role, starting heartbeat")
|
||||
@@ -816,15 +824,15 @@ func (em *ElectionManager) handleAdminHeartbeat(data []byte) {
|
||||
NodeID string `json:"node_id"`
|
||||
Timestamp time.Time `json:"timestamp"`
|
||||
}
|
||||
|
||||
|
||||
if err := json.Unmarshal(data, &heartbeat); err != nil {
|
||||
log.Printf("❌ Failed to unmarshal heartbeat: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
em.mu.Lock()
|
||||
defer em.mu.Unlock()
|
||||
|
||||
|
||||
// Update admin and heartbeat timestamp
|
||||
if em.currentAdmin == "" || em.currentAdmin == heartbeat.NodeID {
|
||||
em.currentAdmin = heartbeat.NodeID
|
||||
@@ -838,11 +846,8 @@ func (em *ElectionManager) publishElectionMessage(msg ElectionMessage) error {
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to marshal election message: %w", err)
|
||||
}
|
||||
|
||||
// TODO: Fix pubsub interface
|
||||
// return em.pubsub.Publish("CHORUS/election/v1", data)
|
||||
_ = data // Avoid unused variable
|
||||
return nil
|
||||
|
||||
return em.pubsub.PublishRaw(electionTopic, data)
|
||||
}
|
||||
|
||||
// SendAdminHeartbeat sends admin heartbeat (only if this node is admin)
|
||||
@@ -850,7 +855,7 @@ func (em *ElectionManager) SendAdminHeartbeat() error {
|
||||
if !em.IsCurrentAdmin() {
|
||||
return fmt.Errorf("not current admin")
|
||||
}
|
||||
|
||||
|
||||
heartbeat := struct {
|
||||
NodeID string `json:"node_id"`
|
||||
Timestamp time.Time `json:"timestamp"`
|
||||
@@ -858,16 +863,13 @@ func (em *ElectionManager) SendAdminHeartbeat() error {
|
||||
NodeID: em.nodeID,
|
||||
Timestamp: time.Now(),
|
||||
}
|
||||
|
||||
|
||||
data, err := json.Marshal(heartbeat)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to marshal heartbeat: %w", err)
|
||||
}
|
||||
|
||||
// TODO: Fix pubsub interface
|
||||
// return em.pubsub.Publish("CHORUS/admin/heartbeat/v1", data)
|
||||
_ = data // Avoid unused variable
|
||||
return nil
|
||||
|
||||
return em.pubsub.PublishRaw(adminHeartbeatTopic, data)
|
||||
}
|
||||
|
||||
// min returns the minimum of two float64 values
|
||||
@@ -894,26 +896,26 @@ func NewHeartbeatManager(electionMgr *ElectionManager) *HeartbeatManager {
|
||||
func (hm *HeartbeatManager) StartHeartbeat() error {
|
||||
hm.mu.Lock()
|
||||
defer hm.mu.Unlock()
|
||||
|
||||
|
||||
if hm.isRunning {
|
||||
hm.logger("Heartbeat already running")
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
if !hm.electionMgr.IsCurrentAdmin() {
|
||||
return fmt.Errorf("not admin, cannot start heartbeat")
|
||||
}
|
||||
|
||||
|
||||
hm.logger("Starting admin heartbeat transmission")
|
||||
|
||||
|
||||
hm.stopCh = make(chan struct{})
|
||||
interval := hm.electionMgr.config.Security.ElectionConfig.HeartbeatTimeout / 2
|
||||
hm.ticker = time.NewTicker(interval)
|
||||
hm.isRunning = true
|
||||
|
||||
|
||||
// Start heartbeat goroutine
|
||||
go hm.heartbeatLoop()
|
||||
|
||||
|
||||
hm.logger("Admin heartbeat started (interval: %v)", interval)
|
||||
return nil
|
||||
}
|
||||
@@ -922,22 +924,22 @@ func (hm *HeartbeatManager) StartHeartbeat() error {
|
||||
func (hm *HeartbeatManager) StopHeartbeat() error {
|
||||
hm.mu.Lock()
|
||||
defer hm.mu.Unlock()
|
||||
|
||||
|
||||
if !hm.isRunning {
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
hm.logger("Stopping admin heartbeat transmission")
|
||||
|
||||
|
||||
// Signal stop
|
||||
close(hm.stopCh)
|
||||
|
||||
|
||||
// Stop ticker
|
||||
if hm.ticker != nil {
|
||||
hm.ticker.Stop()
|
||||
hm.ticker = nil
|
||||
}
|
||||
|
||||
|
||||
hm.isRunning = false
|
||||
hm.logger("Admin heartbeat stopped")
|
||||
return nil
|
||||
@@ -958,7 +960,7 @@ func (hm *HeartbeatManager) heartbeatLoop() {
|
||||
hm.mu.Unlock()
|
||||
hm.logger("Heartbeat loop terminated")
|
||||
}()
|
||||
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-hm.ticker.C:
|
||||
@@ -971,11 +973,11 @@ func (hm *HeartbeatManager) heartbeatLoop() {
|
||||
hm.logger("No longer admin, stopping heartbeat")
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
case <-hm.stopCh:
|
||||
hm.logger("Heartbeat stop signal received")
|
||||
return
|
||||
|
||||
|
||||
case <-hm.electionMgr.ctx.Done():
|
||||
hm.logger("Election manager context cancelled")
|
||||
return
|
||||
@@ -987,19 +989,19 @@ func (hm *HeartbeatManager) heartbeatLoop() {
|
||||
func (hm *HeartbeatManager) GetHeartbeatStatus() map[string]interface{} {
|
||||
hm.mu.Lock()
|
||||
defer hm.mu.Unlock()
|
||||
|
||||
|
||||
status := map[string]interface{}{
|
||||
"running": hm.isRunning,
|
||||
"is_admin": hm.electionMgr.IsCurrentAdmin(),
|
||||
"last_sent": time.Now(), // TODO: Track actual last sent time
|
||||
"running": hm.isRunning,
|
||||
"is_admin": hm.electionMgr.IsCurrentAdmin(),
|
||||
"last_sent": time.Now(), // TODO: Track actual last sent time
|
||||
}
|
||||
|
||||
|
||||
if hm.isRunning && hm.ticker != nil {
|
||||
// Calculate next heartbeat time (approximate)
|
||||
interval := hm.electionMgr.config.Security.ElectionConfig.HeartbeatTimeout / 2
|
||||
status["interval"] = interval.String()
|
||||
status["next_heartbeat"] = time.Now().Add(interval)
|
||||
}
|
||||
|
||||
|
||||
return status
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,451 +2,185 @@ package election
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"chorus/pkg/config"
|
||||
pubsubpkg "chorus/pubsub"
|
||||
libp2p "github.com/libp2p/go-libp2p"
|
||||
)
|
||||
|
||||
func TestElectionManager_NewElectionManager(t *testing.T) {
|
||||
// newTestElectionManager wires a real libp2p host and PubSub instance so the
|
||||
// election manager exercises the same code paths used in production.
|
||||
func newTestElectionManager(t *testing.T) *ElectionManager {
|
||||
t.Helper()
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
|
||||
host, err := libp2p.New(libp2p.ListenAddrStrings("/ip4/127.0.0.1/tcp/0"))
|
||||
if err != nil {
|
||||
cancel()
|
||||
t.Fatalf("failed to create libp2p host: %v", err)
|
||||
}
|
||||
|
||||
ps, err := pubsubpkg.NewPubSub(ctx, host, "", "")
|
||||
if err != nil {
|
||||
host.Close()
|
||||
cancel()
|
||||
t.Fatalf("failed to create pubsub: %v", err)
|
||||
}
|
||||
|
||||
cfg := &config.Config{
|
||||
Agent: config.AgentConfig{
|
||||
ID: "test-node",
|
||||
ID: host.ID().String(),
|
||||
Role: "context_admin",
|
||||
Capabilities: []string{"admin_election", "context_curation"},
|
||||
Models: []string{"meta/llama-3.1-8b-instruct"},
|
||||
Specialization: "coordination",
|
||||
},
|
||||
Security: config.SecurityConfig{},
|
||||
}
|
||||
|
||||
em := NewElectionManager(cfg)
|
||||
if em == nil {
|
||||
t.Fatal("Expected NewElectionManager to return non-nil manager")
|
||||
}
|
||||
em := NewElectionManager(ctx, cfg, host, ps, host.ID().String())
|
||||
|
||||
if em.nodeID != "test-node" {
|
||||
t.Errorf("Expected nodeID to be 'test-node', got %s", em.nodeID)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
em.Stop()
|
||||
ps.Close()
|
||||
host.Close()
|
||||
cancel()
|
||||
})
|
||||
|
||||
return em
|
||||
}
|
||||
|
||||
func TestNewElectionManagerInitialState(t *testing.T) {
|
||||
em := newTestElectionManager(t)
|
||||
|
||||
if em.state != StateIdle {
|
||||
t.Errorf("Expected initial state to be StateIdle, got %v", em.state)
|
||||
t.Fatalf("expected initial state %q, got %q", StateIdle, em.state)
|
||||
}
|
||||
|
||||
if em.currentTerm != 0 {
|
||||
t.Fatalf("expected initial term 0, got %d", em.currentTerm)
|
||||
}
|
||||
|
||||
if em.nodeID == "" {
|
||||
t.Fatal("expected nodeID to be populated")
|
||||
}
|
||||
}
|
||||
|
||||
func TestElectionManager_StartElection(t *testing.T) {
|
||||
cfg := &config.Config{
|
||||
Agent: config.AgentConfig{
|
||||
ID: "test-node",
|
||||
},
|
||||
func TestElectionManagerCanBeAdmin(t *testing.T) {
|
||||
em := newTestElectionManager(t)
|
||||
|
||||
if !em.canBeAdmin() {
|
||||
t.Fatal("expected node to qualify for admin election")
|
||||
}
|
||||
|
||||
em := NewElectionManager(cfg)
|
||||
|
||||
// Start election
|
||||
err := em.StartElection()
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to start election: %v", err)
|
||||
}
|
||||
|
||||
// Verify state changed
|
||||
if em.state != StateCandidate {
|
||||
t.Errorf("Expected state to be StateCandidate after starting election, got %v", em.state)
|
||||
}
|
||||
|
||||
// Verify we added ourselves as a candidate
|
||||
em.mu.RLock()
|
||||
candidate, exists := em.candidates[em.nodeID]
|
||||
em.mu.RUnlock()
|
||||
|
||||
if !exists {
|
||||
t.Error("Expected to find ourselves as a candidate after starting election")
|
||||
}
|
||||
|
||||
if candidate.NodeID != em.nodeID {
|
||||
t.Errorf("Expected candidate NodeID to be %s, got %s", em.nodeID, candidate.NodeID)
|
||||
em.config.Agent.Capabilities = []string{"runtime_support"}
|
||||
if em.canBeAdmin() {
|
||||
t.Fatal("expected node without admin capabilities to be ineligible")
|
||||
}
|
||||
}
|
||||
|
||||
func TestElectionManager_Vote(t *testing.T) {
|
||||
cfg := &config.Config{
|
||||
Agent: config.AgentConfig{
|
||||
ID: "test-node",
|
||||
},
|
||||
}
|
||||
|
||||
em := NewElectionManager(cfg)
|
||||
|
||||
// Add a candidate first
|
||||
candidate := &AdminCandidate{
|
||||
NodeID: "candidate-1",
|
||||
Term: 1,
|
||||
Score: 0.8,
|
||||
Capabilities: []string{"admin"},
|
||||
LastSeen: time.Now(),
|
||||
}
|
||||
|
||||
em.mu.Lock()
|
||||
em.candidates["candidate-1"] = candidate
|
||||
em.mu.Unlock()
|
||||
|
||||
// Vote for the candidate
|
||||
err := em.Vote("candidate-1")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to vote: %v", err)
|
||||
}
|
||||
|
||||
// Verify vote was recorded
|
||||
em.mu.RLock()
|
||||
vote, exists := em.votes[em.nodeID]
|
||||
em.mu.RUnlock()
|
||||
|
||||
if !exists {
|
||||
t.Error("Expected to find our vote after voting")
|
||||
}
|
||||
|
||||
if vote != "candidate-1" {
|
||||
t.Errorf("Expected vote to be for 'candidate-1', got %s", vote)
|
||||
}
|
||||
}
|
||||
|
||||
func TestElectionManager_VoteInvalidCandidate(t *testing.T) {
|
||||
cfg := &config.Config{
|
||||
Agent: config.AgentConfig{
|
||||
ID: "test-node",
|
||||
},
|
||||
}
|
||||
|
||||
em := NewElectionManager(cfg)
|
||||
|
||||
// Try to vote for non-existent candidate
|
||||
err := em.Vote("non-existent")
|
||||
if err == nil {
|
||||
t.Error("Expected error when voting for non-existent candidate")
|
||||
}
|
||||
}
|
||||
|
||||
func TestElectionManager_AddCandidate(t *testing.T) {
|
||||
cfg := &config.Config{
|
||||
Agent: config.AgentConfig{
|
||||
ID: "test-node",
|
||||
},
|
||||
}
|
||||
|
||||
em := NewElectionManager(cfg)
|
||||
|
||||
candidate := &AdminCandidate{
|
||||
NodeID: "new-candidate",
|
||||
Term: 1,
|
||||
Score: 0.7,
|
||||
Capabilities: []string{"admin", "leader"},
|
||||
LastSeen: time.Now(),
|
||||
}
|
||||
|
||||
err := em.AddCandidate(candidate)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to add candidate: %v", err)
|
||||
}
|
||||
|
||||
// Verify candidate was added
|
||||
em.mu.RLock()
|
||||
stored, exists := em.candidates["new-candidate"]
|
||||
em.mu.RUnlock()
|
||||
|
||||
if !exists {
|
||||
t.Error("Expected to find added candidate")
|
||||
}
|
||||
|
||||
if stored.NodeID != "new-candidate" {
|
||||
t.Errorf("Expected stored candidate NodeID to be 'new-candidate', got %s", stored.NodeID)
|
||||
}
|
||||
|
||||
if stored.Score != 0.7 {
|
||||
t.Errorf("Expected stored candidate score to be 0.7, got %f", stored.Score)
|
||||
}
|
||||
}
|
||||
|
||||
func TestElectionManager_FindElectionWinner(t *testing.T) {
|
||||
cfg := &config.Config{
|
||||
Agent: config.AgentConfig{
|
||||
ID: "test-node",
|
||||
},
|
||||
}
|
||||
|
||||
em := NewElectionManager(cfg)
|
||||
|
||||
// Add candidates with different scores
|
||||
candidates := []*AdminCandidate{
|
||||
{
|
||||
NodeID: "candidate-1",
|
||||
Term: 1,
|
||||
Score: 0.6,
|
||||
Capabilities: []string{"admin"},
|
||||
LastSeen: time.Now(),
|
||||
},
|
||||
{
|
||||
NodeID: "candidate-2",
|
||||
Term: 1,
|
||||
Score: 0.8,
|
||||
Capabilities: []string{"admin", "leader"},
|
||||
LastSeen: time.Now(),
|
||||
},
|
||||
{
|
||||
NodeID: "candidate-3",
|
||||
Term: 1,
|
||||
Score: 0.7,
|
||||
Capabilities: []string{"admin"},
|
||||
LastSeen: time.Now(),
|
||||
},
|
||||
}
|
||||
func TestFindElectionWinnerPrefersVotesThenScore(t *testing.T) {
|
||||
em := newTestElectionManager(t)
|
||||
|
||||
em.mu.Lock()
|
||||
for _, candidate := range candidates {
|
||||
em.candidates[candidate.NodeID] = candidate
|
||||
em.candidates = map[string]*AdminCandidate{
|
||||
"candidate-1": {
|
||||
NodeID: "candidate-1",
|
||||
PeerID: em.host.ID(),
|
||||
Score: 0.65,
|
||||
},
|
||||
"candidate-2": {
|
||||
NodeID: "candidate-2",
|
||||
PeerID: em.host.ID(),
|
||||
Score: 0.80,
|
||||
},
|
||||
}
|
||||
em.votes = map[string]string{
|
||||
"voter-a": "candidate-1",
|
||||
"voter-b": "candidate-2",
|
||||
"voter-c": "candidate-2",
|
||||
}
|
||||
|
||||
// Add some votes
|
||||
em.votes["voter-1"] = "candidate-2"
|
||||
em.votes["voter-2"] = "candidate-2"
|
||||
em.votes["voter-3"] = "candidate-1"
|
||||
em.mu.Unlock()
|
||||
|
||||
// Find winner
|
||||
winner := em.findElectionWinner()
|
||||
|
||||
if winner == nil {
|
||||
t.Fatal("Expected findElectionWinner to return a winner")
|
||||
t.Fatal("expected a winner to be selected")
|
||||
}
|
||||
|
||||
// candidate-2 should win with most votes (2 votes)
|
||||
if winner.NodeID != "candidate-2" {
|
||||
t.Errorf("Expected winner to be 'candidate-2', got %s", winner.NodeID)
|
||||
t.Fatalf("expected candidate-2 to win, got %s", winner.NodeID)
|
||||
}
|
||||
}
|
||||
|
||||
func TestElectionManager_FindElectionWinnerNoVotes(t *testing.T) {
|
||||
cfg := &config.Config{
|
||||
Agent: config.AgentConfig{
|
||||
ID: "test-node",
|
||||
},
|
||||
}
|
||||
|
||||
em := NewElectionManager(cfg)
|
||||
|
||||
// Add candidates but no votes - should fall back to highest score
|
||||
candidates := []*AdminCandidate{
|
||||
{
|
||||
NodeID: "candidate-1",
|
||||
Term: 1,
|
||||
Score: 0.6,
|
||||
Capabilities: []string{"admin"},
|
||||
LastSeen: time.Now(),
|
||||
},
|
||||
{
|
||||
NodeID: "candidate-2",
|
||||
Term: 1,
|
||||
Score: 0.9, // Highest score
|
||||
Capabilities: []string{"admin", "leader"},
|
||||
LastSeen: time.Now(),
|
||||
},
|
||||
}
|
||||
func TestHandleElectionMessageAddsCandidate(t *testing.T) {
|
||||
em := newTestElectionManager(t)
|
||||
|
||||
em.mu.Lock()
|
||||
for _, candidate := range candidates {
|
||||
em.candidates[candidate.NodeID] = candidate
|
||||
}
|
||||
em.currentTerm = 3
|
||||
em.state = StateElecting
|
||||
em.mu.Unlock()
|
||||
|
||||
// Find winner without any votes
|
||||
winner := em.findElectionWinner()
|
||||
|
||||
if winner == nil {
|
||||
t.Fatal("Expected findElectionWinner to return a winner")
|
||||
}
|
||||
|
||||
// candidate-2 should win with highest score
|
||||
if winner.NodeID != "candidate-2" {
|
||||
t.Errorf("Expected winner to be 'candidate-2' (highest score), got %s", winner.NodeID)
|
||||
}
|
||||
}
|
||||
|
||||
func TestElectionManager_HandleElectionVote(t *testing.T) {
|
||||
cfg := &config.Config{
|
||||
Agent: config.AgentConfig{
|
||||
ID: "test-node",
|
||||
},
|
||||
}
|
||||
|
||||
em := NewElectionManager(cfg)
|
||||
|
||||
// Add a candidate first
|
||||
candidate := &AdminCandidate{
|
||||
NodeID: "candidate-1",
|
||||
Term: 1,
|
||||
Score: 0.8,
|
||||
Capabilities: []string{"admin"},
|
||||
LastSeen: time.Now(),
|
||||
NodeID: "peer-2",
|
||||
PeerID: em.host.ID(),
|
||||
Capabilities: []string{"admin_election"},
|
||||
Uptime: time.Second,
|
||||
Score: 0.75,
|
||||
}
|
||||
|
||||
payload, err := json.Marshal(candidate)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to marshal candidate: %v", err)
|
||||
}
|
||||
|
||||
var data map[string]interface{}
|
||||
if err := json.Unmarshal(payload, &data); err != nil {
|
||||
t.Fatalf("failed to unmarshal candidate payload: %v", err)
|
||||
}
|
||||
|
||||
em.mu.Lock()
|
||||
em.candidates["candidate-1"] = candidate
|
||||
em.mu.Unlock()
|
||||
|
||||
// Create vote message
|
||||
msg := ElectionMessage{
|
||||
Type: MessageTypeVote,
|
||||
NodeID: "voter-1",
|
||||
Data: map[string]interface{}{
|
||||
"candidate": "candidate-1",
|
||||
},
|
||||
Type: "candidacy_announcement",
|
||||
NodeID: "peer-2",
|
||||
Timestamp: time.Now(),
|
||||
Term: 3,
|
||||
Data: data,
|
||||
}
|
||||
|
||||
// Handle the vote
|
||||
em.handleElectionVote(msg)
|
||||
serialized, err := json.Marshal(msg)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to marshal election message: %v", err)
|
||||
}
|
||||
|
||||
em.handleElectionMessage(serialized)
|
||||
|
||||
// Verify vote was recorded
|
||||
em.mu.RLock()
|
||||
vote, exists := em.votes["voter-1"]
|
||||
_, exists := em.candidates["peer-2"]
|
||||
em.mu.RUnlock()
|
||||
|
||||
if !exists {
|
||||
t.Error("Expected vote to be recorded after handling vote message")
|
||||
}
|
||||
|
||||
if vote != "candidate-1" {
|
||||
t.Errorf("Expected recorded vote to be for 'candidate-1', got %s", vote)
|
||||
t.Fatal("expected candidacy announcement to register candidate")
|
||||
}
|
||||
}
|
||||
|
||||
func TestElectionManager_HandleElectionVoteInvalidData(t *testing.T) {
|
||||
cfg := &config.Config{
|
||||
Agent: config.AgentConfig{
|
||||
ID: "test-node",
|
||||
},
|
||||
func TestSendAdminHeartbeatRequiresLeadership(t *testing.T) {
|
||||
em := newTestElectionManager(t)
|
||||
|
||||
if err := em.SendAdminHeartbeat(); err == nil {
|
||||
t.Fatal("expected error when non-admin sends heartbeat")
|
||||
}
|
||||
|
||||
em := NewElectionManager(cfg)
|
||||
|
||||
// Create vote message with invalid data
|
||||
msg := ElectionMessage{
|
||||
Type: MessageTypeVote,
|
||||
NodeID: "voter-1",
|
||||
Data: "invalid-data", // Should be map[string]interface{}
|
||||
if err := em.Start(); err != nil {
|
||||
t.Fatalf("failed to start election manager: %v", err)
|
||||
}
|
||||
|
||||
// Handle the vote - should not crash
|
||||
em.handleElectionVote(msg)
|
||||
|
||||
// Verify no vote was recorded
|
||||
em.mu.RLock()
|
||||
_, exists := em.votes["voter-1"]
|
||||
em.mu.RUnlock()
|
||||
|
||||
if exists {
|
||||
t.Error("Expected no vote to be recorded with invalid data")
|
||||
}
|
||||
}
|
||||
|
||||
func TestElectionManager_CompleteElection(t *testing.T) {
|
||||
cfg := &config.Config{
|
||||
Agent: config.AgentConfig{
|
||||
ID: "test-node",
|
||||
},
|
||||
}
|
||||
|
||||
em := NewElectionManager(cfg)
|
||||
|
||||
// Set up election state
|
||||
em.mu.Lock()
|
||||
em.state = StateCandidate
|
||||
em.currentTerm = 1
|
||||
em.currentAdmin = em.nodeID
|
||||
em.mu.Unlock()
|
||||
|
||||
// Add a candidate
|
||||
candidate := &AdminCandidate{
|
||||
NodeID: "winner",
|
||||
Term: 1,
|
||||
Score: 0.9,
|
||||
Capabilities: []string{"admin", "leader"},
|
||||
LastSeen: time.Now(),
|
||||
}
|
||||
|
||||
em.mu.Lock()
|
||||
em.candidates["winner"] = candidate
|
||||
em.mu.Unlock()
|
||||
|
||||
// Complete election
|
||||
em.CompleteElection()
|
||||
|
||||
// Verify state reset
|
||||
em.mu.RLock()
|
||||
state := em.state
|
||||
em.mu.RUnlock()
|
||||
|
||||
if state != StateIdle {
|
||||
t.Errorf("Expected state to be StateIdle after completing election, got %v", state)
|
||||
if err := em.SendAdminHeartbeat(); err != nil {
|
||||
t.Fatalf("expected heartbeat to succeed for current admin, got error: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestElectionManager_Concurrency(t *testing.T) {
|
||||
cfg := &config.Config{
|
||||
Agent: config.AgentConfig{
|
||||
ID: "test-node",
|
||||
},
|
||||
}
|
||||
|
||||
em := NewElectionManager(cfg)
|
||||
|
||||
// Test concurrent access to vote and candidate operations
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
|
||||
defer cancel()
|
||||
|
||||
// Add a candidate
|
||||
candidate := &AdminCandidate{
|
||||
NodeID: "candidate-1",
|
||||
Term: 1,
|
||||
Score: 0.8,
|
||||
Capabilities: []string{"admin"},
|
||||
LastSeen: time.Now(),
|
||||
}
|
||||
|
||||
err := em.AddCandidate(candidate)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to add candidate: %v", err)
|
||||
}
|
||||
|
||||
// Run concurrent operations
|
||||
done := make(chan bool, 2)
|
||||
|
||||
// Concurrent voting
|
||||
go func() {
|
||||
defer func() { done <- true }()
|
||||
for i := 0; i < 10; i++ {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
default:
|
||||
em.Vote("candidate-1") // Ignore errors in concurrent test
|
||||
time.Sleep(10 * time.Millisecond)
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
// Concurrent state checking
|
||||
go func() {
|
||||
defer func() { done <- true }()
|
||||
for i := 0; i < 10; i++ {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
default:
|
||||
em.findElectionWinner() // Just check for races
|
||||
time.Sleep(10 * time.Millisecond)
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
// Wait for completion
|
||||
for i := 0; i < 2; i++ {
|
||||
select {
|
||||
case <-done:
|
||||
case <-ctx.Done():
|
||||
t.Fatal("Concurrent test timed out")
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user