Harden CHORUS security and messaging stack
This commit is contained in:
@@ -19,8 +19,8 @@ import (
|
||||
type ElectionTrigger string
|
||||
|
||||
const (
|
||||
TriggerHeartbeatTimeout ElectionTrigger = "admin_heartbeat_timeout"
|
||||
TriggerDiscoveryFailure ElectionTrigger = "no_admin_discovered"
|
||||
TriggerHeartbeatTimeout ElectionTrigger = "admin_heartbeat_timeout"
|
||||
TriggerDiscoveryFailure ElectionTrigger = "no_admin_discovered"
|
||||
TriggerSplitBrain ElectionTrigger = "split_brain_detected"
|
||||
TriggerQuorumRestored ElectionTrigger = "quorum_restored"
|
||||
TriggerManual ElectionTrigger = "manual_trigger"
|
||||
@@ -30,30 +30,35 @@ const (
|
||||
type ElectionState string
|
||||
|
||||
const (
|
||||
StateIdle ElectionState = "idle"
|
||||
StateDiscovering ElectionState = "discovering"
|
||||
StateElecting ElectionState = "electing"
|
||||
electionTopic = "CHORUS/election/v1"
|
||||
adminHeartbeatTopic = "CHORUS/admin/heartbeat/v1"
|
||||
)
|
||||
|
||||
const (
|
||||
StateIdle ElectionState = "idle"
|
||||
StateDiscovering ElectionState = "discovering"
|
||||
StateElecting ElectionState = "electing"
|
||||
StateReconstructing ElectionState = "reconstructing_keys"
|
||||
StateComplete ElectionState = "complete"
|
||||
StateComplete ElectionState = "complete"
|
||||
)
|
||||
|
||||
// AdminCandidate represents a node candidate for admin role
|
||||
type AdminCandidate struct {
|
||||
NodeID string `json:"node_id"`
|
||||
PeerID peer.ID `json:"peer_id"`
|
||||
Capabilities []string `json:"capabilities"`
|
||||
Uptime time.Duration `json:"uptime"`
|
||||
Resources ResourceMetrics `json:"resources"`
|
||||
Experience time.Duration `json:"experience"`
|
||||
Score float64 `json:"score"`
|
||||
Metadata map[string]interface{} `json:"metadata,omitempty"`
|
||||
NodeID string `json:"node_id"`
|
||||
PeerID peer.ID `json:"peer_id"`
|
||||
Capabilities []string `json:"capabilities"`
|
||||
Uptime time.Duration `json:"uptime"`
|
||||
Resources ResourceMetrics `json:"resources"`
|
||||
Experience time.Duration `json:"experience"`
|
||||
Score float64 `json:"score"`
|
||||
Metadata map[string]interface{} `json:"metadata,omitempty"`
|
||||
}
|
||||
|
||||
// ResourceMetrics holds node resource information for election scoring.
//
// NOTE(review): scoring computes 1.0-usage for the three usage fields, so the
// values are presumably fractions in [0, 1] - confirm against the collector.
type ResourceMetrics struct {
	// CPUUsage is the CPU utilisation; lower scores better.
	CPUUsage float64 `json:"cpu_usage"`
	// MemoryUsage is the memory utilisation; lower scores better.
	MemoryUsage float64 `json:"memory_usage"`
	// DiskUsage is the disk utilisation; lower scores better.
	DiskUsage float64 `json:"disk_usage"`
	// NetworkQuality is the network quality; higher scores better.
	NetworkQuality float64 `json:"network_quality"`
}
|
||||
|
||||
@@ -68,46 +73,46 @@ type ElectionMessage struct {
|
||||
|
||||
// ElectionManager handles admin election coordination
|
||||
type ElectionManager struct {
|
||||
ctx context.Context
|
||||
cancel context.CancelFunc
|
||||
config *config.Config
|
||||
host libp2p.Host
|
||||
pubsub *pubsub.PubSub
|
||||
nodeID string
|
||||
|
||||
ctx context.Context
|
||||
cancel context.CancelFunc
|
||||
config *config.Config
|
||||
host libp2p.Host
|
||||
pubsub *pubsub.PubSub
|
||||
nodeID string
|
||||
|
||||
// Election state
|
||||
mu sync.RWMutex
|
||||
state ElectionState
|
||||
currentTerm int
|
||||
lastHeartbeat time.Time
|
||||
currentAdmin string
|
||||
candidates map[string]*AdminCandidate
|
||||
votes map[string]string // voter -> candidate
|
||||
|
||||
mu sync.RWMutex
|
||||
state ElectionState
|
||||
currentTerm int
|
||||
lastHeartbeat time.Time
|
||||
currentAdmin string
|
||||
candidates map[string]*AdminCandidate
|
||||
votes map[string]string // voter -> candidate
|
||||
|
||||
// Timers and channels
|
||||
heartbeatTimer *time.Timer
|
||||
discoveryTimer *time.Timer
|
||||
electionTimer *time.Timer
|
||||
electionTrigger chan ElectionTrigger
|
||||
|
||||
heartbeatTimer *time.Timer
|
||||
discoveryTimer *time.Timer
|
||||
electionTimer *time.Timer
|
||||
electionTrigger chan ElectionTrigger
|
||||
|
||||
// Heartbeat management
|
||||
heartbeatManager *HeartbeatManager
|
||||
|
||||
heartbeatManager *HeartbeatManager
|
||||
|
||||
// Callbacks
|
||||
onAdminChanged func(oldAdmin, newAdmin string)
|
||||
onAdminChanged func(oldAdmin, newAdmin string)
|
||||
onElectionComplete func(winner string)
|
||||
|
||||
|
||||
startTime time.Time
|
||||
}
|
||||
|
||||
// HeartbeatManager manages admin heartbeat lifecycle
|
||||
type HeartbeatManager struct {
|
||||
mu sync.Mutex
|
||||
isRunning bool
|
||||
stopCh chan struct{}
|
||||
ticker *time.Ticker
|
||||
electionMgr *ElectionManager
|
||||
logger func(msg string, args ...interface{})
|
||||
mu sync.Mutex
|
||||
isRunning bool
|
||||
stopCh chan struct{}
|
||||
ticker *time.Ticker
|
||||
electionMgr *ElectionManager
|
||||
logger func(msg string, args ...interface{})
|
||||
}
|
||||
|
||||
// NewElectionManager creates a new election manager
|
||||
@@ -119,7 +124,7 @@ func NewElectionManager(
|
||||
nodeID string,
|
||||
) *ElectionManager {
|
||||
electionCtx, cancel := context.WithCancel(ctx)
|
||||
|
||||
|
||||
em := &ElectionManager{
|
||||
ctx: electionCtx,
|
||||
cancel: cancel,
|
||||
@@ -133,7 +138,7 @@ func NewElectionManager(
|
||||
electionTrigger: make(chan ElectionTrigger, 10),
|
||||
startTime: time.Now(),
|
||||
}
|
||||
|
||||
|
||||
// Initialize heartbeat manager
|
||||
em.heartbeatManager = &HeartbeatManager{
|
||||
electionMgr: em,
|
||||
@@ -141,29 +146,32 @@ func NewElectionManager(
|
||||
log.Printf("[HEARTBEAT] "+msg, args...)
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
return em
|
||||
}
|
||||
|
||||
// Start begins the election management system
|
||||
func (em *ElectionManager) Start() error {
|
||||
log.Printf("🗳️ Starting election manager for node %s", em.nodeID)
|
||||
|
||||
// TODO: Subscribe to election-related messages - pubsub interface needs update
|
||||
// if err := em.pubsub.Subscribe("CHORUS/election/v1", em.handleElectionMessage); err != nil {
|
||||
// return fmt.Errorf("failed to subscribe to election messages: %w", err)
|
||||
// }
|
||||
//
|
||||
// if err := em.pubsub.Subscribe("CHORUS/admin/heartbeat/v1", em.handleAdminHeartbeat); err != nil {
|
||||
// return fmt.Errorf("failed to subscribe to admin heartbeat: %w", err)
|
||||
// }
|
||||
|
||||
|
||||
if err := em.pubsub.SubscribeRawTopic(electionTopic, func(data []byte, _ peer.ID) {
|
||||
em.handleElectionMessage(data)
|
||||
}); err != nil {
|
||||
return fmt.Errorf("failed to subscribe to election messages: %w", err)
|
||||
}
|
||||
|
||||
if err := em.pubsub.SubscribeRawTopic(adminHeartbeatTopic, func(data []byte, _ peer.ID) {
|
||||
em.handleAdminHeartbeat(data)
|
||||
}); err != nil {
|
||||
return fmt.Errorf("failed to subscribe to admin heartbeat: %w", err)
|
||||
}
|
||||
|
||||
// Start discovery process
|
||||
go em.startDiscoveryLoop()
|
||||
|
||||
|
||||
// Start election coordinator
|
||||
go em.electionCoordinator()
|
||||
|
||||
|
||||
// Start heartbeat if this node is already admin at startup
|
||||
if em.IsCurrentAdmin() {
|
||||
go func() {
|
||||
@@ -174,7 +182,7 @@ func (em *ElectionManager) Start() error {
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
|
||||
log.Printf("✅ Election manager started")
|
||||
return nil
|
||||
}
|
||||
@@ -182,17 +190,17 @@ func (em *ElectionManager) Start() error {
|
||||
// Stop shuts down the election manager
|
||||
func (em *ElectionManager) Stop() {
|
||||
log.Printf("🛑 Stopping election manager")
|
||||
|
||||
|
||||
// Stop heartbeat first
|
||||
if em.heartbeatManager != nil {
|
||||
em.heartbeatManager.StopHeartbeat()
|
||||
}
|
||||
|
||||
|
||||
em.cancel()
|
||||
|
||||
|
||||
em.mu.Lock()
|
||||
defer em.mu.Unlock()
|
||||
|
||||
|
||||
if em.heartbeatTimer != nil {
|
||||
em.heartbeatTimer.Stop()
|
||||
}
|
||||
@@ -255,7 +263,7 @@ func (em *ElectionManager) GetHeartbeatStatus() map[string]interface{} {
|
||||
// startDiscoveryLoop starts the admin discovery loop
|
||||
func (em *ElectionManager) startDiscoveryLoop() {
|
||||
log.Printf("🔍 Starting admin discovery loop")
|
||||
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-em.ctx.Done():
|
||||
@@ -272,19 +280,19 @@ func (em *ElectionManager) performAdminDiscovery() {
|
||||
currentState := em.state
|
||||
lastHeartbeat := em.lastHeartbeat
|
||||
em.mu.Unlock()
|
||||
|
||||
|
||||
// Only discover if we're idle or the heartbeat is stale
|
||||
if currentState != StateIdle {
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
// Check if admin heartbeat has timed out
|
||||
if !lastHeartbeat.IsZero() && time.Since(lastHeartbeat) > em.config.Security.ElectionConfig.HeartbeatTimeout {
|
||||
log.Printf("⚰️ Admin heartbeat timeout detected (last: %v)", lastHeartbeat)
|
||||
em.TriggerElection(TriggerHeartbeatTimeout)
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
// If we haven't heard from an admin recently, try to discover one
|
||||
if lastHeartbeat.IsZero() || time.Since(lastHeartbeat) > em.config.Security.ElectionConfig.DiscoveryTimeout/2 {
|
||||
em.sendDiscoveryRequest()
|
||||
@@ -298,7 +306,7 @@ func (em *ElectionManager) sendDiscoveryRequest() {
|
||||
NodeID: em.nodeID,
|
||||
Timestamp: time.Now(),
|
||||
}
|
||||
|
||||
|
||||
if err := em.publishElectionMessage(discoveryMsg); err != nil {
|
||||
log.Printf("❌ Failed to send admin discovery request: %v", err)
|
||||
}
|
||||
@@ -307,7 +315,7 @@ func (em *ElectionManager) sendDiscoveryRequest() {
|
||||
// electionCoordinator handles the main election logic
|
||||
func (em *ElectionManager) electionCoordinator() {
|
||||
log.Printf("🎯 Election coordinator started")
|
||||
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-em.ctx.Done():
|
||||
@@ -321,17 +329,17 @@ func (em *ElectionManager) electionCoordinator() {
|
||||
// handleElectionTrigger processes election triggers
|
||||
func (em *ElectionManager) handleElectionTrigger(trigger ElectionTrigger) {
|
||||
log.Printf("🔥 Processing election trigger: %s", trigger)
|
||||
|
||||
|
||||
em.mu.Lock()
|
||||
currentState := em.state
|
||||
em.mu.Unlock()
|
||||
|
||||
|
||||
// Ignore triggers if we're already in an election
|
||||
if currentState != StateIdle {
|
||||
log.Printf("⏸️ Ignoring election trigger, current state: %s", currentState)
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
// Begin election process
|
||||
em.beginElection(trigger)
|
||||
}
|
||||
@@ -339,7 +347,7 @@ func (em *ElectionManager) handleElectionTrigger(trigger ElectionTrigger) {
|
||||
// beginElection starts a new election
|
||||
func (em *ElectionManager) beginElection(trigger ElectionTrigger) {
|
||||
log.Printf("🗳️ Beginning election due to: %s", trigger)
|
||||
|
||||
|
||||
em.mu.Lock()
|
||||
em.state = StateElecting
|
||||
em.currentTerm++
|
||||
@@ -347,12 +355,12 @@ func (em *ElectionManager) beginElection(trigger ElectionTrigger) {
|
||||
em.candidates = make(map[string]*AdminCandidate)
|
||||
em.votes = make(map[string]string)
|
||||
em.mu.Unlock()
|
||||
|
||||
|
||||
// Announce candidacy if this node can be admin
|
||||
if em.canBeAdmin() {
|
||||
em.announceCandidacy(term)
|
||||
}
|
||||
|
||||
|
||||
// Send election announcement
|
||||
electionMsg := ElectionMessage{
|
||||
Type: "election_started",
|
||||
@@ -363,11 +371,11 @@ func (em *ElectionManager) beginElection(trigger ElectionTrigger) {
|
||||
"trigger": string(trigger),
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
if err := em.publishElectionMessage(electionMsg); err != nil {
|
||||
log.Printf("❌ Failed to announce election start: %v", err)
|
||||
}
|
||||
|
||||
|
||||
// Start election timeout
|
||||
em.startElectionTimeout(term)
|
||||
}
|
||||
@@ -386,7 +394,7 @@ func (em *ElectionManager) canBeAdmin() bool {
|
||||
// announceCandidacy announces this node as an election candidate
|
||||
func (em *ElectionManager) announceCandidacy(term int) {
|
||||
uptime := time.Since(em.startTime)
|
||||
|
||||
|
||||
candidate := &AdminCandidate{
|
||||
NodeID: em.nodeID,
|
||||
PeerID: em.host.ID(),
|
||||
@@ -396,13 +404,13 @@ func (em *ElectionManager) announceCandidacy(term int) {
|
||||
Experience: uptime, // For now, use uptime as experience
|
||||
Metadata: map[string]interface{}{
|
||||
"specialization": em.config.Agent.Specialization,
|
||||
"models": em.config.Agent.Models,
|
||||
"models": em.config.Agent.Models,
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
// Calculate candidate score
|
||||
candidate.Score = em.calculateCandidateScore(candidate)
|
||||
|
||||
|
||||
candidacyMsg := ElectionMessage{
|
||||
Type: "candidacy_announcement",
|
||||
NodeID: em.nodeID,
|
||||
@@ -410,9 +418,9 @@ func (em *ElectionManager) announceCandidacy(term int) {
|
||||
Term: term,
|
||||
Data: candidate,
|
||||
}
|
||||
|
||||
|
||||
log.Printf("📢 Announcing candidacy (score: %.2f)", candidate.Score)
|
||||
|
||||
|
||||
if err := em.publishElectionMessage(candidacyMsg); err != nil {
|
||||
log.Printf("❌ Failed to announce candidacy: %v", err)
|
||||
}
|
||||
@@ -423,9 +431,9 @@ func (em *ElectionManager) getResourceMetrics() ResourceMetrics {
|
||||
// TODO: Implement actual resource collection
|
||||
// For now, return simulated values
|
||||
return ResourceMetrics{
|
||||
CPUUsage: rand.Float64() * 0.5, // 0-50% CPU
|
||||
MemoryUsage: rand.Float64() * 0.7, // 0-70% Memory
|
||||
DiskUsage: rand.Float64() * 0.6, // 0-60% Disk
|
||||
CPUUsage: rand.Float64() * 0.5, // 0-50% CPU
|
||||
MemoryUsage: rand.Float64() * 0.7, // 0-70% Memory
|
||||
DiskUsage: rand.Float64() * 0.6, // 0-60% Disk
|
||||
NetworkQuality: 0.8 + rand.Float64()*0.2, // 80-100% Network Quality
|
||||
}
|
||||
}
|
||||
@@ -435,10 +443,10 @@ func (em *ElectionManager) calculateCandidateScore(candidate *AdminCandidate) fl
|
||||
// TODO: Add LeadershipScoring to config.ElectionConfig
|
||||
// scoring := em.config.Security.ElectionConfig.LeadershipScoring
|
||||
// Default scoring weights handled inline
|
||||
|
||||
|
||||
// Normalize metrics to 0-1 range
|
||||
uptimeScore := min(1.0, candidate.Uptime.Hours()/24.0) // Up to 24 hours gets full score
|
||||
|
||||
|
||||
// Capability score - higher for admin/coordination capabilities
|
||||
capabilityScore := 0.0
|
||||
adminCapabilities := []string{"admin_election", "context_curation", "key_reconstruction", "semantic_analysis", "project_manager"}
|
||||
@@ -455,22 +463,22 @@ func (em *ElectionManager) calculateCandidateScore(candidate *AdminCandidate) fl
|
||||
}
|
||||
}
|
||||
capabilityScore = min(1.0, capabilityScore)
|
||||
|
||||
|
||||
// Resource score - lower usage is better
|
||||
resourceScore := (1.0 - candidate.Resources.CPUUsage) * 0.3 +
|
||||
(1.0 - candidate.Resources.MemoryUsage) * 0.3 +
|
||||
(1.0 - candidate.Resources.DiskUsage) * 0.2 +
|
||||
candidate.Resources.NetworkQuality * 0.2
|
||||
|
||||
resourceScore := (1.0-candidate.Resources.CPUUsage)*0.3 +
|
||||
(1.0-candidate.Resources.MemoryUsage)*0.3 +
|
||||
(1.0-candidate.Resources.DiskUsage)*0.2 +
|
||||
candidate.Resources.NetworkQuality*0.2
|
||||
|
||||
experienceScore := min(1.0, candidate.Experience.Hours()/168.0) // Up to 1 week gets full score
|
||||
|
||||
|
||||
// Weighted final score (using default weights)
|
||||
finalScore := uptimeScore*0.3 +
|
||||
capabilityScore*0.2 +
|
||||
resourceScore*0.2 +
|
||||
candidate.Resources.NetworkQuality*0.15 +
|
||||
experienceScore*0.15
|
||||
|
||||
|
||||
return finalScore
|
||||
}
|
||||
|
||||
@@ -478,11 +486,11 @@ func (em *ElectionManager) calculateCandidateScore(candidate *AdminCandidate) fl
|
||||
func (em *ElectionManager) startElectionTimeout(term int) {
|
||||
em.mu.Lock()
|
||||
defer em.mu.Unlock()
|
||||
|
||||
|
||||
if em.electionTimer != nil {
|
||||
em.electionTimer.Stop()
|
||||
}
|
||||
|
||||
|
||||
em.electionTimer = time.AfterFunc(em.config.Security.ElectionConfig.ElectionTimeout, func() {
|
||||
em.completeElection(term)
|
||||
})
|
||||
@@ -492,15 +500,15 @@ func (em *ElectionManager) startElectionTimeout(term int) {
|
||||
func (em *ElectionManager) completeElection(term int) {
|
||||
em.mu.Lock()
|
||||
defer em.mu.Unlock()
|
||||
|
||||
|
||||
// Verify this is still the current term
|
||||
if term != em.currentTerm {
|
||||
log.Printf("⏰ Election timeout for old term %d, ignoring", term)
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
log.Printf("⏰ Election timeout reached, tallying votes")
|
||||
|
||||
|
||||
// Find the winning candidate
|
||||
winner := em.findElectionWinner()
|
||||
if winner == nil {
|
||||
@@ -513,14 +521,14 @@ func (em *ElectionManager) completeElection(term int) {
|
||||
}()
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
log.Printf("🏆 Election winner: %s (score: %.2f)", winner.NodeID, winner.Score)
|
||||
|
||||
|
||||
// Update admin
|
||||
oldAdmin := em.currentAdmin
|
||||
em.currentAdmin = winner.NodeID
|
||||
em.state = StateComplete
|
||||
|
||||
|
||||
// Announce the winner
|
||||
winnerMsg := ElectionMessage{
|
||||
Type: "election_winner",
|
||||
@@ -529,16 +537,16 @@ func (em *ElectionManager) completeElection(term int) {
|
||||
Term: term,
|
||||
Data: winner,
|
||||
}
|
||||
|
||||
|
||||
em.mu.Unlock() // Unlock before publishing
|
||||
|
||||
|
||||
if err := em.publishElectionMessage(winnerMsg); err != nil {
|
||||
log.Printf("❌ Failed to announce election winner: %v", err)
|
||||
}
|
||||
|
||||
|
||||
// Handle heartbeat lifecycle based on admin change
|
||||
em.handleHeartbeatTransition(oldAdmin, winner.NodeID)
|
||||
|
||||
|
||||
// Trigger callbacks
|
||||
if em.onAdminChanged != nil {
|
||||
em.onAdminChanged(oldAdmin, winner.NodeID)
|
||||
@@ -546,7 +554,7 @@ func (em *ElectionManager) completeElection(term int) {
|
||||
if em.onElectionComplete != nil {
|
||||
em.onElectionComplete(winner.NodeID)
|
||||
}
|
||||
|
||||
|
||||
em.mu.Lock()
|
||||
em.state = StateIdle // Reset state for next election
|
||||
}
|
||||
@@ -556,16 +564,16 @@ func (em *ElectionManager) findElectionWinner() *AdminCandidate {
|
||||
if len(em.candidates) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
// Count votes for each candidate
|
||||
voteCounts := make(map[string]int)
|
||||
totalVotes := 0
|
||||
|
||||
|
||||
// Initialize vote counts for all candidates
|
||||
for candidateID := range em.candidates {
|
||||
voteCounts[candidateID] = 0
|
||||
}
|
||||
|
||||
|
||||
// Tally actual votes
|
||||
for _, candidateID := range em.votes {
|
||||
if _, exists := em.candidates[candidateID]; exists {
|
||||
@@ -573,12 +581,12 @@ func (em *ElectionManager) findElectionWinner() *AdminCandidate {
|
||||
totalVotes++
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// If no votes cast, fall back to highest scoring candidate
|
||||
if totalVotes == 0 {
|
||||
var winner *AdminCandidate
|
||||
highestScore := -1.0
|
||||
|
||||
|
||||
for _, candidate := range em.candidates {
|
||||
if candidate.Score > highestScore {
|
||||
highestScore = candidate.Score
|
||||
@@ -587,12 +595,12 @@ func (em *ElectionManager) findElectionWinner() *AdminCandidate {
|
||||
}
|
||||
return winner
|
||||
}
|
||||
|
||||
|
||||
// Find candidate with most votes
|
||||
var winner *AdminCandidate
|
||||
maxVotes := -1
|
||||
highestScore := -1.0
|
||||
|
||||
|
||||
for candidateID, voteCount := range voteCounts {
|
||||
candidate := em.candidates[candidateID]
|
||||
if voteCount > maxVotes || (voteCount == maxVotes && candidate.Score > highestScore) {
|
||||
@@ -601,10 +609,10 @@ func (em *ElectionManager) findElectionWinner() *AdminCandidate {
|
||||
winner = candidate
|
||||
}
|
||||
}
|
||||
|
||||
log.Printf("🗳️ Election results: %d total votes, winner: %s with %d votes (score: %.2f)",
|
||||
|
||||
log.Printf("🗳️ Election results: %d total votes, winner: %s with %d votes (score: %.2f)",
|
||||
totalVotes, winner.NodeID, maxVotes, winner.Score)
|
||||
|
||||
|
||||
return winner
|
||||
}
|
||||
|
||||
@@ -615,12 +623,12 @@ func (em *ElectionManager) handleElectionMessage(data []byte) {
|
||||
log.Printf("❌ Failed to unmarshal election message: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
// Ignore messages from ourselves
|
||||
if msg.NodeID == em.nodeID {
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
switch msg.Type {
|
||||
case "admin_discovery_request":
|
||||
em.handleAdminDiscoveryRequest(msg)
|
||||
@@ -643,7 +651,7 @@ func (em *ElectionManager) handleAdminDiscoveryRequest(msg ElectionMessage) {
|
||||
currentAdmin := em.currentAdmin
|
||||
state := em.state
|
||||
em.mu.RUnlock()
|
||||
|
||||
|
||||
// Only respond if we know who the current admin is and we're idle
|
||||
if currentAdmin != "" && state == StateIdle {
|
||||
responseMsg := ElectionMessage{
|
||||
@@ -654,7 +662,7 @@ func (em *ElectionManager) handleAdminDiscoveryRequest(msg ElectionMessage) {
|
||||
"current_admin": currentAdmin,
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
if err := em.publishElectionMessage(responseMsg); err != nil {
|
||||
log.Printf("❌ Failed to send admin discovery response: %v", err)
|
||||
}
|
||||
@@ -679,7 +687,7 @@ func (em *ElectionManager) handleAdminDiscoveryResponse(msg ElectionMessage) {
|
||||
func (em *ElectionManager) handleElectionStarted(msg ElectionMessage) {
|
||||
em.mu.Lock()
|
||||
defer em.mu.Unlock()
|
||||
|
||||
|
||||
// If we receive an election start with a higher term, join the election
|
||||
if msg.Term > em.currentTerm {
|
||||
log.Printf("🔄 Joining election with term %d", msg.Term)
|
||||
@@ -687,7 +695,7 @@ func (em *ElectionManager) handleElectionStarted(msg ElectionMessage) {
|
||||
em.state = StateElecting
|
||||
em.candidates = make(map[string]*AdminCandidate)
|
||||
em.votes = make(map[string]string)
|
||||
|
||||
|
||||
// Announce candidacy if eligible
|
||||
if em.canBeAdmin() {
|
||||
go em.announceCandidacy(msg.Term)
|
||||
@@ -699,25 +707,25 @@ func (em *ElectionManager) handleElectionStarted(msg ElectionMessage) {
|
||||
func (em *ElectionManager) handleCandidacyAnnouncement(msg ElectionMessage) {
|
||||
em.mu.Lock()
|
||||
defer em.mu.Unlock()
|
||||
|
||||
|
||||
// Only process if it's for the current term
|
||||
if msg.Term != em.currentTerm {
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
// Convert data to candidate struct
|
||||
candidateData, err := json.Marshal(msg.Data)
|
||||
if err != nil {
|
||||
log.Printf("❌ Failed to marshal candidate data: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
var candidate AdminCandidate
|
||||
if err := json.Unmarshal(candidateData, &candidate); err != nil {
|
||||
log.Printf("❌ Failed to unmarshal candidate: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
log.Printf("📝 Received candidacy from %s (score: %.2f)", candidate.NodeID, candidate.Score)
|
||||
em.candidates[candidate.NodeID] = &candidate
|
||||
}
|
||||
@@ -726,31 +734,31 @@ func (em *ElectionManager) handleCandidacyAnnouncement(msg ElectionMessage) {
|
||||
func (em *ElectionManager) handleElectionVote(msg ElectionMessage) {
|
||||
em.mu.Lock()
|
||||
defer em.mu.Unlock()
|
||||
|
||||
|
||||
// Extract vote data
|
||||
voteData, ok := msg.Data.(map[string]interface{})
|
||||
if !ok {
|
||||
log.Printf("❌ Invalid vote data format from %s", msg.NodeID)
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
candidateID, ok := voteData["candidate"].(string)
|
||||
if !ok {
|
||||
log.Printf("❌ Invalid candidate ID in vote from %s", msg.NodeID)
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
// Validate candidate exists
|
||||
if _, exists := em.candidates[candidateID]; !exists {
|
||||
log.Printf("❌ Vote for unknown candidate %s from %s", candidateID, msg.NodeID)
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
// Prevent duplicate voting
|
||||
if existingVote, exists := em.votes[msg.NodeID]; exists {
|
||||
log.Printf("⚠️ Node %s already voted for %s, updating to %s", msg.NodeID, existingVote, candidateID)
|
||||
}
|
||||
|
||||
|
||||
// Record the vote
|
||||
em.votes[msg.NodeID] = candidateID
|
||||
log.Printf("🗳️ Recorded vote from %s for candidate %s", msg.NodeID, candidateID)
|
||||
@@ -763,24 +771,24 @@ func (em *ElectionManager) handleElectionWinner(msg ElectionMessage) {
|
||||
log.Printf("❌ Failed to marshal winner data: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
var winner AdminCandidate
|
||||
if err := json.Unmarshal(candidateData, &winner); err != nil {
|
||||
log.Printf("❌ Failed to unmarshal winner: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
em.mu.Lock()
|
||||
oldAdmin := em.currentAdmin
|
||||
em.currentAdmin = winner.NodeID
|
||||
em.state = StateIdle
|
||||
em.mu.Unlock()
|
||||
|
||||
|
||||
log.Printf("👑 New admin elected: %s", winner.NodeID)
|
||||
|
||||
|
||||
// Handle heartbeat lifecycle based on admin change
|
||||
em.handleHeartbeatTransition(oldAdmin, winner.NodeID)
|
||||
|
||||
|
||||
// Trigger callback
|
||||
if em.onAdminChanged != nil {
|
||||
em.onAdminChanged(oldAdmin, winner.NodeID)
|
||||
@@ -796,7 +804,7 @@ func (em *ElectionManager) handleHeartbeatTransition(oldAdmin, newAdmin string)
|
||||
log.Printf("⚠️ Error stopping heartbeat: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// If we gained admin role, start heartbeat
|
||||
if newAdmin == em.nodeID && oldAdmin != em.nodeID {
|
||||
log.Printf("🔄 Gained admin role, starting heartbeat")
|
||||
@@ -816,15 +824,15 @@ func (em *ElectionManager) handleAdminHeartbeat(data []byte) {
|
||||
NodeID string `json:"node_id"`
|
||||
Timestamp time.Time `json:"timestamp"`
|
||||
}
|
||||
|
||||
|
||||
if err := json.Unmarshal(data, &heartbeat); err != nil {
|
||||
log.Printf("❌ Failed to unmarshal heartbeat: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
em.mu.Lock()
|
||||
defer em.mu.Unlock()
|
||||
|
||||
|
||||
// Update admin and heartbeat timestamp
|
||||
if em.currentAdmin == "" || em.currentAdmin == heartbeat.NodeID {
|
||||
em.currentAdmin = heartbeat.NodeID
|
||||
@@ -838,11 +846,8 @@ func (em *ElectionManager) publishElectionMessage(msg ElectionMessage) error {
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to marshal election message: %w", err)
|
||||
}
|
||||
|
||||
// TODO: Fix pubsub interface
|
||||
// return em.pubsub.Publish("CHORUS/election/v1", data)
|
||||
_ = data // Avoid unused variable
|
||||
return nil
|
||||
|
||||
return em.pubsub.PublishRaw(electionTopic, data)
|
||||
}
|
||||
|
||||
// SendAdminHeartbeat sends admin heartbeat (only if this node is admin)
|
||||
@@ -850,7 +855,7 @@ func (em *ElectionManager) SendAdminHeartbeat() error {
|
||||
if !em.IsCurrentAdmin() {
|
||||
return fmt.Errorf("not current admin")
|
||||
}
|
||||
|
||||
|
||||
heartbeat := struct {
|
||||
NodeID string `json:"node_id"`
|
||||
Timestamp time.Time `json:"timestamp"`
|
||||
@@ -858,16 +863,13 @@ func (em *ElectionManager) SendAdminHeartbeat() error {
|
||||
NodeID: em.nodeID,
|
||||
Timestamp: time.Now(),
|
||||
}
|
||||
|
||||
|
||||
data, err := json.Marshal(heartbeat)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to marshal heartbeat: %w", err)
|
||||
}
|
||||
|
||||
// TODO: Fix pubsub interface
|
||||
// return em.pubsub.Publish("CHORUS/admin/heartbeat/v1", data)
|
||||
_ = data // Avoid unused variable
|
||||
return nil
|
||||
|
||||
return em.pubsub.PublishRaw(adminHeartbeatTopic, data)
|
||||
}
|
||||
|
||||
// min returns the minimum of two float64 values
|
||||
@@ -894,26 +896,26 @@ func NewHeartbeatManager(electionMgr *ElectionManager) *HeartbeatManager {
|
||||
func (hm *HeartbeatManager) StartHeartbeat() error {
|
||||
hm.mu.Lock()
|
||||
defer hm.mu.Unlock()
|
||||
|
||||
|
||||
if hm.isRunning {
|
||||
hm.logger("Heartbeat already running")
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
if !hm.electionMgr.IsCurrentAdmin() {
|
||||
return fmt.Errorf("not admin, cannot start heartbeat")
|
||||
}
|
||||
|
||||
|
||||
hm.logger("Starting admin heartbeat transmission")
|
||||
|
||||
|
||||
hm.stopCh = make(chan struct{})
|
||||
interval := hm.electionMgr.config.Security.ElectionConfig.HeartbeatTimeout / 2
|
||||
hm.ticker = time.NewTicker(interval)
|
||||
hm.isRunning = true
|
||||
|
||||
|
||||
// Start heartbeat goroutine
|
||||
go hm.heartbeatLoop()
|
||||
|
||||
|
||||
hm.logger("Admin heartbeat started (interval: %v)", interval)
|
||||
return nil
|
||||
}
|
||||
@@ -922,22 +924,22 @@ func (hm *HeartbeatManager) StartHeartbeat() error {
|
||||
func (hm *HeartbeatManager) StopHeartbeat() error {
|
||||
hm.mu.Lock()
|
||||
defer hm.mu.Unlock()
|
||||
|
||||
|
||||
if !hm.isRunning {
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
hm.logger("Stopping admin heartbeat transmission")
|
||||
|
||||
|
||||
// Signal stop
|
||||
close(hm.stopCh)
|
||||
|
||||
|
||||
// Stop ticker
|
||||
if hm.ticker != nil {
|
||||
hm.ticker.Stop()
|
||||
hm.ticker = nil
|
||||
}
|
||||
|
||||
|
||||
hm.isRunning = false
|
||||
hm.logger("Admin heartbeat stopped")
|
||||
return nil
|
||||
@@ -958,7 +960,7 @@ func (hm *HeartbeatManager) heartbeatLoop() {
|
||||
hm.mu.Unlock()
|
||||
hm.logger("Heartbeat loop terminated")
|
||||
}()
|
||||
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-hm.ticker.C:
|
||||
@@ -971,11 +973,11 @@ func (hm *HeartbeatManager) heartbeatLoop() {
|
||||
hm.logger("No longer admin, stopping heartbeat")
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
case <-hm.stopCh:
|
||||
hm.logger("Heartbeat stop signal received")
|
||||
return
|
||||
|
||||
|
||||
case <-hm.electionMgr.ctx.Done():
|
||||
hm.logger("Election manager context cancelled")
|
||||
return
|
||||
@@ -987,19 +989,19 @@ func (hm *HeartbeatManager) heartbeatLoop() {
|
||||
func (hm *HeartbeatManager) GetHeartbeatStatus() map[string]interface{} {
|
||||
hm.mu.Lock()
|
||||
defer hm.mu.Unlock()
|
||||
|
||||
|
||||
status := map[string]interface{}{
|
||||
"running": hm.isRunning,
|
||||
"is_admin": hm.electionMgr.IsCurrentAdmin(),
|
||||
"last_sent": time.Now(), // TODO: Track actual last sent time
|
||||
"running": hm.isRunning,
|
||||
"is_admin": hm.electionMgr.IsCurrentAdmin(),
|
||||
"last_sent": time.Now(), // TODO: Track actual last sent time
|
||||
}
|
||||
|
||||
|
||||
if hm.isRunning && hm.ticker != nil {
|
||||
// Calculate next heartbeat time (approximate)
|
||||
interval := hm.electionMgr.config.Security.ElectionConfig.HeartbeatTimeout / 2
|
||||
status["interval"] = interval.String()
|
||||
status["next_heartbeat"] = time.Now().Add(interval)
|
||||
}
|
||||
|
||||
|
||||
return status
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user