Implement chrs-election: Stateful weighted leader election following original CHORUS specs
This commit is contained in:
16
chrs-election/Cargo.toml
Normal file
16
chrs-election/Cargo.toml
Normal file
@@ -0,0 +1,16 @@
|
||||
# Manifest for the chrs-election crate.
[package]
name = "chrs-election"
version = "0.1.0"
edition = "2021"

[dependencies]
# Sibling crates, resolved by relative path.
# NOTE(review): src/lib.rs in this commit only imports chrs_discovery; chrs-mail,
# chrs-council, uuid and rand are not referenced there — confirm they are needed.
chrs-mail = { path = "../chrs-mail" }
chrs-discovery = { path = "../chrs-discovery" }
chrs-council = { path = "../chrs-council" }

# Message (de)serialization.
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"

# Error enum derivation.
thiserror = "1.0"

# Timestamps, identifiers, async runtime, randomness.
chrono = { version = "0.4", features = ["serde"] }
uuid = { version = "1.0", features = ["v4", "serde"] }
tokio = { version = "1.0", features = ["full"] }
rand = "0.8"
|
||||
261
chrs-election/src/lib.rs
Normal file
261
chrs-election/src/lib.rs
Normal file
@@ -0,0 +1,261 @@
|
||||
//! chrs-election: State-machine based leader election for CHORUS.
|
||||
|
||||
use chrs_discovery::{BusHandle, BusMessage};
|
||||
use chrono::{DateTime, Utc, Duration};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use thiserror::Error;
|
||||
use std::collections::HashMap;
|
||||
use std::sync::{Arc, RwLock};
|
||||
|
||||
/// Bus topic carrying every [`ElectionMessage`] (discovery, candidacy, results).
const ELECTION_TOPIC: &str = "CHORUS/election/v1";

/// Bus topic on which the current admin publishes its liveness heartbeat.
const HEARTBEAT_TOPIC: &str = "CHORUS/admin/heartbeat/v1";
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Eq)]
|
||||
pub enum ElectionState {
|
||||
Idle,
|
||||
Discovering,
|
||||
Electing,
|
||||
Complete,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct ResourceMetrics {
|
||||
pub cpu_usage: f64,
|
||||
pub memory_usage: f64,
|
||||
pub disk_usage: f64,
|
||||
pub network_quality: f64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct AdminCandidate {
|
||||
pub node_id: String,
|
||||
pub capabilities: Vec<String>,
|
||||
pub uptime_secs: u64,
|
||||
pub resources: ResourceMetrics,
|
||||
pub experience_secs: u64,
|
||||
pub score: f64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
#[serde(tag = "type", content = "data")]
|
||||
pub enum ElectionMessage {
|
||||
#[serde(rename = "admin_discovery_request")]
|
||||
DiscoveryRequest { node_id: String },
|
||||
#[serde(rename = "admin_discovery_response")]
|
||||
DiscoveryResponse { node_id: String, current_admin: String },
|
||||
#[serde(rename = "election_started")]
|
||||
ElectionStarted { node_id: String, term: u64 },
|
||||
#[serde(rename = "candidacy_announcement")]
|
||||
Candidacy { term: u64, candidate: AdminCandidate },
|
||||
#[serde(rename = "election_winner")]
|
||||
ElectionWinner { term: u64, winner_id: String },
|
||||
}
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
pub enum ElectionError {
|
||||
#[error("Bus error: {0}")]
|
||||
Bus(String),
|
||||
#[error("Serialization error: {0}")]
|
||||
Serde(#[from] serde_json::Error),
|
||||
}
|
||||
|
||||
pub struct ElectionManager {
|
||||
pub node_id: String,
|
||||
state: Arc<RwLock<ElectionState>>,
|
||||
current_term: Arc<RwLock<u64>>,
|
||||
current_admin: Arc<RwLock<Option<String>>>,
|
||||
last_heartbeat: Arc<RwLock<DateTime<Utc>>>,
|
||||
candidates: Arc<RwLock<HashMap<String, AdminCandidate>>>,
|
||||
bus: BusHandle,
|
||||
start_time: DateTime<Utc>,
|
||||
}
|
||||
|
||||
impl ElectionManager {
|
||||
pub fn new(node_id: &str, bus: BusHandle) -> Self {
|
||||
Self {
|
||||
node_id: node_id.to_string(),
|
||||
state: Arc::new(RwLock::new(ElectionState::Idle)),
|
||||
current_term: Arc::new(RwLock::new(0)),
|
||||
current_admin: Arc::new(RwLock::new(None)),
|
||||
last_heartbeat: Arc::new(RwLock::new(Utc::now())),
|
||||
candidates: Arc::new(RwLock::new(HashMap::new())),
|
||||
bus,
|
||||
start_time: Utc::now(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_leader(&self) -> bool {
|
||||
let admin = self.current_admin.read().unwrap();
|
||||
admin.as_ref() == Some(&self.node_id)
|
||||
}
|
||||
|
||||
pub fn calculate_score(&self, metrics: &ResourceMetrics, capabilities: &[String]) -> f64 {
|
||||
let uptime = Utc::now().signed_duration_since(self.start_time).num_seconds() as f64;
|
||||
let uptime_score = (uptime / 86400.0).min(1.0); // Normalized to 24h
|
||||
|
||||
let mut cap_score: f64 = 0.0;
|
||||
for cap in capabilities {
|
||||
match cap.as_str() {
|
||||
"project_manager" | "context_curation" => cap_score += 0.35,
|
||||
"admin_election" | "semantic_analysis" => cap_score += 0.25,
|
||||
_ => cap_score += 0.1,
|
||||
}
|
||||
}
|
||||
cap_score = cap_score.min(1.0);
|
||||
|
||||
let res_score = (1.0 - metrics.cpu_usage) * 0.3 +
|
||||
(1.0 - metrics.memory_usage) * 0.3 +
|
||||
(1.0 - metrics.disk_usage) * 0.2 +
|
||||
metrics.network_quality * 0.2;
|
||||
|
||||
uptime_score * 0.3 + cap_score * 0.2 + res_score * 0.2 + metrics.network_quality * 0.15 + (uptime_score * 0.15)
|
||||
}
|
||||
|
||||
pub async fn process_message(&self, msg: &BusMessage) -> Result<(), ElectionError> {
|
||||
if msg.topic == HEARTBEAT_TOPIC {
|
||||
let heartbeat: serde_json::Value = serde_json::from_slice(&msg.payload)?;
|
||||
if let Some(admin_id) = heartbeat["node_id"].as_str() {
|
||||
let mut admin = self.current_admin.write().unwrap();
|
||||
let mut last_hb = self.last_heartbeat.write().unwrap();
|
||||
*admin = Some(admin_id.to_string());
|
||||
*last_hb = Utc::now();
|
||||
}
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
if msg.topic == ELECTION_TOPIC {
|
||||
let election_msg: ElectionMessage = serde_json::from_slice(&msg.payload)?;
|
||||
match election_msg {
|
||||
ElectionMessage::DiscoveryRequest { node_id: _ } => {
|
||||
let admin = self.current_admin.read().unwrap();
|
||||
if let Some(ref admin_id) = *admin {
|
||||
let response = ElectionMessage::DiscoveryResponse {
|
||||
node_id: self.node_id.clone(),
|
||||
current_admin: admin_id.clone(),
|
||||
};
|
||||
let _ = self.bus.publish(ELECTION_TOPIC, serde_json::to_vec(&response)?);
|
||||
}
|
||||
}
|
||||
ElectionMessage::DiscoveryResponse { current_admin, .. } => {
|
||||
let mut admin = self.current_admin.write().unwrap();
|
||||
if admin.is_none() {
|
||||
*admin = Some(current_admin);
|
||||
}
|
||||
}
|
||||
ElectionMessage::ElectionStarted { term, .. } => {
|
||||
let mut current_term = self.current_term.write().unwrap();
|
||||
if term > *current_term {
|
||||
*current_term = term;
|
||||
let mut state = self.state.write().unwrap();
|
||||
*state = ElectionState::Electing;
|
||||
}
|
||||
}
|
||||
ElectionMessage::Candidacy { term, candidate } => {
|
||||
let current_term = self.current_term.read().unwrap();
|
||||
if term == *current_term {
|
||||
let mut candidates = self.candidates.write().unwrap();
|
||||
candidates.insert(candidate.node_id.clone(), candidate);
|
||||
}
|
||||
}
|
||||
ElectionMessage::ElectionWinner { term, winner_id } => {
|
||||
let mut current_term = self.current_term.write().unwrap();
|
||||
if term >= *current_term {
|
||||
*current_term = term;
|
||||
let mut admin = self.current_admin.write().unwrap();
|
||||
*admin = Some(winner_id);
|
||||
let mut state = self.state.write().unwrap();
|
||||
*state = ElectionState::Idle;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn run_step(&self) -> Result<(), ElectionError> {
|
||||
let state = *self.state.read().unwrap();
|
||||
let last_hb = *self.last_heartbeat.read().unwrap();
|
||||
let now = Utc::now();
|
||||
|
||||
match state {
|
||||
ElectionState::Idle => {
|
||||
if now.signed_duration_since(last_hb) > Duration::seconds(15) {
|
||||
println!("[ELECTION] Heartbeat timeout! Triggering discovery...");
|
||||
let mut state_w = self.state.write().unwrap();
|
||||
*state_w = ElectionState::Discovering;
|
||||
|
||||
let req = ElectionMessage::DiscoveryRequest { node_id: self.node_id.clone() };
|
||||
let _ = self.bus.publish(ELECTION_TOPIC, serde_json::to_vec(&req)?);
|
||||
}
|
||||
}
|
||||
ElectionState::Discovering => {
|
||||
tokio::time::sleep(std::time::Duration::from_secs(5)).await;
|
||||
let admin = self.current_admin.read().unwrap();
|
||||
if admin.is_none() {
|
||||
println!("[ELECTION] No admin discovered. Starting election...");
|
||||
let mut state_w = self.state.write().unwrap();
|
||||
*state_w = ElectionState::Electing;
|
||||
let mut term_w = self.current_term.write().unwrap();
|
||||
*term_w += 1;
|
||||
|
||||
let start_msg = ElectionMessage::ElectionStarted {
|
||||
node_id: self.node_id.clone(),
|
||||
term: *term_w,
|
||||
};
|
||||
let _ = self.bus.publish(ELECTION_TOPIC, serde_json::to_vec(&start_msg)?);
|
||||
} else {
|
||||
let mut state_w = self.state.write().unwrap();
|
||||
*state_w = ElectionState::Idle;
|
||||
}
|
||||
}
|
||||
ElectionState::Electing => {
|
||||
let metrics = ResourceMetrics {
|
||||
cpu_usage: 0.1,
|
||||
memory_usage: 0.2,
|
||||
disk_usage: 0.1,
|
||||
network_quality: 0.95,
|
||||
};
|
||||
let score = self.calculate_score(&metrics, &["admin_election".to_string()]);
|
||||
let candidate = AdminCandidate {
|
||||
node_id: self.node_id.clone(),
|
||||
capabilities: vec!["admin_election".to_string()],
|
||||
uptime_secs: now.signed_duration_since(self.start_time).num_seconds() as u64,
|
||||
resources: metrics,
|
||||
experience_secs: 0,
|
||||
score,
|
||||
};
|
||||
let term = *self.current_term.read().unwrap();
|
||||
let candidacy_msg = ElectionMessage::Candidacy { term, candidate };
|
||||
let _ = self.bus.publish(ELECTION_TOPIC, serde_json::to_vec(&candidacy_msg)?);
|
||||
|
||||
tokio::time::sleep(std::time::Duration::from_secs(5)).await;
|
||||
|
||||
let candidates = self.candidates.read().unwrap();
|
||||
if let Some(winner) = candidates.values().max_by(|a, b| a.score.partial_cmp(&b.score).unwrap()) {
|
||||
println!("[ELECTION] Election complete. Winner: {}", winner.node_id);
|
||||
let winner_msg = ElectionMessage::ElectionWinner {
|
||||
term,
|
||||
winner_id: winner.node_id.clone(),
|
||||
};
|
||||
let _ = self.bus.publish(ELECTION_TOPIC, serde_json::to_vec(&winner_msg)?);
|
||||
}
|
||||
|
||||
let mut state_w = self.state.write().unwrap();
|
||||
*state_w = ElectionState::Idle;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn send_heartbeat(&self) -> Result<(), ElectionError> {
|
||||
if self.is_leader() {
|
||||
let heartbeat = serde_json::json!({
|
||||
"node_id": self.node_id,
|
||||
"timestamp": Utc::now().to_rfc3339()
|
||||
});
|
||||
let _ = self.bus.publish(HEARTBEAT_TOPIC, serde_json::to_vec(&heartbeat)?);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user