Harden CHORUS security and messaging stack

This commit is contained in:
anthonyrawlins
2025-09-20 23:21:35 +10:00
parent 57751f277a
commit 1bb736c09a
25 changed files with 2793 additions and 2474 deletions

View File

@@ -1,6 +1,7 @@
package logging
import (
"context"
"crypto/sha256"
"encoding/hex"
"encoding/json"
@@ -8,6 +9,7 @@ import (
"sync"
"time"
"chorus/pkg/shhh"
"github.com/libp2p/go-libp2p/core/peer"
)
@@ -23,12 +25,14 @@ type HypercoreLog struct {
entries []LogEntry
mutex sync.RWMutex
peerID peer.ID
// Verification chain
headHash string
// Replication
replicators map[peer.ID]*Replicator
redactor *shhh.Sentinel
}
// LogEntry represents a single entry in the distributed log
@@ -48,12 +52,12 @@ type LogType string
const (
// Bzzz coordination logs
TaskAnnounced LogType = "task_announced"
TaskClaimed LogType = "task_claimed"
TaskProgress LogType = "task_progress"
TaskCompleted LogType = "task_completed"
TaskFailed LogType = "task_failed"
TaskAnnounced LogType = "task_announced"
TaskClaimed LogType = "task_claimed"
TaskProgress LogType = "task_progress"
TaskCompleted LogType = "task_completed"
TaskFailed LogType = "task_failed"
// HMMM meta-discussion logs
PlanProposed LogType = "plan_proposed"
ObjectionRaised LogType = "objection_raised"
@@ -65,17 +69,17 @@ const (
TaskHelpReceived LogType = "task_help_received"
// System logs
PeerJoined LogType = "peer_joined"
PeerLeft LogType = "peer_left"
PeerJoined LogType = "peer_joined"
PeerLeft LogType = "peer_left"
CapabilityBcast LogType = "capability_broadcast"
NetworkEvent LogType = "network_event"
NetworkEvent LogType = "network_event"
)
// Replicator handles log replication with other peers.
//
// Each field is declared exactly once; repeating a field name inside a struct
// type is a compile error.
type Replicator struct {
	peerID        peer.ID // peer this replicator targets
	lastSyncIndex uint64  // AddReplicator initialises this to 0
	connected     bool    // AddReplicator initialises this to true
}
// NewHypercoreLog creates a new distributed log for a peer
@@ -88,6 +92,13 @@ func NewHypercoreLog(peerID peer.ID) *HypercoreLog {
}
}
// SetRedactor installs the SHHH sentinel that redactData applies to entry
// payloads before they are stored. The swap happens under the write lock.
// A nil redactor leaves payloads cloned but otherwise untouched, because
// redactData skips redaction when h.redactor is nil.
func (h *HypercoreLog) SetRedactor(redactor *shhh.Sentinel) {
	h.mutex.Lock()
	h.redactor = redactor
	h.mutex.Unlock()
}
// AppendString is a convenience method for string log types (to match interface)
func (h *HypercoreLog) AppendString(logType string, data map[string]interface{}) error {
_, err := h.Append(LogType(logType), data)
@@ -98,38 +109,40 @@ func (h *HypercoreLog) AppendString(logType string, data map[string]interface{})
func (h *HypercoreLog) Append(logType LogType, data map[string]interface{}) (*LogEntry, error) {
h.mutex.Lock()
defer h.mutex.Unlock()
index := uint64(len(h.entries))
sanitized := h.redactData(logType, data)
entry := LogEntry{
Index: index,
Timestamp: time.Now(),
Author: h.peerID.String(),
Type: logType,
Data: data,
Data: sanitized,
PrevHash: h.headHash,
}
// Calculate hash
entryHash, err := h.calculateEntryHash(entry)
if err != nil {
return nil, fmt.Errorf("failed to calculate entry hash: %w", err)
}
entry.Hash = entryHash
// Add simple signature (in production, use proper cryptographic signatures)
entry.Signature = h.createSignature(entry)
// Append to log
h.entries = append(h.entries, entry)
h.headHash = entryHash
fmt.Printf("📝 Log entry appended: %s [%d] by %s\n",
fmt.Printf("📝 Log entry appended: %s [%d] by %s\n",
logType, index, h.peerID.ShortString())
// Trigger replication to connected peers
go h.replicateEntry(entry)
return &entry, nil
}
@@ -137,11 +150,11 @@ func (h *HypercoreLog) Append(logType LogType, data map[string]interface{}) (*Lo
// Get returns the entry stored at index, or an error when index is past the
// end of the log.
//
// The returned pointer refers to a copy of the stored entry rather than to
// the slice element itself. A pointer into h.entries would outlive the read
// lock and let callers modify log storage directly. Returning a copy matches
// Append, which also returns a copy. The copy is shallow: the Data map is
// still shared with the stored entry.
func (h *HypercoreLog) Get(index uint64) (*LogEntry, error) {
	h.mutex.RLock()
	defer h.mutex.RUnlock()

	if index >= uint64(len(h.entries)) {
		return nil, fmt.Errorf("entry %d not found", index)
	}

	entry := h.entries[index]
	return &entry, nil
}
@@ -149,7 +162,7 @@ func (h *HypercoreLog) Get(index uint64) (*LogEntry, error) {
// Length reports how many entries the log currently holds. The count is read
// under the read lock.
func (h *HypercoreLog) Length() uint64 {
	h.mutex.RLock()
	defer h.mutex.RUnlock()

	count := len(h.entries)
	return uint64(count)
}
@@ -157,22 +170,22 @@ func (h *HypercoreLog) Length() uint64 {
// GetRange returns a copy of the entries in the half-open interval
// [start, end). An end beyond the log is clamped to the log length. A start
// at or past the log length, or a start greater than the (clamped) end, is
// reported as an error.
func (h *HypercoreLog) GetRange(start, end uint64) ([]LogEntry, error) {
	h.mutex.RLock()
	defer h.mutex.RUnlock()

	total := uint64(len(h.entries))
	switch {
	case start >= total:
		return nil, fmt.Errorf("start index %d out of range", start)
	case end > total:
		end = total
	}

	if start > end {
		return nil, fmt.Errorf("invalid range: start %d > end %d", start, end)
	}

	// Copy so the caller never shares the log's backing array.
	window := h.entries[start:end]
	out := make([]LogEntry, len(window))
	copy(out, window)
	return out, nil
}
@@ -180,14 +193,14 @@ func (h *HypercoreLog) GetRange(start, end uint64) ([]LogEntry, error) {
// GetEntriesByType returns copies of every entry whose Type equals logType,
// in log order. The result is a nil slice when nothing matches; the error is
// always nil.
func (h *HypercoreLog) GetEntriesByType(logType LogType) ([]LogEntry, error) {
	h.mutex.RLock()
	defer h.mutex.RUnlock()

	var matches []LogEntry
	for i := range h.entries {
		if h.entries[i].Type != logType {
			continue
		}
		matches = append(matches, h.entries[i])
	}
	return matches, nil
}
@@ -195,14 +208,14 @@ func (h *HypercoreLog) GetEntriesByType(logType LogType) ([]LogEntry, error) {
// GetEntriesByAuthor returns copies of every entry whose Author equals author,
// in log order. The result is a nil slice when nothing matches; the error is
// always nil.
func (h *HypercoreLog) GetEntriesByAuthor(author string) ([]LogEntry, error) {
	h.mutex.RLock()
	defer h.mutex.RUnlock()

	var matches []LogEntry
	for i := range h.entries {
		if h.entries[i].Author != author {
			continue
		}
		matches = append(matches, h.entries[i])
	}
	return matches, nil
}
@@ -210,20 +223,20 @@ func (h *HypercoreLog) GetEntriesByAuthor(author string) ([]LogEntry, error) {
// GetRecentEntries returns a copy of the last count entries in log order. A
// non-positive count or an empty log yields an empty, non-nil slice. A count
// larger than the log returns the whole log. The error is always nil.
func (h *HypercoreLog) GetRecentEntries(count int) ([]LogEntry, error) {
	h.mutex.RLock()
	defer h.mutex.RUnlock()

	total := len(h.entries)
	if total == 0 || count <= 0 {
		return []LogEntry{}, nil
	}

	if count > total {
		count = total
	}

	tail := h.entries[total-count:]
	out := make([]LogEntry, len(tail))
	copy(out, tail)
	return out, nil
}
@@ -231,14 +244,14 @@ func (h *HypercoreLog) GetRecentEntries(count int) ([]LogEntry, error) {
// GetEntriesSince returns a copy of every entry from sinceIndex (inclusive) to
// the end of the log. A sinceIndex at or past the log length yields an empty,
// non-nil slice. The error is always nil.
func (h *HypercoreLog) GetEntriesSince(sinceIndex uint64) ([]LogEntry, error) {
	h.mutex.RLock()
	defer h.mutex.RUnlock()

	if total := uint64(len(h.entries)); sinceIndex >= total {
		return []LogEntry{}, nil
	}

	tail := h.entries[sinceIndex:]
	out := make([]LogEntry, len(tail))
	copy(out, tail)
	return out, nil
}
@@ -246,27 +259,27 @@ func (h *HypercoreLog) GetEntriesSince(sinceIndex uint64) ([]LogEntry, error) {
// VerifyIntegrity walks the log from the first entry and checks the hash
// chain. Each entry's PrevHash must equal the previous entry's Hash (the empty
// string for the first entry), and each entry's Hash must equal the value
// recomputed by calculateEntryHash. The first violation is returned as an
// error; nil means the whole chain is consistent.
func (h *HypercoreLog) VerifyIntegrity() error {
	h.mutex.RLock()
	defer h.mutex.RUnlock()

	expectedPrev := ""
	for i := range h.entries {
		entry := h.entries[i]

		// Verify the link to the previous entry.
		if entry.PrevHash != expectedPrev {
			return fmt.Errorf("integrity error at entry %d: prev_hash mismatch", i)
		}

		// Verify the entry's own hash.
		recomputed, err := h.calculateEntryHash(entry)
		if err != nil {
			return fmt.Errorf("failed to calculate hash for entry %d: %w", i, err)
		}
		if recomputed != entry.Hash {
			return fmt.Errorf("integrity error at entry %d: hash mismatch", i)
		}

		expectedPrev = entry.Hash
	}
	return nil
}
@@ -274,13 +287,13 @@ func (h *HypercoreLog) VerifyIntegrity() error {
// AddReplicator registers peerID as a replication target, replacing any
// existing replicator stored for the same peer. The new replicator starts at
// sync index 0 and is marked connected.
//
// Each field of the Replicator literal is set exactly once; a repeated field
// name in a composite literal is a compile error.
func (h *HypercoreLog) AddReplicator(peerID peer.ID) {
	h.mutex.Lock()
	defer h.mutex.Unlock()

	h.replicators[peerID] = &Replicator{
		peerID:        peerID,
		lastSyncIndex: 0,
		connected:     true,
	}

	fmt.Printf("🔄 Added replicator: %s\n", peerID.ShortString())
}
@@ -288,7 +301,7 @@ func (h *HypercoreLog) AddReplicator(peerID peer.ID) {
// RemoveReplicator drops peerID from the replicator set and logs the removal.
// Removing a peer that was never added is a no-op apart from the log line.
func (h *HypercoreLog) RemoveReplicator(peerID peer.ID) {
	h.mutex.Lock()
	defer h.mutex.Unlock()

	delete(h.replicators, peerID)

	short := peerID.ShortString()
	fmt.Printf("🔄 Removed replicator: %s\n", short)
}
@@ -303,10 +316,10 @@ func (h *HypercoreLog) replicateEntry(entry LogEntry) {
}
}
h.mutex.RUnlock()
for _, replicator := range replicators {
// In a real implementation, this would send the entry over the network
fmt.Printf("🔄 Replicating entry %d to %s\n",
fmt.Printf("🔄 Replicating entry %d to %s\n",
entry.Index, replicator.peerID.ShortString())
}
}
@@ -322,16 +335,75 @@ func (h *HypercoreLog) calculateEntryHash(entry LogEntry) (string, error) {
Data: entry.Data,
PrevHash: entry.PrevHash,
}
entryBytes, err := json.Marshal(entryForHash)
if err != nil {
return "", err
}
hash := sha256.Sum256(entryBytes)
return hex.EncodeToString(hash[:]), nil
}
// redactData returns a deep clone of data with the configured redactor applied
// in place to the clone. The caller's map is never modified. A nil input
// yields nil, and when no redactor is configured the clone is returned
// unredacted. It reads h.redactor without locking; the visible caller, Append,
// already holds h.mutex.
func (h *HypercoreLog) redactData(logType LogType, data map[string]interface{}) map[string]interface{} {
	sanitized := cloneLogMap(data)
	if sanitized == nil || h.redactor == nil {
		return sanitized
	}

	h.redactor.RedactMapWithLabels(context.Background(), sanitized, map[string]string{
		"source":   "hypercore",
		"log_type": string(logType),
	})
	return sanitized
}
// cloneLogMap returns a deep copy of in so that later redaction or caller-side
// mutation cannot affect the other copy. A nil map stays nil.
func cloneLogMap(in map[string]interface{}) map[string]interface{} {
	if in == nil {
		return nil
	}

	out := make(map[string]interface{}, len(in))
	for k, v := range in {
		out[k] = cloneLogValue(v)
	}
	return out
}

// cloneLogValue deep-copies the container types expected in log payloads:
// string-keyed maps, []interface{} and []string. Every other value is returned
// unchanged, which shares any pointers or unlisted containers it holds.
//
// `any` is an alias for `interface{}`, so map[string]any and []any are the
// same types as map[string]interface{} and []interface{}. Listing both
// spellings in one type switch is a duplicate-case compile error, so each type
// appears once.
func cloneLogValue(v interface{}) interface{} {
	switch tv := v.(type) {
	case map[string]interface{}:
		return cloneLogMap(tv)
	case []interface{}:
		return cloneLogSlice(tv)
	case []string:
		// tv[:0:0] has zero capacity, so append always allocates a fresh
		// backing array. It also keeps nil as nil and empty as empty, so the
		// JSON encoding (null vs []) of the clone matches the input.
		return append(tv[:0:0], tv...)
	default:
		return tv
	}
}

// cloneLogSlice returns a deep copy of in, cloning each element with
// cloneLogValue. A nil slice stays nil so its JSON encoding is unchanged.
func cloneLogSlice(in []interface{}) []interface{} {
	if in == nil {
		return nil
	}

	out := make([]interface{}, len(in))
	for i, val := range in {
		out[i] = cloneLogValue(val)
	}
	return out
}
// createSignature creates a simplified signature for the entry
func (h *HypercoreLog) createSignature(entry LogEntry) string {
// In production, this would use proper cryptographic signatures
@@ -345,21 +417,21 @@ func (h *HypercoreLog) createSignature(entry LogEntry) string {
// GetStats returns a snapshot of summary statistics for the log: the total
// entry count, the current head hash, the number of registered replicators,
// per-type and per-author entry counts, and this node's peer ID.
//
// Each key of the returned map literal appears once; repeated constant keys in
// a map literal are a compile error.
func (h *HypercoreLog) GetStats() map[string]interface{} {
	h.mutex.RLock()
	defer h.mutex.RUnlock()

	typeCount := make(map[LogType]int)
	authorCount := make(map[string]int)

	for _, entry := range h.entries {
		typeCount[entry.Type]++
		authorCount[entry.Author]++
	}

	return map[string]interface{}{
		"total_entries":     len(h.entries),
		"head_hash":         h.headHash,
		"replicators":       len(h.replicators),
		"entries_by_type":   typeCount,
		"entries_by_author": authorCount,
		"peer_id":           h.peerID.String(),
	}
}
}

View File

@@ -2,9 +2,11 @@ package runtime
import (
"context"
"fmt"
"time"
"chorus/internal/logging"
"chorus/pkg/dht"
"chorus/pkg/health"
"chorus/pkg/shutdown"
"chorus/pubsub"
@@ -43,37 +45,37 @@ func (r *SharedRuntime) StartAgentMode() error {
// === Comprehensive Health Monitoring & Graceful Shutdown ===
shutdownManager := shutdown.NewManager(30*time.Second, &simpleLogger{logger: r.Logger})
healthManager := health.NewManager(r.Node.ID().ShortString(), AppVersion, &simpleLogger{logger: r.Logger})
healthManager.SetShutdownManager(shutdownManager)
// Register health checks
r.setupHealthChecks(healthManager)
// Register components for graceful shutdown
r.setupGracefulShutdown(shutdownManager, healthManager)
// Start health monitoring
if err := healthManager.Start(); err != nil {
return err
}
r.HealthManager = healthManager
r.Logger.Info("❤️ Health monitoring started")
// Start health HTTP server
if err := healthManager.StartHTTPServer(r.Config.Network.HealthPort); err != nil {
r.Logger.Error("❌ Failed to start health HTTP server: %v", err)
} else {
r.Logger.Info("🏥 Health endpoints available at http://localhost:%d/health", r.Config.Network.HealthPort)
}
// Start shutdown manager
shutdownManager.Start()
r.ShutdownManager = shutdownManager
r.Logger.Info("🛡️ Graceful shutdown manager started")
r.Logger.Info("✅ CHORUS agent system fully operational with health monitoring")
// Wait for graceful shutdown
shutdownManager.Wait()
r.Logger.Info("✅ CHORUS agent system shutdown completed")
@@ -90,7 +92,7 @@ func (r *SharedRuntime) announceAvailability() {
currentTasks := r.TaskTracker.GetActiveTasks()
maxTasks := r.TaskTracker.GetMaxTasks()
isAvailable := len(currentTasks) < maxTasks
status := "ready"
if len(currentTasks) >= maxTasks {
status = "busy"
@@ -99,13 +101,13 @@ func (r *SharedRuntime) announceAvailability() {
}
availability := map[string]interface{}{
"node_id": r.Node.ID().ShortString(),
"node_id": r.Node.ID().ShortString(),
"available_for_work": isAvailable,
"current_tasks": len(currentTasks),
"max_tasks": maxTasks,
"last_activity": time.Now().Unix(),
"status": status,
"timestamp": time.Now().Unix(),
"current_tasks": len(currentTasks),
"max_tasks": maxTasks,
"last_activity": time.Now().Unix(),
"status": status,
"timestamp": time.Now().Unix(),
}
if err := r.PubSub.PublishBzzzMessage(pubsub.AvailabilityBcast, availability); err != nil {
r.Logger.Error("❌ Failed to announce availability: %v", err)
@@ -126,16 +128,79 @@ func (r *SharedRuntime) statusReporter() {
// announceCapabilitiesOnChange publishes a single capability announcement for
// this agent on the pubsub CapabilityBcast message type.
//
// Despite the name, it currently broadcasts once per call and does not watch
// for changes (see the TODO at the end). It is skipped with a warning when
// PubSub has not been initialised. A publish failure is logged and not
// returned. The earlier placeholder ("announcement enabled") log line has been
// removed so the function reports only what it actually does.
func (r *SharedRuntime) announceCapabilitiesOnChange() {
	if r.PubSub == nil {
		r.Logger.Warn("⚠️ Capability broadcast skipped: PubSub not initialized")
		return
	}

	r.Logger.Info("📢 Broadcasting agent capabilities to network")

	// TaskTracker may not be set yet; report zero active tasks in that case.
	activeTaskCount := 0
	if r.TaskTracker != nil {
		activeTaskCount = len(r.TaskTracker.GetActiveTasks())
	}

	announcement := map[string]interface{}{
		"agent_id":       r.Config.Agent.ID,
		"node_id":        r.Node.ID().ShortString(),
		"version":        AppVersion,
		"capabilities":   r.Config.Agent.Capabilities,
		"expertise":      r.Config.Agent.Expertise,
		"models":         r.Config.Agent.Models,
		"specialization": r.Config.Agent.Specialization,
		"max_tasks":      r.Config.Agent.MaxTasks,
		"current_tasks":  activeTaskCount,
		"timestamp":      time.Now().Unix(),
		"availability":   "ready",
	}

	if err := r.PubSub.PublishBzzzMessage(pubsub.CapabilityBcast, announcement); err != nil {
		r.Logger.Error("❌ Failed to broadcast capabilities: %v", err)
		return
	}

	r.Logger.Info("✅ Capabilities broadcast published")

	// TODO: Watch for live capability changes (role updates, model changes) and re-broadcast
}
// announceRoleOnStartup publishes this agent's configured role as a
// role-based pubsub message of type RoleAnnouncement.
//
// Nothing is published when no role is configured or when PubSub has not been
// initialised; both cases are logged. A publish failure is logged and not
// returned. The earlier placeholder ("announcement enabled") log line has been
// removed so the function reports only what it actually does.
func (r *SharedRuntime) announceRoleOnStartup() {
	role := r.Config.Agent.Role
	if role == "" {
		r.Logger.Info("🎭 No agent role configured; skipping role announcement")
		return
	}

	if r.PubSub == nil {
		r.Logger.Warn("⚠️ Role announcement skipped: PubSub not initialized")
		return
	}

	r.Logger.Info("🎭 Announcing agent role to collaboration mesh")

	announcement := map[string]interface{}{
		"agent_id":       r.Config.Agent.ID,
		"node_id":        r.Node.ID().ShortString(),
		"role":           role,
		"expertise":      r.Config.Agent.Expertise,
		"capabilities":   r.Config.Agent.Capabilities,
		"reports_to":     r.Config.Agent.ReportsTo,
		"specialization": r.Config.Agent.Specialization,
		"timestamp":      time.Now().Unix(),
	}

	// Every announcement for a given role carries the same thread ID.
	opts := pubsub.MessageOptions{
		FromRole: role,
		Priority: "medium",
		ThreadID: fmt.Sprintf("role:%s", role),
	}

	if err := r.PubSub.PublishRoleBasedMessage(pubsub.RoleAnnouncement, announcement, opts); err != nil {
		r.Logger.Error("❌ Failed to announce role: %v", err)
		return
	}

	r.Logger.Info("✅ Role announcement published")
}
func (r *SharedRuntime) setupHealthChecks(healthManager *health.Manager) {
@@ -151,31 +216,108 @@ func (r *SharedRuntime) setupHealthChecks(healthManager *health.Manager) {
Checker: func(ctx context.Context) health.CheckResult {
healthInfo := r.BackbeatIntegration.GetHealth()
connected, _ := healthInfo["connected"].(bool)
result := health.CheckResult{
Healthy: connected,
Details: healthInfo,
Timestamp: time.Now(),
}
if connected {
result.Message = "BACKBEAT integration healthy and connected"
} else {
result.Message = "BACKBEAT integration not connected"
}
return result
},
}
healthManager.RegisterCheck(backbeatCheck)
}
// Add other health checks (P2P, DHT, etc.)
// Implementation from CHORUS would go here
// Register enhanced health instrumentation when core subsystems are available
if r.PubSub == nil {
r.Logger.Warn("⚠️ Skipping enhanced health checks: PubSub not initialized")
return
}
if r.ElectionManager == nil {
r.Logger.Warn("⚠️ Skipping enhanced health checks: election manager not ready")
return
}
var replication *dht.ReplicationManager
if r.DHTNode != nil {
replication = r.DHTNode.ReplicationManager()
}
enhanced := health.NewEnhancedHealthChecks(
healthManager,
r.ElectionManager,
r.DHTNode,
r.PubSub,
replication,
&simpleLogger{logger: r.Logger},
)
r.EnhancedHealth = enhanced
r.Logger.Info("🩺 Enhanced health checks registered")
}
// setupGracefulShutdown registers each runtime component that has been
// initialised with shutdownManager, so it is stopped during graceful teardown.
// Components whose field is nil are skipped. When shutdownManager itself is
// nil the function only logs a warning.
//
// NOTE(review): the numeric argument (10, 15, ... 50) is presumably the
// shutdown priority/ordering understood by pkg/shutdown; confirm there.
func (r *SharedRuntime) setupGracefulShutdown(shutdownManager *shutdown.Manager, healthManager *health.Manager) {
	if shutdownManager == nil {
		r.Logger.Warn("⚠️ Shutdown manager not initialized; graceful teardown skipped")
		return
	}

	// The Stop closures read the runtime field when they run, not when they
	// are registered.
	if r.HTTPServer != nil {
		shutdownManager.Register(
			shutdown.NewGenericComponent("http-api-server", 10, true).
				SetShutdownFunc(func(_ context.Context) error {
					return r.HTTPServer.Stop()
				}),
		)
	}

	if healthManager != nil {
		shutdownManager.Register(
			shutdown.NewGenericComponent("health-manager", 15, true).
				SetShutdownFunc(func(_ context.Context) error {
					return healthManager.Stop()
				}),
		)
	}

	if r.UCXIServer != nil {
		shutdownManager.Register(
			shutdown.NewGenericComponent("ucxi-server", 20, true).
				SetShutdownFunc(func(_ context.Context) error {
					return r.UCXIServer.Stop()
				}),
		)
	}

	if r.PubSub != nil {
		shutdownManager.Register(shutdown.NewPubSubComponent("pubsub", r.PubSub.Close, 30))
	}

	if r.DHTNode != nil {
		shutdownManager.Register(
			shutdown.NewGenericComponent("dht-node", 35, true).
				SetCloser(r.DHTNode.Close),
		)
	}

	if r.Node != nil {
		shutdownManager.Register(shutdown.NewP2PNodeComponent("p2p-node", r.Node.Close, 40))
	}

	if r.ElectionManager != nil {
		shutdownManager.Register(shutdown.NewElectionManagerComponent("election-manager", r.ElectionManager.Stop, 45))
	}

	if r.BackbeatIntegration != nil {
		shutdownManager.Register(
			shutdown.NewGenericComponent("backbeat-integration", 50, true).
				SetShutdownFunc(func(_ context.Context) error {
					return r.BackbeatIntegration.Stop()
				}),
		)
	}

	r.Logger.Info("🛡️ Graceful shutdown components registered")
}
}

View File

@@ -21,8 +21,10 @@ import (
"chorus/pkg/dht"
"chorus/pkg/election"
"chorus/pkg/health"
"chorus/pkg/shutdown"
"chorus/pkg/metrics"
"chorus/pkg/prompt"
"chorus/pkg/shhh"
"chorus/pkg/shutdown"
"chorus/pkg/ucxi"
"chorus/pkg/ucxl"
"chorus/pubsub"
@@ -53,8 +55,8 @@ func (l *SimpleLogger) Error(msg string, args ...interface{}) {
// SimpleTaskTracker tracks active tasks for availability reporting
type SimpleTaskTracker struct {
maxTasks int
activeTasks map[string]bool
maxTasks int
activeTasks map[string]bool
decisionPublisher *ucxl.DecisionPublisher
}
@@ -80,7 +82,7 @@ func (t *SimpleTaskTracker) AddTask(taskID string) {
// RemoveTask marks a task as completed and publishes decision if publisher available
func (t *SimpleTaskTracker) RemoveTask(taskID string) {
delete(t.activeTasks, taskID)
// Publish task completion decision if publisher is available
if t.decisionPublisher != nil {
t.publishTaskCompletion(taskID, true, "Task completed successfully", nil)
@@ -92,7 +94,7 @@ func (t *SimpleTaskTracker) publishTaskCompletion(taskID string, success bool, s
if t.decisionPublisher == nil {
return
}
if err := t.decisionPublisher.PublishTaskCompletion(taskID, success, summary, filesModified); err != nil {
fmt.Printf("⚠️ Failed to publish task completion for %s: %v\n", taskID, err)
} else {
@@ -102,32 +104,35 @@ func (t *SimpleTaskTracker) publishTaskCompletion(taskID string, success bool, s
// SharedRuntime contains all the shared P2P infrastructure components.
//
// Each field is declared exactly once; repeating a field name inside a struct
// type is a compile error.
type SharedRuntime struct {
	Config *config.Config
	Logger *SimpleLogger

	// Context and Cancel are created together by Initialize with
	// context.WithCancel.
	Context context.Context
	Cancel  context.CancelFunc

	Node                *p2p.Node
	PubSub              *pubsub.PubSub
	HypercoreLog        *logging.HypercoreLog
	MDNSDiscovery       *discovery.MDNSDiscovery
	BackbeatIntegration *backbeat.Integration
	DHTNode             *dht.LibP2PDHT
	EncryptedStorage    *dht.EncryptedDHTStorage
	DecisionPublisher   *ucxl.DecisionPublisher
	ElectionManager     *election.ElectionManager
	TaskCoordinator     *coordinator.TaskCoordinator
	HTTPServer          *api.HTTPServer
	UCXIServer          *ucxi.Server

	HealthManager   *health.Manager
	EnhancedHealth  *health.EnhancedHealthChecks // assigned by setupHealthChecks
	ShutdownManager *shutdown.Manager
	TaskTracker     *SimpleTaskTracker

	// Metrics receives SHHH finding counts via handleShhhFindings.
	Metrics *metrics.CHORUSMetrics
	// Shhh is the sentinel wired into HypercoreLog and PubSub as their
	// redactor during Initialize.
	Shhh *shhh.Sentinel
}
// Initialize sets up all shared P2P infrastructure components
func Initialize(appMode string) (*SharedRuntime, error) {
runtime := &SharedRuntime{}
runtime.Logger = &SimpleLogger{}
ctx, cancel := context.WithCancel(context.Background())
runtime.Context = ctx
runtime.Cancel = cancel
@@ -142,7 +147,7 @@ func Initialize(appMode string) (*SharedRuntime, error) {
return nil, fmt.Errorf("configuration error: %v", err)
}
runtime.Config = cfg
runtime.Logger.Info("✅ Configuration loaded successfully")
runtime.Logger.Info("🤖 Agent ID: %s", cfg.Agent.ID)
runtime.Logger.Info("🎯 Specialization: %s", cfg.Agent.Specialization)
@@ -166,6 +171,21 @@ func Initialize(appMode string) (*SharedRuntime, error) {
}
runtime.Logger.Info("✅ AI provider configured successfully")
// Initialize metrics collector
runtime.Metrics = metrics.NewCHORUSMetrics(nil)
// Initialize SHHH sentinel
sentinel, err := shhh.NewSentinel(
shhh.Config{},
shhh.WithFindingObserver(runtime.handleShhhFindings),
)
if err != nil {
return nil, fmt.Errorf("failed to initialize SHHH sentinel: %v", err)
}
sentinel.SetAuditSink(&shhhAuditSink{logger: runtime.Logger})
runtime.Shhh = sentinel
runtime.Logger.Info("🛡️ SHHH sentinel initialized")
// Initialize BACKBEAT integration
var backbeatIntegration *backbeat.Integration
backbeatIntegration, err = backbeat.NewIntegration(cfg, cfg.Agent.ID, runtime.Logger)
@@ -198,6 +218,9 @@ func Initialize(appMode string) (*SharedRuntime, error) {
// Initialize Hypercore-style logger for P2P coordination
hlog := logging.NewHypercoreLog(node.ID())
if runtime.Shhh != nil {
hlog.SetRedactor(runtime.Shhh)
}
hlog.Append(logging.PeerJoined, map[string]interface{}{"status": "started"})
runtime.HypercoreLog = hlog
runtime.Logger.Info("📝 Hypercore logger initialized")
@@ -214,8 +237,11 @@ func Initialize(appMode string) (*SharedRuntime, error) {
if err != nil {
return nil, fmt.Errorf("failed to create PubSub: %v", err)
}
if runtime.Shhh != nil {
ps.SetRedactor(runtime.Shhh)
}
runtime.PubSub = ps
runtime.Logger.Info("📡 PubSub system initialized")
// Join role-based topics if role is configured
@@ -294,12 +320,12 @@ func (r *SharedRuntime) Cleanup() {
func (r *SharedRuntime) initializeElectionSystem() error {
// === Admin Election System ===
electionManager := election.NewElectionManager(r.Context, r.Config, r.Node.Host(), r.PubSub, r.Node.ID().ShortString())
// Set election callbacks with BACKBEAT integration
electionManager.SetCallbacks(
func(oldAdmin, newAdmin string) {
r.Logger.Info("👑 Admin changed: %s -> %s", oldAdmin, newAdmin)
// Track admin change with BACKBEAT if available
if r.BackbeatIntegration != nil {
operationID := fmt.Sprintf("admin-change-%d", time.Now().Unix())
@@ -311,7 +337,7 @@ func (r *SharedRuntime) initializeElectionSystem() error {
r.BackbeatIntegration.CompleteP2POperation(operationID, 1)
}
}
// If this node becomes admin, enable SLURP functionality
if newAdmin == r.Node.ID().ShortString() {
r.Logger.Info("🎯 This node is now admin - enabling SLURP functionality")
@@ -324,12 +350,12 @@ func (r *SharedRuntime) initializeElectionSystem() error {
},
func(winner string) {
r.Logger.Info("🏆 Election completed, winner: %s", winner)
// Track election completion with BACKBEAT if available
if r.BackbeatIntegration != nil {
operationID := fmt.Sprintf("election-completed-%d", time.Now().Unix())
if err := r.BackbeatIntegration.StartP2POperation(operationID, "election", 1, map[string]interface{}{
"winner": winner,
"winner": winner,
"node_id": r.Node.ID().ShortString(),
}); err == nil {
r.BackbeatIntegration.CompleteP2POperation(operationID, 1)
@@ -337,22 +363,22 @@ func (r *SharedRuntime) initializeElectionSystem() error {
}
},
)
if err := electionManager.Start(); err != nil {
return fmt.Errorf("failed to start election manager: %v", err)
}
r.ElectionManager = electionManager
r.Logger.Info("✅ Election manager started with automated heartbeat management")
return nil
}
func (r *SharedRuntime) initializeDHTStorage() error {
// === DHT Storage and Decision Publishing ===
var dhtNode *dht.LibP2PDHT
var encryptedStorage *dht.EncryptedDHTStorage
var encryptedStorage *dht.EncryptedDHTStorage
var decisionPublisher *ucxl.DecisionPublisher
if r.Config.V2.DHT.Enabled {
// Create DHT
var err error
@@ -361,14 +387,14 @@ func (r *SharedRuntime) initializeDHTStorage() error {
r.Logger.Warn("⚠️ Failed to create DHT: %v", err)
} else {
r.Logger.Info("🕸️ DHT initialized")
// Bootstrap DHT with BACKBEAT tracking
if r.BackbeatIntegration != nil {
operationID := fmt.Sprintf("dht-bootstrap-%d", time.Now().Unix())
if err := r.BackbeatIntegration.StartP2POperation(operationID, "dht_bootstrap", 4, nil); err == nil {
r.BackbeatIntegration.UpdateP2POperationPhase(operationID, backbeat.PhaseConnecting, 0)
}
if err := dhtNode.Bootstrap(); err != nil {
r.Logger.Warn("⚠️ DHT bootstrap failed: %v", err)
r.BackbeatIntegration.FailP2POperation(operationID, err.Error())
@@ -380,22 +406,22 @@ func (r *SharedRuntime) initializeDHTStorage() error {
r.Logger.Warn("⚠️ DHT bootstrap failed: %v", err)
}
}
// Connect to bootstrap peers if configured
// Connect to bootstrap peers if configured
for _, addrStr := range r.Config.V2.DHT.BootstrapPeers {
addr, err := multiaddr.NewMultiaddr(addrStr)
if err != nil {
r.Logger.Warn("⚠️ Invalid bootstrap address %s: %v", addrStr, err)
continue
}
// Extract peer info from multiaddr
info, err := peer.AddrInfoFromP2pAddr(addr)
if err != nil {
r.Logger.Warn("⚠️ Failed to parse peer info from %s: %v", addrStr, err)
continue
}
// Track peer discovery with BACKBEAT if available
if r.BackbeatIntegration != nil {
operationID := fmt.Sprintf("peer-discovery-%d", time.Now().Unix())
@@ -403,7 +429,7 @@ func (r *SharedRuntime) initializeDHTStorage() error {
"peer_addr": addrStr,
}); err == nil {
r.BackbeatIntegration.UpdateP2POperationPhase(operationID, backbeat.PhaseConnecting, 0)
if err := r.Node.Host().Connect(r.Context, *info); err != nil {
r.Logger.Warn("⚠️ Failed to connect to bootstrap peer %s: %v", addrStr, err)
r.BackbeatIntegration.FailP2POperation(operationID, err.Error())
@@ -420,20 +446,20 @@ func (r *SharedRuntime) initializeDHTStorage() error {
}
}
}
// Initialize encrypted storage
encryptedStorage = dht.NewEncryptedDHTStorage(
r.Context,
r.Node.Host(),
r.Node.Host(),
dhtNode,
r.Config,
r.Node.ID().ShortString(),
)
// Start cache cleanup
encryptedStorage.StartCacheCleanup(5 * time.Minute)
r.Logger.Info("🔐 Encrypted DHT storage initialized")
// Initialize decision publisher
decisionPublisher = ucxl.NewDecisionPublisher(
r.Context,
@@ -451,11 +477,24 @@ func (r *SharedRuntime) initializeDHTStorage() error {
r.DHTNode = dhtNode
r.EncryptedStorage = encryptedStorage
r.DecisionPublisher = decisionPublisher
return nil
}
func (r *SharedRuntime) initializeServices() error {
// Create simple task tracker ahead of coordinator so broadcasts stay accurate
taskTracker := &SimpleTaskTracker{
maxTasks: r.Config.Agent.MaxTasks,
activeTasks: make(map[string]bool),
}
// Connect decision publisher to task tracker if available
if r.DecisionPublisher != nil {
taskTracker.decisionPublisher = r.DecisionPublisher
r.Logger.Info("📤 Task completion decisions will be published to DHT")
}
r.TaskTracker = taskTracker
// === Task Coordination Integration ===
taskCoordinator := coordinator.NewTaskCoordinator(
r.Context,
@@ -464,8 +503,9 @@ func (r *SharedRuntime) initializeServices() error {
r.Config,
r.Node.ID().ShortString(),
nil, // HMMM router placeholder
taskTracker,
)
taskCoordinator.Start()
r.TaskCoordinator = taskCoordinator
r.Logger.Info("✅ Task coordination system active")
@@ -487,14 +527,14 @@ func (r *SharedRuntime) initializeServices() error {
if storageDir == "" {
storageDir = filepath.Join(os.TempDir(), "chorus-ucxi-storage")
}
storage, err := ucxi.NewBasicContentStorage(storageDir)
if err != nil {
r.Logger.Warn("⚠️ Failed to create UCXI storage: %v", err)
} else {
resolver := ucxi.NewBasicAddressResolver(r.Node.ID().ShortString())
resolver.SetDefaultTTL(r.Config.UCXL.Resolution.CacheTTL)
ucxiConfig := ucxi.ServerConfig{
Port: r.Config.UCXL.Server.Port,
BasePath: r.Config.UCXL.Server.BasePath,
@@ -502,7 +542,7 @@ func (r *SharedRuntime) initializeServices() error {
Storage: storage,
Logger: ucxi.SimpleLogger{},
}
ucxiServer = ucxi.NewServer(ucxiConfig)
go func() {
r.Logger.Info("🔗 UCXI server starting on :%d", r.Config.UCXL.Server.Port)
@@ -515,35 +555,41 @@ func (r *SharedRuntime) initializeServices() error {
r.Logger.Info("⚪ UCXI server disabled")
}
r.UCXIServer = ucxiServer
// Create simple task tracker
taskTracker := &SimpleTaskTracker{
maxTasks: r.Config.Agent.MaxTasks,
activeTasks: make(map[string]bool),
}
// Connect decision publisher to task tracker if available
if r.DecisionPublisher != nil {
taskTracker.decisionPublisher = r.DecisionPublisher
r.Logger.Info("📤 Task completion decisions will be published to DHT")
}
r.TaskTracker = taskTracker
return nil
}
// handleShhhFindings forwards each SHHH finding to the metrics collector,
// recording its rule, severity and count. It is registered as the sentinel's
// finding observer in Initialize. The call is a no-op on a nil runtime or
// when Metrics is unset. ctx is currently unused.
func (r *SharedRuntime) handleShhhFindings(ctx context.Context, findings []shhh.Finding) {
	if r != nil && r.Metrics != nil {
		for i := range findings {
			f := findings[i]
			r.Metrics.IncrementSHHHFindings(f.Rule, string(f.Severity), f.Count)
		}
	}
}
// shhhAuditSink writes SHHH audit events to the runtime logger. Initialize
// installs it on the sentinel via SetAuditSink.
type shhhAuditSink struct {
	logger *SimpleLogger
}

// RecordRedaction logs one warning per audit event with the event's rule,
// severity and path. It does nothing on a nil sink or a sink without a
// logger. The context argument is ignored.
func (s *shhhAuditSink) RecordRedaction(_ context.Context, event shhh.AuditEvent) {
	if s != nil && s.logger != nil {
		s.logger.Warn("🔒 SHHH redaction applied (rule=%s severity=%s path=%s)", event.Rule, event.Severity, event.Path)
	}
}
// initializeAIProvider configures the reasoning engine with the appropriate AI provider
func initializeAIProvider(cfg *config.Config, logger *SimpleLogger) error {
// Set the AI provider
reasoning.SetAIProvider(cfg.AI.Provider)
// Configure the selected provider
switch cfg.AI.Provider {
case "resetdata":
if cfg.AI.ResetData.APIKey == "" {
return fmt.Errorf("RESETDATA_API_KEY environment variable is required for resetdata provider")
}
resetdataConfig := reasoning.ResetDataConfig{
BaseURL: cfg.AI.ResetData.BaseURL,
APIKey: cfg.AI.ResetData.APIKey,
@@ -551,19 +597,19 @@ func initializeAIProvider(cfg *config.Config, logger *SimpleLogger) error {
Timeout: cfg.AI.ResetData.Timeout,
}
reasoning.SetResetDataConfig(resetdataConfig)
logger.Info("🌐 ResetData AI provider configured - Endpoint: %s, Model: %s",
logger.Info("🌐 ResetData AI provider configured - Endpoint: %s, Model: %s",
cfg.AI.ResetData.BaseURL, cfg.AI.ResetData.Model)
case "ollama":
reasoning.SetOllamaEndpoint(cfg.AI.Ollama.Endpoint)
logger.Info("🦙 Ollama AI provider configured - Endpoint: %s", cfg.AI.Ollama.Endpoint)
default:
logger.Warn("⚠️ Unknown AI provider '%s', defaulting to resetdata", cfg.AI.Provider)
if cfg.AI.ResetData.APIKey == "" {
return fmt.Errorf("RESETDATA_API_KEY environment variable is required for default resetdata provider")
}
resetdataConfig := reasoning.ResetDataConfig{
BaseURL: cfg.AI.ResetData.BaseURL,
APIKey: cfg.AI.ResetData.APIKey,
@@ -573,7 +619,7 @@ func initializeAIProvider(cfg *config.Config, logger *SimpleLogger) error {
reasoning.SetResetDataConfig(resetdataConfig)
reasoning.SetAIProvider("resetdata")
}
// Configure model selection
reasoning.SetModelConfig(
cfg.Agent.Models,