From 1bb736c09aa3aa139c2ce5d7b83e1c56d987958e Mon Sep 17 00:00:00 2001
From: anthonyrawlins
Date: Sat, 20 Sep 2025 23:21:35 +1000
Subject: [PATCH] Harden CHORUS security and messaging stack

---
 README.md | 4 +-
 coordinator/task_coordinator.go | 218 ++++---
 .../2025-02-16-shhh-sentinel-foundation.md | 30 +
 go.mod | 2 +-
 internal/logging/hypercore.go | 202 ++++--
 internal/runtime/agent_support.go | 202 +++++-
 internal/runtime/shared.go | 192 +++---
 pkg/config/config.go | 88 +--
 pkg/config/security.go | 94 ++-
 pkg/dht/dht.go | 213 +++---
 pkg/dht/dht_test.go | 617 ++++--------
 pkg/dht/encrypted_storage_security_test.go | 602 +++--------
 pkg/dht/real_dht.go | 115 +++-
 pkg/dht/replication_test.go | 237 +++---
 pkg/election/election.go | 362 +++++-----
 pkg/election/election_test.go | 508 ++++-------
 pkg/metrics/prometheus_metrics.go | 262 ++++----
 pkg/shhh/doc.go | 11 +
 pkg/shhh/rule.go | 130 ++++
 pkg/shhh/sentinel.go | 407 ++++++++++++
 pkg/shhh/sentinel_test.go | 95 +++
 pkg/shhh/stats.go | 60 ++
 pkg/shhh/types.go | 73 +++
 pkg/ucxl/decision_publisher.go | 100 +--
 pubsub/pubsub.go | 443 ++++++-----
 25 files changed, 2793 insertions(+), 2474 deletions(-)
 create mode 100644 docs/decisions/2025-02-16-shhh-sentinel-foundation.md
 create mode 100644 pkg/shhh/doc.go
 create mode 100644 pkg/shhh/rule.go
 create mode 100644 pkg/shhh/sentinel.go
 create mode 100644 pkg/shhh/sentinel_test.go
 create mode 100644 pkg/shhh/stats.go
 create mode 100644 pkg/shhh/types.go

diff --git a/README.md b/README.md
index d2844b0..eba0df4 100644
--- a/README.md
+++ b/README.md
@@ -10,7 +10,7 @@ CHORUS is the runtime that ties the CHORUS ecosystem together: libp2p mesh, DHT-
 | DHT + DecisionPublisher | ✅ Running | Encrypted storage wired through `pkg/dht`; decisions written via `ucxl.DecisionPublisher`. |
 | Election manager | ✅ Running | Admin election integrated with Backbeat; metrics exposed under `pkg/metrics`. |
 | SLURP (context intelligence) | 🚧 Stubbed | `pkg/slurp/slurp.go` contains TODOs for resolver, temporal graphs, intelligence. Leader integration scaffolding exists but uses placeholder IDs/request forwarding. |
-| SHHH (secrets sentinel) | ❌ Not implemented | No `pkg/shhh` module yet; redaction hooks are pending. |
+| SHHH (secrets sentinel) | 🚧 Sentinel live | `pkg/shhh` redacts hypercore + PubSub payloads with audit + metrics hooks (policy replay TBD). |
 | HMMM routing | 🚧 Partial | PubSub topics join, but capability/role announcements and HMMM router wiring are placeholders (`internal/runtime/agent_support.go`). |
 
 See `docs/progress/CHORUS-WHOOSH-development-plan.md` for the detailed build plan and `docs/progress/CHORUS-WHOOSH-roadmap.md` for sequencing.
@@ -33,7 +33,7 @@ You'll get a single agent container with:
 - DHT storage (AGE-encrypted)
 - HTTP API + health endpoints
 
-**Missing today:** SLURP context resolution, SHHH redaction, HMMM per-issue routing. Expect log warnings/TODOs for those paths.
+**Missing today:** SLURP context resolution, advanced SHHH policy replay, HMMM per-issue routing. Expect log warnings/TODOs for those paths.
 
## Roadmap Highlights diff --git a/coordinator/task_coordinator.go b/coordinator/task_coordinator.go index 5c51a2c..7ce4e54 100644 --- a/coordinator/task_coordinator.go +++ b/coordinator/task_coordinator.go @@ -9,50 +9,57 @@ import ( "chorus/internal/logging" "chorus/pkg/config" - "chorus/pubsub" - "chorus/pkg/repository" "chorus/pkg/hmmm" + "chorus/pkg/repository" + "chorus/pubsub" "github.com/google/uuid" "github.com/libp2p/go-libp2p/core/peer" ) +// TaskProgressTracker is notified when tasks start and complete so availability broadcasts stay accurate. +type TaskProgressTracker interface { + AddTask(taskID string) + RemoveTask(taskID string) +} + // TaskCoordinator manages task discovery, assignment, and execution across multiple repositories type TaskCoordinator struct { - pubsub *pubsub.PubSub - hlog *logging.HypercoreLog - ctx context.Context - config *config.Config - hmmmRouter *hmmm.Router - + pubsub *pubsub.PubSub + hlog *logging.HypercoreLog + ctx context.Context + config *config.Config + hmmmRouter *hmmm.Router + // Repository management - providers map[int]repository.TaskProvider // projectID -> provider - providerLock sync.RWMutex - factory repository.ProviderFactory - + providers map[int]repository.TaskProvider // projectID -> provider + providerLock sync.RWMutex + factory repository.ProviderFactory + // Task management - activeTasks map[string]*ActiveTask // taskKey -> active task - taskLock sync.RWMutex - taskMatcher repository.TaskMatcher - + activeTasks map[string]*ActiveTask // taskKey -> active task + taskLock sync.RWMutex + taskMatcher repository.TaskMatcher + taskTracker TaskProgressTracker + // Agent tracking - nodeID string - agentInfo *repository.AgentInfo - + nodeID string + agentInfo *repository.AgentInfo + // Sync settings - syncInterval time.Duration - lastSync map[int]time.Time - syncLock sync.RWMutex + syncInterval time.Duration + lastSync map[int]time.Time + syncLock sync.RWMutex } // ActiveTask represents a task currently being worked on type ActiveTask struct { - Task *repository.Task - Provider repository.TaskProvider - ProjectID int - ClaimedAt time.Time - Status string // claimed, working, completed, failed - AgentID string - Results map[string]interface{} + Task *repository.Task + Provider repository.TaskProvider + ProjectID int + ClaimedAt time.Time + Status string // claimed, working, completed, failed + AgentID string + Results map[string]interface{} } // NewTaskCoordinator creates a new task coordinator @@ -63,7 +70,9 @@ func NewTaskCoordinator( cfg *config.Config, nodeID string, hmmmRouter *hmmm.Router, + tracker TaskProgressTracker, ) *TaskCoordinator { + coordinator := &TaskCoordinator{ pubsub: ps, hlog: hlog, @@ -75,10 +84,11 @@ func NewTaskCoordinator( lastSync: make(map[int]time.Time), factory: &repository.DefaultProviderFactory{}, taskMatcher: &repository.DefaultTaskMatcher{}, + taskTracker: tracker, nodeID: nodeID, syncInterval: 30 * time.Second, } - + // Create agent info from config coordinator.agentInfo = &repository.AgentInfo{ ID: cfg.Agent.ID, @@ -91,23 +101,23 @@ func NewTaskCoordinator( Performance: map[string]interface{}{"score": 0.8}, // Default performance score Availability: "available", } - + return coordinator } // Start begins the task coordination process func (tc *TaskCoordinator) Start() { fmt.Printf("🎯 Starting task coordinator for agent %s (%s)\n", tc.agentInfo.ID, tc.agentInfo.Role) - + // Announce role and capabilities tc.announceAgentRole() - + // Start periodic task discovery and sync go tc.taskDiscoveryLoop() - + // 
Start role-based message handling tc.pubsub.SetAntennaeMessageHandler(tc.handleRoleMessage) - + fmt.Printf("βœ… Task coordinator started\n") } @@ -185,13 +195,17 @@ func (tc *TaskCoordinator) processTask(task *repository.Task, provider repositor tc.agentInfo.CurrentTasks = len(tc.activeTasks) tc.taskLock.Unlock() + if tc.taskTracker != nil { + tc.taskTracker.AddTask(taskKey) + } + // Log task claim tc.hlog.Append(logging.TaskClaimed, map[string]interface{}{ - "task_number": task.Number, - "repository": task.Repository, - "title": task.Title, + "task_number": task.Number, + "repository": task.Repository, + "title": task.Title, "required_role": task.RequiredRole, - "priority": task.Priority, + "priority": task.Priority, }) // Announce task claim @@ -212,11 +226,11 @@ func (tc *TaskCoordinator) processTask(task *repository.Task, provider repositor } if err := tc.hmmmRouter.Publish(tc.ctx, seedMsg); err != nil { fmt.Printf("⚠️ Failed to seed HMMM room for task %d: %v\n", task.Number, err) - tc.hlog.AppendString("system_error", map[string]interface{}{ - "error": "hmmm_seed_failed", - "task_number": task.Number, - "repository": task.Repository, - "message": err.Error(), + tc.hlog.AppendString("system_error", map[string]interface{}{ + "error": "hmmm_seed_failed", + "task_number": task.Number, + "repository": task.Repository, + "message": err.Error(), }) } else { fmt.Printf("🐜 Seeded HMMM room for task %d\n", task.Number) @@ -259,14 +273,14 @@ func (tc *TaskCoordinator) shouldRequestCollaboration(task *repository.Task) boo // requestTaskCollaboration requests collaboration for a task func (tc *TaskCoordinator) requestTaskCollaboration(task *repository.Task) { data := map[string]interface{}{ - "task_number": task.Number, - "repository": task.Repository, - "title": task.Title, - "required_role": task.RequiredRole, + "task_number": task.Number, + "repository": task.Repository, + "title": task.Title, + "required_role": task.RequiredRole, "required_expertise": task.RequiredExpertise, - "priority": task.Priority, - "requester_role": tc.agentInfo.Role, - "reason": "expertise_gap", + "priority": task.Priority, + "requester_role": tc.agentInfo.Role, + "reason": "expertise_gap", } opts := pubsub.MessageOptions{ @@ -288,7 +302,7 @@ func (tc *TaskCoordinator) requestTaskCollaboration(task *repository.Task) { // executeTask executes a claimed task func (tc *TaskCoordinator) executeTask(activeTask *ActiveTask) { taskKey := fmt.Sprintf("%s:%d", activeTask.Task.Repository, activeTask.Task.Number) - + // Update status tc.taskLock.Lock() activeTask.Status = "working" @@ -302,10 +316,10 @@ func (tc *TaskCoordinator) executeTask(activeTask *ActiveTask) { // Complete the task results := map[string]interface{}{ - "status": "completed", + "status": "completed", "completion_time": time.Now().Format(time.RFC3339), - "agent_id": tc.agentInfo.ID, - "agent_role": tc.agentInfo.Role, + "agent_id": tc.agentInfo.ID, + "agent_role": tc.agentInfo.Role, } taskResult := &repository.TaskResult{ @@ -316,13 +330,13 @@ func (tc *TaskCoordinator) executeTask(activeTask *ActiveTask) { err := activeTask.Provider.CompleteTask(activeTask.Task, taskResult) if err != nil { fmt.Printf("❌ Failed to complete task %s #%d: %v\n", activeTask.Task.Repository, activeTask.Task.Number, err) - + // Update status to failed tc.taskLock.Lock() activeTask.Status = "failed" activeTask.Results = map[string]interface{}{"error": err.Error()} tc.taskLock.Unlock() - + return } @@ -334,6 +348,10 @@ func (tc *TaskCoordinator) executeTask(activeTask *ActiveTask) { 
tc.agentInfo.CurrentTasks = len(tc.activeTasks) tc.taskLock.Unlock() + if tc.taskTracker != nil { + tc.taskTracker.RemoveTask(taskKey) + } + // Log completion tc.hlog.Append(logging.TaskCompleted, map[string]interface{}{ "task_number": activeTask.Task.Number, @@ -378,19 +396,19 @@ func (tc *TaskCoordinator) announceAgentRole() { // announceTaskClaim announces that this agent has claimed a task func (tc *TaskCoordinator) announceTaskClaim(task *repository.Task) { data := map[string]interface{}{ - "task_number": task.Number, - "repository": task.Repository, - "title": task.Title, - "agent_id": tc.agentInfo.ID, - "agent_role": tc.agentInfo.Role, - "claim_time": time.Now().Format(time.RFC3339), + "task_number": task.Number, + "repository": task.Repository, + "title": task.Title, + "agent_id": tc.agentInfo.ID, + "agent_role": tc.agentInfo.Role, + "claim_time": time.Now().Format(time.RFC3339), "estimated_completion": time.Now().Add(time.Hour).Format(time.RFC3339), } opts := pubsub.MessageOptions{ - FromRole: tc.agentInfo.Role, - Priority: "medium", - ThreadID: fmt.Sprintf("task-%s-%d", task.Repository, task.Number), + FromRole: tc.agentInfo.Role, + Priority: "medium", + ThreadID: fmt.Sprintf("task-%s-%d", task.Repository, task.Number), } err := tc.pubsub.PublishRoleBasedMessage(pubsub.TaskProgress, data, opts) @@ -463,15 +481,15 @@ func (tc *TaskCoordinator) handleTaskHelpRequest(msg pubsub.Message, from peer.I } } - if canHelp && tc.agentInfo.CurrentTasks < tc.agentInfo.MaxTasks { + if canHelp && tc.agentInfo.CurrentTasks < tc.agentInfo.MaxTasks { // Offer help responseData := map[string]interface{}{ - "agent_id": tc.agentInfo.ID, - "agent_role": tc.agentInfo.Role, - "expertise": tc.agentInfo.Expertise, - "availability": tc.agentInfo.MaxTasks - tc.agentInfo.CurrentTasks, - "offer_type": "collaboration", - "response_to": msg.Data, + "agent_id": tc.agentInfo.ID, + "agent_role": tc.agentInfo.Role, + "expertise": tc.agentInfo.Expertise, + "availability": tc.agentInfo.MaxTasks - tc.agentInfo.CurrentTasks, + "offer_type": "collaboration", + "response_to": msg.Data, } opts := pubsub.MessageOptions{ @@ -480,34 +498,34 @@ func (tc *TaskCoordinator) handleTaskHelpRequest(msg pubsub.Message, from peer.I ThreadID: msg.ThreadID, } - err := tc.pubsub.PublishRoleBasedMessage(pubsub.TaskHelpResponse, responseData, opts) - if err != nil { - fmt.Printf("⚠️ Failed to offer help: %v\n", err) - } else { - fmt.Printf("🀝 Offered help for task collaboration\n") - } + err := tc.pubsub.PublishRoleBasedMessage(pubsub.TaskHelpResponse, responseData, opts) + if err != nil { + fmt.Printf("⚠️ Failed to offer help: %v\n", err) + } else { + fmt.Printf("🀝 Offered help for task collaboration\n") + } - // Also reflect the help offer into the HMMM per-issue room (best-effort) - if tc.hmmmRouter != nil { - if tn, ok := msg.Data["task_number"].(float64); ok { - issueID := int64(tn) - hmsg := hmmm.Message{ - Version: 1, - Type: "meta_msg", - IssueID: issueID, - ThreadID: fmt.Sprintf("issue-%d", issueID), - MsgID: uuid.New().String(), - NodeID: tc.nodeID, - HopCount: 0, - Timestamp: time.Now().UTC(), - Message: fmt.Sprintf("Help offer from %s (availability %d)", tc.agentInfo.Role, tc.agentInfo.MaxTasks-tc.agentInfo.CurrentTasks), - } - if err := tc.hmmmRouter.Publish(tc.ctx, hmsg); err != nil { - fmt.Printf("⚠️ Failed to reflect help into HMMM: %v\n", err) - } - } - } - } + // Also reflect the help offer into the HMMM per-issue room (best-effort) + if tc.hmmmRouter != nil { + if tn, ok := msg.Data["task_number"].(float64); ok { + 
issueID := int64(tn) + hmsg := hmmm.Message{ + Version: 1, + Type: "meta_msg", + IssueID: issueID, + ThreadID: fmt.Sprintf("issue-%d", issueID), + MsgID: uuid.New().String(), + NodeID: tc.nodeID, + HopCount: 0, + Timestamp: time.Now().UTC(), + Message: fmt.Sprintf("Help offer from %s (availability %d)", tc.agentInfo.Role, tc.agentInfo.MaxTasks-tc.agentInfo.CurrentTasks), + } + if err := tc.hmmmRouter.Publish(tc.ctx, hmsg); err != nil { + fmt.Printf("⚠️ Failed to reflect help into HMMM: %v\n", err) + } + } + } + } } // handleExpertiseRequest handles requests for specific expertise diff --git a/docs/decisions/2025-02-16-shhh-sentinel-foundation.md b/docs/decisions/2025-02-16-shhh-sentinel-foundation.md new file mode 100644 index 0000000..42cf0fb --- /dev/null +++ b/docs/decisions/2025-02-16-shhh-sentinel-foundation.md @@ -0,0 +1,30 @@ +# Decision Record: Establish SHHH Sentinel Foundations + +- **Date:** 2025-02-16 +- **Status:** Accepted +- **Context:** CHORUS roadmap Phase 1 requires a secrets sentinel (`pkg/shhh`) before we wire COOEE/WHOOSH telemetry and audit plumbing. The runtime previously emitted placeholder TODOs and logged sensitive payloads without guard rails. + +## Problem +- We lacked a reusable component to detect and redact secrets prior to log/telemetry fan-out. +- Without a dedicated sentinel we could not attach audit sinks or surface metrics for redaction events, blocking roadmap item `SEC-SHHH`. + +## Decision +- Introduce `pkg/shhh` as the SHHH sentinel with: + - Curated default rules (API keys, bearer/OAuth tokens, private key PEM blocks, OpenAI secrets). + - Extensible configuration for custom regex rules and per-rule severity/tags. + - Optional audit sink and statistics collection for integration with COOEE/WHOOSH pipelines. + - Helpers to redact free-form text and `map[string]any` payloads used by our logging pipeline. + +## Rationale +- Starting with a focused set of high-signal rules gives immediate coverage for the most damaging leak classes without delaying larger SLURP/SHHH workstreams. +- The API mirrors other CHORUS subsystems (options, config structs, stats snapshots) so existing operators can plug metrics/audits without bespoke glue. +- Providing deterministic findings/locations simplifies future enforcement (e.g., WHOOSH UI badges, COOEE replay) while keeping implementation lean. + +## Impact +- Runtime components can now instantiate SHHH and guarantee `[REDACTED]` placeholders for sensitive fields. +- Audit/event plumbing can be wired incrementallyβ€”hashes are emitted for replay without storing raw secrets. +- Future roadmap tasks (policy driven rules, replay, UCXL evidence) can extend `pkg/shhh` rather than implementing ad-hoc redaction in each subsystem. + +## Related Work +- Roadmap: `docs/progress/CHORUS-WHOOSH-roadmap.md` (Phase 1.2 `SEC-SHHH`). +- README coverage gap noted in `README.md` table (SHHH not implemented). 
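The decision record above describes the sentinel surface that the rest of this patch wires into the runtime (`internal/runtime/shared.go` constructs the sentinel; `internal/logging/hypercore.go` and `pubsub` call it via `SetRedactor`). A minimal usage sketch follows, assuming only the calls visible elsewhere in this patch (`shhh.NewSentinel`, `shhh.WithFindingObserver`, `SetAuditSink`, `RedactMapWithLabels`); the payload, label values, and sink name are illustrative, not part of the change itself.

```go
package main

import (
	"context"
	"fmt"

	"chorus/pkg/shhh"
)

// consoleSink mirrors the audit-sink shape used in internal/runtime/shared.go:
// it receives one AuditEvent per redaction (rule, severity, payload path).
type consoleSink struct{}

func (consoleSink) RecordRedaction(_ context.Context, event shhh.AuditEvent) {
	fmt.Printf("redaction rule=%s severity=%s path=%s\n", event.Rule, event.Severity, event.Path)
}

func main() {
	// Default curated rules only; custom regex rules would be supplied via shhh.Config.
	sentinel, err := shhh.NewSentinel(
		shhh.Config{},
		shhh.WithFindingObserver(func(_ context.Context, findings []shhh.Finding) {
			// The runtime feeds these findings into Prometheus counters.
			for _, f := range findings {
				fmt.Printf("finding rule=%s count=%d\n", f.Rule, f.Count)
			}
		}),
	)
	if err != nil {
		panic(err)
	}
	sentinel.SetAuditSink(consoleSink{})

	// Illustrative payload; real callers pass hypercore log entries or PubSub message data.
	payload := map[string]any{
		"message": "deploy finished",
		"token":   "Bearer sk-example-not-a-real-secret",
	}
	labels := map[string]string{"source": "example", "log_type": "task_progress"}

	// RedactMapWithLabels mutates the map in place, replacing matches with [REDACTED].
	sentinel.RedactMapWithLabels(context.Background(), payload, labels)
	fmt.Println(payload)
}
```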
diff --git a/go.mod b/go.mod index 9897068..99d1e95 100644 --- a/go.mod +++ b/go.mod @@ -159,4 +159,4 @@ require ( lukechampine.com/blake3 v1.2.1 // indirect ) -replace github.com/chorus-services/backbeat => /home/tony/chorus/project-queues/active/BACKBEAT/backbeat/prototype +replace github.com/chorus-services/backbeat => ../BACKBEAT/backbeat/prototype diff --git a/internal/logging/hypercore.go b/internal/logging/hypercore.go index 58bb954..fac1689 100644 --- a/internal/logging/hypercore.go +++ b/internal/logging/hypercore.go @@ -1,6 +1,7 @@ package logging import ( + "context" "crypto/sha256" "encoding/hex" "encoding/json" @@ -8,6 +9,7 @@ import ( "sync" "time" + "chorus/pkg/shhh" "github.com/libp2p/go-libp2p/core/peer" ) @@ -23,12 +25,14 @@ type HypercoreLog struct { entries []LogEntry mutex sync.RWMutex peerID peer.ID - + // Verification chain headHash string - + // Replication replicators map[peer.ID]*Replicator + + redactor *shhh.Sentinel } // LogEntry represents a single entry in the distributed log @@ -48,12 +52,12 @@ type LogType string const ( // Bzzz coordination logs - TaskAnnounced LogType = "task_announced" - TaskClaimed LogType = "task_claimed" - TaskProgress LogType = "task_progress" - TaskCompleted LogType = "task_completed" - TaskFailed LogType = "task_failed" - + TaskAnnounced LogType = "task_announced" + TaskClaimed LogType = "task_claimed" + TaskProgress LogType = "task_progress" + TaskCompleted LogType = "task_completed" + TaskFailed LogType = "task_failed" + // HMMM meta-discussion logs PlanProposed LogType = "plan_proposed" ObjectionRaised LogType = "objection_raised" @@ -65,17 +69,17 @@ const ( TaskHelpReceived LogType = "task_help_received" // System logs - PeerJoined LogType = "peer_joined" - PeerLeft LogType = "peer_left" + PeerJoined LogType = "peer_joined" + PeerLeft LogType = "peer_left" CapabilityBcast LogType = "capability_broadcast" - NetworkEvent LogType = "network_event" + NetworkEvent LogType = "network_event" ) // Replicator handles log replication with other peers type Replicator struct { - peerID peer.ID + peerID peer.ID lastSyncIndex uint64 - connected bool + connected bool } // NewHypercoreLog creates a new distributed log for a peer @@ -88,6 +92,13 @@ func NewHypercoreLog(peerID peer.ID) *HypercoreLog { } } +// SetRedactor wires the SHHH sentinel so log payloads are sanitized before persistence. 
+func (h *HypercoreLog) SetRedactor(redactor *shhh.Sentinel) { + h.mutex.Lock() + defer h.mutex.Unlock() + h.redactor = redactor +} + // AppendString is a convenience method for string log types (to match interface) func (h *HypercoreLog) AppendString(logType string, data map[string]interface{}) error { _, err := h.Append(LogType(logType), data) @@ -98,38 +109,40 @@ func (h *HypercoreLog) AppendString(logType string, data map[string]interface{}) func (h *HypercoreLog) Append(logType LogType, data map[string]interface{}) (*LogEntry, error) { h.mutex.Lock() defer h.mutex.Unlock() - + index := uint64(len(h.entries)) - + + sanitized := h.redactData(logType, data) + entry := LogEntry{ Index: index, Timestamp: time.Now(), Author: h.peerID.String(), Type: logType, - Data: data, + Data: sanitized, PrevHash: h.headHash, } - + // Calculate hash entryHash, err := h.calculateEntryHash(entry) if err != nil { return nil, fmt.Errorf("failed to calculate entry hash: %w", err) } entry.Hash = entryHash - + // Add simple signature (in production, use proper cryptographic signatures) entry.Signature = h.createSignature(entry) - + // Append to log h.entries = append(h.entries, entry) h.headHash = entryHash - - fmt.Printf("πŸ“ Log entry appended: %s [%d] by %s\n", + + fmt.Printf("πŸ“ Log entry appended: %s [%d] by %s\n", logType, index, h.peerID.ShortString()) - + // Trigger replication to connected peers go h.replicateEntry(entry) - + return &entry, nil } @@ -137,11 +150,11 @@ func (h *HypercoreLog) Append(logType LogType, data map[string]interface{}) (*Lo func (h *HypercoreLog) Get(index uint64) (*LogEntry, error) { h.mutex.RLock() defer h.mutex.RUnlock() - + if index >= uint64(len(h.entries)) { return nil, fmt.Errorf("entry %d not found", index) } - + return &h.entries[index], nil } @@ -149,7 +162,7 @@ func (h *HypercoreLog) Get(index uint64) (*LogEntry, error) { func (h *HypercoreLog) Length() uint64 { h.mutex.RLock() defer h.mutex.RUnlock() - + return uint64(len(h.entries)) } @@ -157,22 +170,22 @@ func (h *HypercoreLog) Length() uint64 { func (h *HypercoreLog) GetRange(start, end uint64) ([]LogEntry, error) { h.mutex.RLock() defer h.mutex.RUnlock() - + if start >= uint64(len(h.entries)) { return nil, fmt.Errorf("start index %d out of range", start) } - + if end > uint64(len(h.entries)) { end = uint64(len(h.entries)) } - + if start > end { return nil, fmt.Errorf("invalid range: start %d > end %d", start, end) } - + result := make([]LogEntry, end-start) copy(result, h.entries[start:end]) - + return result, nil } @@ -180,14 +193,14 @@ func (h *HypercoreLog) GetRange(start, end uint64) ([]LogEntry, error) { func (h *HypercoreLog) GetEntriesByType(logType LogType) ([]LogEntry, error) { h.mutex.RLock() defer h.mutex.RUnlock() - + var result []LogEntry for _, entry := range h.entries { if entry.Type == logType { result = append(result, entry) } } - + return result, nil } @@ -195,14 +208,14 @@ func (h *HypercoreLog) GetEntriesByType(logType LogType) ([]LogEntry, error) { func (h *HypercoreLog) GetEntriesByAuthor(author string) ([]LogEntry, error) { h.mutex.RLock() defer h.mutex.RUnlock() - + var result []LogEntry for _, entry := range h.entries { if entry.Author == author { result = append(result, entry) } } - + return result, nil } @@ -210,20 +223,20 @@ func (h *HypercoreLog) GetEntriesByAuthor(author string) ([]LogEntry, error) { func (h *HypercoreLog) GetRecentEntries(count int) ([]LogEntry, error) { h.mutex.RLock() defer h.mutex.RUnlock() - + totalEntries := len(h.entries) if count <= 0 || totalEntries == 0 
{ return []LogEntry{}, nil } - + start := 0 if totalEntries > count { start = totalEntries - count } - + result := make([]LogEntry, totalEntries-start) copy(result, h.entries[start:]) - + return result, nil } @@ -231,14 +244,14 @@ func (h *HypercoreLog) GetRecentEntries(count int) ([]LogEntry, error) { func (h *HypercoreLog) GetEntriesSince(sinceIndex uint64) ([]LogEntry, error) { h.mutex.RLock() defer h.mutex.RUnlock() - + if sinceIndex >= uint64(len(h.entries)) { return []LogEntry{}, nil } - + result := make([]LogEntry, len(h.entries)-int(sinceIndex)) copy(result, h.entries[sinceIndex:]) - + return result, nil } @@ -246,27 +259,27 @@ func (h *HypercoreLog) GetEntriesSince(sinceIndex uint64) ([]LogEntry, error) { func (h *HypercoreLog) VerifyIntegrity() error { h.mutex.RLock() defer h.mutex.RUnlock() - + var prevHash string for i, entry := range h.entries { // Verify previous hash link if entry.PrevHash != prevHash { return fmt.Errorf("integrity error at entry %d: prev_hash mismatch", i) } - + // Verify entry hash calculatedHash, err := h.calculateEntryHash(entry) if err != nil { return fmt.Errorf("failed to calculate hash for entry %d: %w", i, err) } - + if entry.Hash != calculatedHash { return fmt.Errorf("integrity error at entry %d: hash mismatch", i) } - + prevHash = entry.Hash } - + return nil } @@ -274,13 +287,13 @@ func (h *HypercoreLog) VerifyIntegrity() error { func (h *HypercoreLog) AddReplicator(peerID peer.ID) { h.mutex.Lock() defer h.mutex.Unlock() - + h.replicators[peerID] = &Replicator{ - peerID: peerID, + peerID: peerID, lastSyncIndex: 0, - connected: true, + connected: true, } - + fmt.Printf("πŸ”„ Added replicator: %s\n", peerID.ShortString()) } @@ -288,7 +301,7 @@ func (h *HypercoreLog) AddReplicator(peerID peer.ID) { func (h *HypercoreLog) RemoveReplicator(peerID peer.ID) { h.mutex.Lock() defer h.mutex.Unlock() - + delete(h.replicators, peerID) fmt.Printf("πŸ”„ Removed replicator: %s\n", peerID.ShortString()) } @@ -303,10 +316,10 @@ func (h *HypercoreLog) replicateEntry(entry LogEntry) { } } h.mutex.RUnlock() - + for _, replicator := range replicators { // In a real implementation, this would send the entry over the network - fmt.Printf("πŸ”„ Replicating entry %d to %s\n", + fmt.Printf("πŸ”„ Replicating entry %d to %s\n", entry.Index, replicator.peerID.ShortString()) } } @@ -322,16 +335,75 @@ func (h *HypercoreLog) calculateEntryHash(entry LogEntry) (string, error) { Data: entry.Data, PrevHash: entry.PrevHash, } - + entryBytes, err := json.Marshal(entryForHash) if err != nil { return "", err } - + hash := sha256.Sum256(entryBytes) return hex.EncodeToString(hash[:]), nil } +func (h *HypercoreLog) redactData(logType LogType, data map[string]interface{}) map[string]interface{} { + cloned := cloneLogMap(data) + if cloned == nil { + return nil + } + if h.redactor != nil { + labels := map[string]string{ + "source": "hypercore", + "log_type": string(logType), + } + h.redactor.RedactMapWithLabels(context.Background(), cloned, labels) + } + return cloned +} + +func cloneLogMap(in map[string]interface{}) map[string]interface{} { + if in == nil { + return nil + } + out := make(map[string]interface{}, len(in)) + for k, v := range in { + out[k] = cloneLogValue(v) + } + return out +} + +func cloneLogValue(v interface{}) interface{} { + switch tv := v.(type) { + case map[string]interface{}: + return cloneLogMap(tv) + case map[string]any: + converted := make(map[string]interface{}, len(tv)) + for k, val := range tv { + converted[k] = cloneLogValue(val) + } + return converted + case 
[]interface{}: + return cloneLogSlice(tv) + case []any: + converted := make([]interface{}, len(tv)) + for i, val := range tv { + converted[i] = cloneLogValue(val) + } + return converted + case []string: + return append([]string(nil), tv...) + default: + return tv + } +} + +func cloneLogSlice(in []interface{}) []interface{} { + out := make([]interface{}, len(in)) + for i, val := range in { + out[i] = cloneLogValue(val) + } + return out +} + // createSignature creates a simplified signature for the entry func (h *HypercoreLog) createSignature(entry LogEntry) string { // In production, this would use proper cryptographic signatures @@ -345,21 +417,21 @@ func (h *HypercoreLog) createSignature(entry LogEntry) string { func (h *HypercoreLog) GetStats() map[string]interface{} { h.mutex.RLock() defer h.mutex.RUnlock() - + typeCount := make(map[LogType]int) authorCount := make(map[string]int) - + for _, entry := range h.entries { typeCount[entry.Type]++ authorCount[entry.Author]++ } - + return map[string]interface{}{ - "total_entries": len(h.entries), - "head_hash": h.headHash, - "replicators": len(h.replicators), - "entries_by_type": typeCount, + "total_entries": len(h.entries), + "head_hash": h.headHash, + "replicators": len(h.replicators), + "entries_by_type": typeCount, "entries_by_author": authorCount, - "peer_id": h.peerID.String(), + "peer_id": h.peerID.String(), } -} \ No newline at end of file +} diff --git a/internal/runtime/agent_support.go b/internal/runtime/agent_support.go index 9f62965..04519d8 100644 --- a/internal/runtime/agent_support.go +++ b/internal/runtime/agent_support.go @@ -2,9 +2,11 @@ package runtime import ( "context" + "fmt" "time" "chorus/internal/logging" + "chorus/pkg/dht" "chorus/pkg/health" "chorus/pkg/shutdown" "chorus/pubsub" @@ -43,37 +45,37 @@ func (r *SharedRuntime) StartAgentMode() error { // === Comprehensive Health Monitoring & Graceful Shutdown === shutdownManager := shutdown.NewManager(30*time.Second, &simpleLogger{logger: r.Logger}) - + healthManager := health.NewManager(r.Node.ID().ShortString(), AppVersion, &simpleLogger{logger: r.Logger}) healthManager.SetShutdownManager(shutdownManager) - + // Register health checks r.setupHealthChecks(healthManager) - + // Register components for graceful shutdown r.setupGracefulShutdown(shutdownManager, healthManager) - + // Start health monitoring if err := healthManager.Start(); err != nil { return err } r.HealthManager = healthManager r.Logger.Info("❀️ Health monitoring started") - + // Start health HTTP server if err := healthManager.StartHTTPServer(r.Config.Network.HealthPort); err != nil { r.Logger.Error("❌ Failed to start health HTTP server: %v", err) } else { r.Logger.Info("πŸ₯ Health endpoints available at http://localhost:%d/health", r.Config.Network.HealthPort) } - + // Start shutdown manager shutdownManager.Start() r.ShutdownManager = shutdownManager r.Logger.Info("πŸ›‘οΈ Graceful shutdown manager started") - + r.Logger.Info("βœ… CHORUS agent system fully operational with health monitoring") - + // Wait for graceful shutdown shutdownManager.Wait() r.Logger.Info("βœ… CHORUS agent system shutdown completed") @@ -90,7 +92,7 @@ func (r *SharedRuntime) announceAvailability() { currentTasks := r.TaskTracker.GetActiveTasks() maxTasks := r.TaskTracker.GetMaxTasks() isAvailable := len(currentTasks) < maxTasks - + status := "ready" if len(currentTasks) >= maxTasks { status = "busy" @@ -99,13 +101,13 @@ func (r *SharedRuntime) announceAvailability() { } availability := map[string]interface{}{ - "node_id": 
r.Node.ID().ShortString(), + "node_id": r.Node.ID().ShortString(), "available_for_work": isAvailable, - "current_tasks": len(currentTasks), - "max_tasks": maxTasks, - "last_activity": time.Now().Unix(), - "status": status, - "timestamp": time.Now().Unix(), + "current_tasks": len(currentTasks), + "max_tasks": maxTasks, + "last_activity": time.Now().Unix(), + "status": status, + "timestamp": time.Now().Unix(), } if err := r.PubSub.PublishBzzzMessage(pubsub.AvailabilityBcast, availability); err != nil { r.Logger.Error("❌ Failed to announce availability: %v", err) @@ -126,16 +128,79 @@ func (r *SharedRuntime) statusReporter() { // announceCapabilitiesOnChange announces capabilities when they change func (r *SharedRuntime) announceCapabilitiesOnChange() { - // Implementation from CHORUS would go here - // For now, just log that capabilities would be announced - r.Logger.Info("πŸ“’ Agent capabilities announcement enabled") + if r.PubSub == nil { + r.Logger.Warn("⚠️ Capability broadcast skipped: PubSub not initialized") + return + } + + r.Logger.Info("πŸ“’ Broadcasting agent capabilities to network") + + activeTaskCount := 0 + if r.TaskTracker != nil { + activeTaskCount = len(r.TaskTracker.GetActiveTasks()) + } + + announcement := map[string]interface{}{ + "agent_id": r.Config.Agent.ID, + "node_id": r.Node.ID().ShortString(), + "version": AppVersion, + "capabilities": r.Config.Agent.Capabilities, + "expertise": r.Config.Agent.Expertise, + "models": r.Config.Agent.Models, + "specialization": r.Config.Agent.Specialization, + "max_tasks": r.Config.Agent.MaxTasks, + "current_tasks": activeTaskCount, + "timestamp": time.Now().Unix(), + "availability": "ready", + } + + if err := r.PubSub.PublishBzzzMessage(pubsub.CapabilityBcast, announcement); err != nil { + r.Logger.Error("❌ Failed to broadcast capabilities: %v", err) + return + } + + r.Logger.Info("βœ… Capabilities broadcast published") + + // TODO: Watch for live capability changes (role updates, model changes) and re-broadcast } // announceRoleOnStartup announces role when the agent starts func (r *SharedRuntime) announceRoleOnStartup() { - // Implementation from CHORUS would go here - // For now, just log that role would be announced - r.Logger.Info("🎭 Agent role announcement enabled") + role := r.Config.Agent.Role + if role == "" { + r.Logger.Info("🎭 No agent role configured; skipping role announcement") + return + } + if r.PubSub == nil { + r.Logger.Warn("⚠️ Role announcement skipped: PubSub not initialized") + return + } + + r.Logger.Info("🎭 Announcing agent role to collaboration mesh") + + announcement := map[string]interface{}{ + "agent_id": r.Config.Agent.ID, + "node_id": r.Node.ID().ShortString(), + "role": role, + "expertise": r.Config.Agent.Expertise, + "capabilities": r.Config.Agent.Capabilities, + "reports_to": r.Config.Agent.ReportsTo, + "specialization": r.Config.Agent.Specialization, + "timestamp": time.Now().Unix(), + } + + opts := pubsub.MessageOptions{ + FromRole: role, + Priority: "medium", + ThreadID: fmt.Sprintf("role:%s", role), + } + + if err := r.PubSub.PublishRoleBasedMessage(pubsub.RoleAnnouncement, announcement, opts); err != nil { + r.Logger.Error("❌ Failed to announce role: %v", err) + return + } + + r.Logger.Info("βœ… Role announcement published") } func (r *SharedRuntime) setupHealthChecks(healthManager *health.Manager) { @@ -151,31 +216,108 @@ func (r *SharedRuntime) setupHealthChecks(healthManager *health.Manager) { Checker: func(ctx context.Context) health.CheckResult { healthInfo := 
r.BackbeatIntegration.GetHealth() connected, _ := healthInfo["connected"].(bool) - + result := health.CheckResult{ Healthy: connected, Details: healthInfo, Timestamp: time.Now(), } - + if connected { result.Message = "BACKBEAT integration healthy and connected" } else { result.Message = "BACKBEAT integration not connected" } - + return result }, } healthManager.RegisterCheck(backbeatCheck) } - - // Add other health checks (P2P, DHT, etc.) - // Implementation from CHORUS would go here + + // Register enhanced health instrumentation when core subsystems are available + if r.PubSub == nil { + r.Logger.Warn("⚠️ Skipping enhanced health checks: PubSub not initialized") + return + } + if r.ElectionManager == nil { + r.Logger.Warn("⚠️ Skipping enhanced health checks: election manager not ready") + return + } + + var replication *dht.ReplicationManager + if r.DHTNode != nil { + replication = r.DHTNode.ReplicationManager() + } + + enhanced := health.NewEnhancedHealthChecks( + healthManager, + r.ElectionManager, + r.DHTNode, + r.PubSub, + replication, + &simpleLogger{logger: r.Logger}, + ) + + r.EnhancedHealth = enhanced + r.Logger.Info("🩺 Enhanced health checks registered") } func (r *SharedRuntime) setupGracefulShutdown(shutdownManager *shutdown.Manager, healthManager *health.Manager) { - // Register components for graceful shutdown - // Implementation would register all components that need graceful shutdown + if shutdownManager == nil { + r.Logger.Warn("⚠️ Shutdown manager not initialized; graceful teardown skipped") + return + } + + if r.HTTPServer != nil { + httpComponent := shutdown.NewGenericComponent("http-api-server", 10, true). + SetShutdownFunc(func(ctx context.Context) error { + return r.HTTPServer.Stop() + }) + shutdownManager.Register(httpComponent) + } + + if healthManager != nil { + healthComponent := shutdown.NewGenericComponent("health-manager", 15, true). + SetShutdownFunc(func(ctx context.Context) error { + return healthManager.Stop() + }) + shutdownManager.Register(healthComponent) + } + + if r.UCXIServer != nil { + ucxiComponent := shutdown.NewGenericComponent("ucxi-server", 20, true). + SetShutdownFunc(func(ctx context.Context) error { + return r.UCXIServer.Stop() + }) + shutdownManager.Register(ucxiComponent) + } + + if r.PubSub != nil { + shutdownManager.Register(shutdown.NewPubSubComponent("pubsub", r.PubSub.Close, 30)) + } + + if r.DHTNode != nil { + dhtComponent := shutdown.NewGenericComponent("dht-node", 35, true). + SetCloser(r.DHTNode.Close) + shutdownManager.Register(dhtComponent) + } + + if r.Node != nil { + shutdownManager.Register(shutdown.NewP2PNodeComponent("p2p-node", r.Node.Close, 40)) + } + + if r.ElectionManager != nil { + shutdownManager.Register(shutdown.NewElectionManagerComponent("election-manager", r.ElectionManager.Stop, 45)) + } + + if r.BackbeatIntegration != nil { + backbeatComponent := shutdown.NewGenericComponent("backbeat-integration", 50, true). 
+ SetShutdownFunc(func(ctx context.Context) error { + return r.BackbeatIntegration.Stop() + }) + shutdownManager.Register(backbeatComponent) + } + r.Logger.Info("πŸ›‘οΈ Graceful shutdown components registered") -} \ No newline at end of file +} diff --git a/internal/runtime/shared.go b/internal/runtime/shared.go index 8174320..028abf6 100644 --- a/internal/runtime/shared.go +++ b/internal/runtime/shared.go @@ -21,8 +21,10 @@ import ( "chorus/pkg/dht" "chorus/pkg/election" "chorus/pkg/health" - "chorus/pkg/shutdown" + "chorus/pkg/metrics" "chorus/pkg/prompt" + "chorus/pkg/shhh" + "chorus/pkg/shutdown" "chorus/pkg/ucxi" "chorus/pkg/ucxl" "chorus/pubsub" @@ -53,8 +55,8 @@ func (l *SimpleLogger) Error(msg string, args ...interface{}) { // SimpleTaskTracker tracks active tasks for availability reporting type SimpleTaskTracker struct { - maxTasks int - activeTasks map[string]bool + maxTasks int + activeTasks map[string]bool decisionPublisher *ucxl.DecisionPublisher } @@ -80,7 +82,7 @@ func (t *SimpleTaskTracker) AddTask(taskID string) { // RemoveTask marks a task as completed and publishes decision if publisher available func (t *SimpleTaskTracker) RemoveTask(taskID string) { delete(t.activeTasks, taskID) - + // Publish task completion decision if publisher is available if t.decisionPublisher != nil { t.publishTaskCompletion(taskID, true, "Task completed successfully", nil) @@ -92,7 +94,7 @@ func (t *SimpleTaskTracker) publishTaskCompletion(taskID string, success bool, s if t.decisionPublisher == nil { return } - + if err := t.decisionPublisher.PublishTaskCompletion(taskID, success, summary, filesModified); err != nil { fmt.Printf("⚠️ Failed to publish task completion for %s: %v\n", taskID, err) } else { @@ -102,32 +104,35 @@ func (t *SimpleTaskTracker) publishTaskCompletion(taskID string, success bool, s // SharedRuntime contains all the shared P2P infrastructure components type SharedRuntime struct { - Config *config.Config - Logger *SimpleLogger - Context context.Context - Cancel context.CancelFunc - Node *p2p.Node - PubSub *pubsub.PubSub - HypercoreLog *logging.HypercoreLog - MDNSDiscovery *discovery.MDNSDiscovery - BackbeatIntegration *backbeat.Integration - DHTNode *dht.LibP2PDHT - EncryptedStorage *dht.EncryptedDHTStorage - DecisionPublisher *ucxl.DecisionPublisher - ElectionManager *election.ElectionManager - TaskCoordinator *coordinator.TaskCoordinator - HTTPServer *api.HTTPServer - UCXIServer *ucxi.Server - HealthManager *health.Manager - ShutdownManager *shutdown.Manager - TaskTracker *SimpleTaskTracker + Config *config.Config + Logger *SimpleLogger + Context context.Context + Cancel context.CancelFunc + Node *p2p.Node + PubSub *pubsub.PubSub + HypercoreLog *logging.HypercoreLog + MDNSDiscovery *discovery.MDNSDiscovery + BackbeatIntegration *backbeat.Integration + DHTNode *dht.LibP2PDHT + EncryptedStorage *dht.EncryptedDHTStorage + DecisionPublisher *ucxl.DecisionPublisher + ElectionManager *election.ElectionManager + TaskCoordinator *coordinator.TaskCoordinator + HTTPServer *api.HTTPServer + UCXIServer *ucxi.Server + HealthManager *health.Manager + EnhancedHealth *health.EnhancedHealthChecks + ShutdownManager *shutdown.Manager + TaskTracker *SimpleTaskTracker + Metrics *metrics.CHORUSMetrics + Shhh *shhh.Sentinel } // Initialize sets up all shared P2P infrastructure components func Initialize(appMode string) (*SharedRuntime, error) { runtime := &SharedRuntime{} runtime.Logger = &SimpleLogger{} - + ctx, cancel := context.WithCancel(context.Background()) runtime.Context = ctx 
runtime.Cancel = cancel @@ -142,7 +147,7 @@ func Initialize(appMode string) (*SharedRuntime, error) { return nil, fmt.Errorf("configuration error: %v", err) } runtime.Config = cfg - + runtime.Logger.Info("βœ… Configuration loaded successfully") runtime.Logger.Info("πŸ€– Agent ID: %s", cfg.Agent.ID) runtime.Logger.Info("🎯 Specialization: %s", cfg.Agent.Specialization) @@ -166,6 +171,21 @@ func Initialize(appMode string) (*SharedRuntime, error) { } runtime.Logger.Info("βœ… AI provider configured successfully") + // Initialize metrics collector + runtime.Metrics = metrics.NewCHORUSMetrics(nil) + + // Initialize SHHH sentinel + sentinel, err := shhh.NewSentinel( + shhh.Config{}, + shhh.WithFindingObserver(runtime.handleShhhFindings), + ) + if err != nil { + return nil, fmt.Errorf("failed to initialize SHHH sentinel: %v", err) + } + sentinel.SetAuditSink(&shhhAuditSink{logger: runtime.Logger}) + runtime.Shhh = sentinel + runtime.Logger.Info("πŸ›‘οΈ SHHH sentinel initialized") + // Initialize BACKBEAT integration var backbeatIntegration *backbeat.Integration backbeatIntegration, err = backbeat.NewIntegration(cfg, cfg.Agent.ID, runtime.Logger) @@ -198,6 +218,9 @@ func Initialize(appMode string) (*SharedRuntime, error) { // Initialize Hypercore-style logger for P2P coordination hlog := logging.NewHypercoreLog(node.ID()) + if runtime.Shhh != nil { + hlog.SetRedactor(runtime.Shhh) + } hlog.Append(logging.PeerJoined, map[string]interface{}{"status": "started"}) runtime.HypercoreLog = hlog runtime.Logger.Info("πŸ“ Hypercore logger initialized") @@ -214,8 +237,11 @@ func Initialize(appMode string) (*SharedRuntime, error) { if err != nil { return nil, fmt.Errorf("failed to create PubSub: %v", err) } + if runtime.Shhh != nil { + ps.SetRedactor(runtime.Shhh) + } runtime.PubSub = ps - + runtime.Logger.Info("πŸ“‘ PubSub system initialized") // Join role-based topics if role is configured @@ -294,12 +320,12 @@ func (r *SharedRuntime) Cleanup() { func (r *SharedRuntime) initializeElectionSystem() error { // === Admin Election System === electionManager := election.NewElectionManager(r.Context, r.Config, r.Node.Host(), r.PubSub, r.Node.ID().ShortString()) - + // Set election callbacks with BACKBEAT integration electionManager.SetCallbacks( func(oldAdmin, newAdmin string) { r.Logger.Info("πŸ‘‘ Admin changed: %s -> %s", oldAdmin, newAdmin) - + // Track admin change with BACKBEAT if available if r.BackbeatIntegration != nil { operationID := fmt.Sprintf("admin-change-%d", time.Now().Unix()) @@ -311,7 +337,7 @@ func (r *SharedRuntime) initializeElectionSystem() error { r.BackbeatIntegration.CompleteP2POperation(operationID, 1) } } - + // If this node becomes admin, enable SLURP functionality if newAdmin == r.Node.ID().ShortString() { r.Logger.Info("🎯 This node is now admin - enabling SLURP functionality") @@ -324,12 +350,12 @@ func (r *SharedRuntime) initializeElectionSystem() error { }, func(winner string) { r.Logger.Info("πŸ† Election completed, winner: %s", winner) - + // Track election completion with BACKBEAT if available if r.BackbeatIntegration != nil { operationID := fmt.Sprintf("election-completed-%d", time.Now().Unix()) if err := r.BackbeatIntegration.StartP2POperation(operationID, "election", 1, map[string]interface{}{ - "winner": winner, + "winner": winner, "node_id": r.Node.ID().ShortString(), }); err == nil { r.BackbeatIntegration.CompleteP2POperation(operationID, 1) @@ -337,22 +363,22 @@ func (r *SharedRuntime) initializeElectionSystem() error { } }, ) - + if err := electionManager.Start(); err != 
nil { return fmt.Errorf("failed to start election manager: %v", err) } r.ElectionManager = electionManager r.Logger.Info("βœ… Election manager started with automated heartbeat management") - + return nil } func (r *SharedRuntime) initializeDHTStorage() error { // === DHT Storage and Decision Publishing === var dhtNode *dht.LibP2PDHT - var encryptedStorage *dht.EncryptedDHTStorage + var encryptedStorage *dht.EncryptedDHTStorage var decisionPublisher *ucxl.DecisionPublisher - + if r.Config.V2.DHT.Enabled { // Create DHT var err error @@ -361,14 +387,14 @@ func (r *SharedRuntime) initializeDHTStorage() error { r.Logger.Warn("⚠️ Failed to create DHT: %v", err) } else { r.Logger.Info("πŸ•ΈοΈ DHT initialized") - + // Bootstrap DHT with BACKBEAT tracking if r.BackbeatIntegration != nil { operationID := fmt.Sprintf("dht-bootstrap-%d", time.Now().Unix()) if err := r.BackbeatIntegration.StartP2POperation(operationID, "dht_bootstrap", 4, nil); err == nil { r.BackbeatIntegration.UpdateP2POperationPhase(operationID, backbeat.PhaseConnecting, 0) } - + if err := dhtNode.Bootstrap(); err != nil { r.Logger.Warn("⚠️ DHT bootstrap failed: %v", err) r.BackbeatIntegration.FailP2POperation(operationID, err.Error()) @@ -380,22 +406,22 @@ func (r *SharedRuntime) initializeDHTStorage() error { r.Logger.Warn("⚠️ DHT bootstrap failed: %v", err) } } - - // Connect to bootstrap peers if configured + + // Connect to bootstrap peers if configured for _, addrStr := range r.Config.V2.DHT.BootstrapPeers { addr, err := multiaddr.NewMultiaddr(addrStr) if err != nil { r.Logger.Warn("⚠️ Invalid bootstrap address %s: %v", addrStr, err) continue } - + // Extract peer info from multiaddr info, err := peer.AddrInfoFromP2pAddr(addr) if err != nil { r.Logger.Warn("⚠️ Failed to parse peer info from %s: %v", addrStr, err) continue } - + // Track peer discovery with BACKBEAT if available if r.BackbeatIntegration != nil { operationID := fmt.Sprintf("peer-discovery-%d", time.Now().Unix()) @@ -403,7 +429,7 @@ func (r *SharedRuntime) initializeDHTStorage() error { "peer_addr": addrStr, }); err == nil { r.BackbeatIntegration.UpdateP2POperationPhase(operationID, backbeat.PhaseConnecting, 0) - + if err := r.Node.Host().Connect(r.Context, *info); err != nil { r.Logger.Warn("⚠️ Failed to connect to bootstrap peer %s: %v", addrStr, err) r.BackbeatIntegration.FailP2POperation(operationID, err.Error()) @@ -420,20 +446,20 @@ func (r *SharedRuntime) initializeDHTStorage() error { } } } - + // Initialize encrypted storage encryptedStorage = dht.NewEncryptedDHTStorage( r.Context, - r.Node.Host(), + r.Node.Host(), dhtNode, r.Config, r.Node.ID().ShortString(), ) - + // Start cache cleanup encryptedStorage.StartCacheCleanup(5 * time.Minute) r.Logger.Info("πŸ” Encrypted DHT storage initialized") - + // Initialize decision publisher decisionPublisher = ucxl.NewDecisionPublisher( r.Context, @@ -451,11 +477,24 @@ func (r *SharedRuntime) initializeDHTStorage() error { r.DHTNode = dhtNode r.EncryptedStorage = encryptedStorage r.DecisionPublisher = decisionPublisher - + return nil } func (r *SharedRuntime) initializeServices() error { + // Create simple task tracker ahead of coordinator so broadcasts stay accurate + taskTracker := &SimpleTaskTracker{ + maxTasks: r.Config.Agent.MaxTasks, + activeTasks: make(map[string]bool), + } + + // Connect decision publisher to task tracker if available + if r.DecisionPublisher != nil { + taskTracker.decisionPublisher = r.DecisionPublisher + r.Logger.Info("πŸ“€ Task completion decisions will be published to DHT") + } + 
r.TaskTracker = taskTracker + // === Task Coordination Integration === taskCoordinator := coordinator.NewTaskCoordinator( r.Context, @@ -464,8 +503,9 @@ func (r *SharedRuntime) initializeServices() error { r.Config, r.Node.ID().ShortString(), nil, // HMMM router placeholder + taskTracker, ) - + taskCoordinator.Start() r.TaskCoordinator = taskCoordinator r.Logger.Info("βœ… Task coordination system active") @@ -487,14 +527,14 @@ func (r *SharedRuntime) initializeServices() error { if storageDir == "" { storageDir = filepath.Join(os.TempDir(), "chorus-ucxi-storage") } - + storage, err := ucxi.NewBasicContentStorage(storageDir) if err != nil { r.Logger.Warn("⚠️ Failed to create UCXI storage: %v", err) } else { resolver := ucxi.NewBasicAddressResolver(r.Node.ID().ShortString()) resolver.SetDefaultTTL(r.Config.UCXL.Resolution.CacheTTL) - + ucxiConfig := ucxi.ServerConfig{ Port: r.Config.UCXL.Server.Port, BasePath: r.Config.UCXL.Server.BasePath, @@ -502,7 +542,7 @@ func (r *SharedRuntime) initializeServices() error { Storage: storage, Logger: ucxi.SimpleLogger{}, } - + ucxiServer = ucxi.NewServer(ucxiConfig) go func() { r.Logger.Info("πŸ”— UCXI server starting on :%d", r.Config.UCXL.Server.Port) @@ -515,35 +555,41 @@ func (r *SharedRuntime) initializeServices() error { r.Logger.Info("βšͺ UCXI server disabled") } r.UCXIServer = ucxiServer - - // Create simple task tracker - taskTracker := &SimpleTaskTracker{ - maxTasks: r.Config.Agent.MaxTasks, - activeTasks: make(map[string]bool), - } - - // Connect decision publisher to task tracker if available - if r.DecisionPublisher != nil { - taskTracker.decisionPublisher = r.DecisionPublisher - r.Logger.Info("πŸ“€ Task completion decisions will be published to DHT") - } - r.TaskTracker = taskTracker - return nil } +func (r *SharedRuntime) handleShhhFindings(ctx context.Context, findings []shhh.Finding) { + if r == nil || r.Metrics == nil { + return + } + for _, finding := range findings { + r.Metrics.IncrementSHHHFindings(finding.Rule, string(finding.Severity), finding.Count) + } +} + +type shhhAuditSink struct { + logger *SimpleLogger +} + +func (s *shhhAuditSink) RecordRedaction(_ context.Context, event shhh.AuditEvent) { + if s == nil || s.logger == nil { + return + } + s.logger.Warn("πŸ”’ SHHH redaction applied (rule=%s severity=%s path=%s)", event.Rule, event.Severity, event.Path) +} + // initializeAIProvider configures the reasoning engine with the appropriate AI provider func initializeAIProvider(cfg *config.Config, logger *SimpleLogger) error { // Set the AI provider reasoning.SetAIProvider(cfg.AI.Provider) - + // Configure the selected provider switch cfg.AI.Provider { case "resetdata": if cfg.AI.ResetData.APIKey == "" { return fmt.Errorf("RESETDATA_API_KEY environment variable is required for resetdata provider") } - + resetdataConfig := reasoning.ResetDataConfig{ BaseURL: cfg.AI.ResetData.BaseURL, APIKey: cfg.AI.ResetData.APIKey, @@ -551,19 +597,19 @@ func initializeAIProvider(cfg *config.Config, logger *SimpleLogger) error { Timeout: cfg.AI.ResetData.Timeout, } reasoning.SetResetDataConfig(resetdataConfig) - logger.Info("🌐 ResetData AI provider configured - Endpoint: %s, Model: %s", + logger.Info("🌐 ResetData AI provider configured - Endpoint: %s, Model: %s", cfg.AI.ResetData.BaseURL, cfg.AI.ResetData.Model) - + case "ollama": reasoning.SetOllamaEndpoint(cfg.AI.Ollama.Endpoint) logger.Info("πŸ¦™ Ollama AI provider configured - Endpoint: %s", cfg.AI.Ollama.Endpoint) - + default: logger.Warn("⚠️ Unknown AI provider '%s', defaulting to resetdata", 
cfg.AI.Provider) if cfg.AI.ResetData.APIKey == "" { return fmt.Errorf("RESETDATA_API_KEY environment variable is required for default resetdata provider") } - + resetdataConfig := reasoning.ResetDataConfig{ BaseURL: cfg.AI.ResetData.BaseURL, APIKey: cfg.AI.ResetData.APIKey, @@ -573,7 +619,7 @@ func initializeAIProvider(cfg *config.Config, logger *SimpleLogger) error { reasoning.SetResetDataConfig(resetdataConfig) reasoning.SetAIProvider("resetdata") } - + // Configure model selection reasoning.SetModelConfig( cfg.Agent.Models, diff --git a/pkg/config/config.go b/pkg/config/config.go index bd104ab..f1a4b2f 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -28,17 +28,18 @@ type Config struct { // AgentConfig defines agent-specific settings type AgentConfig struct { - ID string `yaml:"id"` - Specialization string `yaml:"specialization"` - MaxTasks int `yaml:"max_tasks"` - Capabilities []string `yaml:"capabilities"` - Models []string `yaml:"models"` - Role string `yaml:"role"` - Expertise []string `yaml:"expertise"` - ReportsTo string `yaml:"reports_to"` - Deliverables []string `yaml:"deliverables"` - ModelSelectionWebhook string `yaml:"model_selection_webhook"` - DefaultReasoningModel string `yaml:"default_reasoning_model"` + ID string `yaml:"id"` + Specialization string `yaml:"specialization"` + MaxTasks int `yaml:"max_tasks"` + Capabilities []string `yaml:"capabilities"` + Models []string `yaml:"models"` + Role string `yaml:"role"` + Project string `yaml:"project"` + Expertise []string `yaml:"expertise"` + ReportsTo string `yaml:"reports_to"` + Deliverables []string `yaml:"deliverables"` + ModelSelectionWebhook string `yaml:"model_selection_webhook"` + DefaultReasoningModel string `yaml:"default_reasoning_model"` } // NetworkConfig defines network and API settings @@ -65,9 +66,9 @@ type LicenseConfig struct { // AIConfig defines AI service settings type AIConfig struct { - Provider string `yaml:"provider"` - Ollama OllamaConfig `yaml:"ollama"` - ResetData ResetDataConfig `yaml:"resetdata"` + Provider string `yaml:"provider"` + Ollama OllamaConfig `yaml:"ollama"` + ResetData ResetDataConfig `yaml:"resetdata"` } // OllamaConfig defines Ollama-specific settings @@ -78,10 +79,10 @@ type OllamaConfig struct { // ResetDataConfig defines ResetData LLM service settings type ResetDataConfig struct { - BaseURL string `yaml:"base_url"` - APIKey string `yaml:"api_key"` - Model string `yaml:"model"` - Timeout time.Duration `yaml:"timeout"` + BaseURL string `yaml:"base_url"` + APIKey string `yaml:"api_key"` + Model string `yaml:"model"` + Timeout time.Duration `yaml:"timeout"` } // LoggingConfig defines logging settings @@ -103,9 +104,9 @@ type DHTConfig struct { // UCXLConfig defines UCXL protocol settings type UCXLConfig struct { - Enabled bool `yaml:"enabled"` - Server ServerConfig `yaml:"server"` - Storage StorageConfig `yaml:"storage"` + Enabled bool `yaml:"enabled"` + Server ServerConfig `yaml:"server"` + Storage StorageConfig `yaml:"storage"` Resolution ResolutionConfig `yaml:"resolution"` } @@ -133,25 +134,26 @@ type SlurpConfig struct { // WHOOSHAPIConfig defines WHOOSH API integration settings type WHOOSHAPIConfig struct { - URL string `yaml:"url"` - BaseURL string `yaml:"base_url"` - Token string `yaml:"token"` - Enabled bool `yaml:"enabled"` + URL string `yaml:"url"` + BaseURL string `yaml:"base_url"` + Token string `yaml:"token"` + Enabled bool `yaml:"enabled"` } // LoadFromEnvironment loads configuration from environment variables func LoadFromEnvironment() (*Config, error) { 
cfg := &Config{ Agent: AgentConfig{ - ID: getEnvOrDefault("CHORUS_AGENT_ID", ""), - Specialization: getEnvOrDefault("CHORUS_SPECIALIZATION", "general_developer"), - MaxTasks: getEnvIntOrDefault("CHORUS_MAX_TASKS", 3), - Capabilities: getEnvArrayOrDefault("CHORUS_CAPABILITIES", []string{"general_development", "task_coordination"}), - Models: getEnvArrayOrDefault("CHORUS_MODELS", []string{"meta/llama-3.1-8b-instruct"}), - Role: getEnvOrDefault("CHORUS_ROLE", ""), - Expertise: getEnvArrayOrDefault("CHORUS_EXPERTISE", []string{}), - ReportsTo: getEnvOrDefault("CHORUS_REPORTS_TO", ""), - Deliverables: getEnvArrayOrDefault("CHORUS_DELIVERABLES", []string{}), + ID: getEnvOrDefault("CHORUS_AGENT_ID", ""), + Specialization: getEnvOrDefault("CHORUS_SPECIALIZATION", "general_developer"), + MaxTasks: getEnvIntOrDefault("CHORUS_MAX_TASKS", 3), + Capabilities: getEnvArrayOrDefault("CHORUS_CAPABILITIES", []string{"general_development", "task_coordination"}), + Models: getEnvArrayOrDefault("CHORUS_MODELS", []string{"meta/llama-3.1-8b-instruct"}), + Role: getEnvOrDefault("CHORUS_ROLE", ""), + Project: getEnvOrDefault("CHORUS_PROJECT", "chorus"), + Expertise: getEnvArrayOrDefault("CHORUS_EXPERTISE", []string{}), + ReportsTo: getEnvOrDefault("CHORUS_REPORTS_TO", ""), + Deliverables: getEnvArrayOrDefault("CHORUS_DELIVERABLES", []string{}), ModelSelectionWebhook: getEnvOrDefault("CHORUS_MODEL_SELECTION_WEBHOOK", ""), DefaultReasoningModel: getEnvOrDefault("CHORUS_DEFAULT_REASONING_MODEL", "meta/llama-3.1-8b-instruct"), }, @@ -214,10 +216,10 @@ func LoadFromEnvironment() (*Config, error) { AuditLogging: getEnvBoolOrDefault("CHORUS_AUDIT_LOGGING", true), AuditPath: getEnvOrDefault("CHORUS_AUDIT_PATH", "/tmp/chorus-audit.log"), ElectionConfig: ElectionConfig{ - DiscoveryTimeout: getEnvDurationOrDefault("CHORUS_DISCOVERY_TIMEOUT", 10*time.Second), - HeartbeatTimeout: getEnvDurationOrDefault("CHORUS_HEARTBEAT_TIMEOUT", 30*time.Second), - ElectionTimeout: getEnvDurationOrDefault("CHORUS_ELECTION_TIMEOUT", 60*time.Second), - DiscoveryBackoff: getEnvDurationOrDefault("CHORUS_DISCOVERY_BACKOFF", 5*time.Second), + DiscoveryTimeout: getEnvDurationOrDefault("CHORUS_DISCOVERY_TIMEOUT", 10*time.Second), + HeartbeatTimeout: getEnvDurationOrDefault("CHORUS_HEARTBEAT_TIMEOUT", 30*time.Second), + ElectionTimeout: getEnvDurationOrDefault("CHORUS_ELECTION_TIMEOUT", 60*time.Second), + DiscoveryBackoff: getEnvDurationOrDefault("CHORUS_DISCOVERY_BACKOFF", 5*time.Second), LeadershipScoring: &LeadershipScoring{ UptimeWeight: 0.4, CapabilityWeight: 0.3, @@ -247,7 +249,7 @@ func (c *Config) Validate() error { if c.License.LicenseID == "" { return fmt.Errorf("CHORUS_LICENSE_ID is required") } - + if c.Agent.ID == "" { // Auto-generate agent ID if not provided hostname, _ := os.Hostname() @@ -258,7 +260,7 @@ func (c *Config) Validate() error { c.Agent.ID = fmt.Sprintf("chorus-%s", hostname) } } - + return nil } @@ -329,14 +331,14 @@ func getEnvOrFileContent(envKey, fileEnvKey string) string { if value := os.Getenv(envKey); value != "" { return value } - + // Then try reading from file path specified in fileEnvKey if filePath := os.Getenv(fileEnvKey); filePath != "" { if content, err := ioutil.ReadFile(filePath); err == nil { return strings.TrimSpace(string(content)) } } - + return "" } @@ -360,4 +362,4 @@ func LoadConfig(configPath string) (*Config, error) { func SaveConfig(cfg *Config, configPath string) error { // For containers, configuration is environment-based, so this is a no-op return nil -} \ No newline at end of file +} diff 
--git a/pkg/config/security.go b/pkg/config/security.go index 7b78012..b490bcc 100644 --- a/pkg/config/security.go +++ b/pkg/config/security.go @@ -12,27 +12,27 @@ const ( // SecurityConfig defines security-related configuration type SecurityConfig struct { - KeyRotationDays int `yaml:"key_rotation_days"` - AuditLogging bool `yaml:"audit_logging"` - AuditPath string `yaml:"audit_path"` - ElectionConfig ElectionConfig `yaml:"election"` + KeyRotationDays int `yaml:"key_rotation_days"` + AuditLogging bool `yaml:"audit_logging"` + AuditPath string `yaml:"audit_path"` + ElectionConfig ElectionConfig `yaml:"election"` } // ElectionConfig defines election timing and behavior settings type ElectionConfig struct { - DiscoveryTimeout time.Duration `yaml:"discovery_timeout"` - HeartbeatTimeout time.Duration `yaml:"heartbeat_timeout"` - ElectionTimeout time.Duration `yaml:"election_timeout"` - DiscoveryBackoff time.Duration `yaml:"discovery_backoff"` - LeadershipScoring *LeadershipScoring `yaml:"leadership_scoring,omitempty"` + DiscoveryTimeout time.Duration `yaml:"discovery_timeout"` + HeartbeatTimeout time.Duration `yaml:"heartbeat_timeout"` + ElectionTimeout time.Duration `yaml:"election_timeout"` + DiscoveryBackoff time.Duration `yaml:"discovery_backoff"` + LeadershipScoring *LeadershipScoring `yaml:"leadership_scoring,omitempty"` } // LeadershipScoring defines weights for election scoring type LeadershipScoring struct { - UptimeWeight float64 `yaml:"uptime_weight"` - CapabilityWeight float64 `yaml:"capability_weight"` - ExperienceWeight float64 `yaml:"experience_weight"` - LoadWeight float64 `yaml:"load_weight"` + UptimeWeight float64 `yaml:"uptime_weight"` + CapabilityWeight float64 `yaml:"capability_weight"` + ExperienceWeight float64 `yaml:"experience_weight"` + LoadWeight float64 `yaml:"load_weight"` } // AgeKeyPair represents an Age encryption key pair @@ -43,14 +43,14 @@ type AgeKeyPair struct { // RoleDefinition represents a role configuration type RoleDefinition struct { - Name string `yaml:"name"` - Description string `yaml:"description"` - Capabilities []string `yaml:"capabilities"` - AccessLevel string `yaml:"access_level"` - AuthorityLevel string `yaml:"authority_level"` - Keys *AgeKeyPair `yaml:"keys,omitempty"` - AgeKeys *AgeKeyPair `yaml:"age_keys,omitempty"` // Legacy field name - CanDecrypt []string `yaml:"can_decrypt,omitempty"` // Roles this role can decrypt + Name string `yaml:"name"` + Description string `yaml:"description"` + Capabilities []string `yaml:"capabilities"` + AccessLevel string `yaml:"access_level"` + AuthorityLevel string `yaml:"authority_level"` + Keys *AgeKeyPair `yaml:"keys,omitempty"` + AgeKeys *AgeKeyPair `yaml:"age_keys,omitempty"` // Legacy field name + CanDecrypt []string `yaml:"can_decrypt,omitempty"` // Roles this role can decrypt } // GetPredefinedRoles returns the predefined roles for the system @@ -65,7 +65,7 @@ func GetPredefinedRoles() map[string]*RoleDefinition { CanDecrypt: []string{"project_manager", "backend_developer", "frontend_developer", "devops_engineer", "security_engineer"}, }, "backend_developer": { - Name: "backend_developer", + Name: "backend_developer", Description: "Backend development and API work", Capabilities: []string{"backend", "api", "database"}, AccessLevel: "medium", @@ -90,12 +90,52 @@ func GetPredefinedRoles() map[string]*RoleDefinition { }, "security_engineer": { Name: "security_engineer", - Description: "Security oversight and hardening", + Description: "Security oversight and hardening", Capabilities: 
[]string{"security", "audit", "compliance"}, AccessLevel: "high", AuthorityLevel: AuthorityAdmin, CanDecrypt: []string{"security_engineer", "project_manager", "backend_developer", "frontend_developer", "devops_engineer"}, }, + "security_expert": { + Name: "security_expert", + Description: "Advanced security analysis and policy work", + Capabilities: []string{"security", "policy", "response"}, + AccessLevel: "high", + AuthorityLevel: AuthorityAdmin, + CanDecrypt: []string{"security_expert", "security_engineer", "project_manager"}, + }, + "senior_software_architect": { + Name: "senior_software_architect", + Description: "Architecture governance and system design", + Capabilities: []string{"architecture", "design", "coordination"}, + AccessLevel: "high", + AuthorityLevel: AuthorityAdmin, + CanDecrypt: []string{"senior_software_architect", "project_manager", "backend_developer", "frontend_developer"}, + }, + "qa_engineer": { + Name: "qa_engineer", + Description: "Quality assurance and testing", + Capabilities: []string{"testing", "validation"}, + AccessLevel: "medium", + AuthorityLevel: AuthorityFull, + CanDecrypt: []string{"qa_engineer", "backend_developer", "frontend_developer"}, + }, + "readonly_user": { + Name: "readonly_user", + Description: "Read-only observer with audit access", + Capabilities: []string{"observation"}, + AccessLevel: "low", + AuthorityLevel: AuthorityReadOnly, + CanDecrypt: []string{"readonly_user"}, + }, + "suggestion_only_role": { + Name: "suggestion_only_role", + Description: "Can propose suggestions but not execute", + Capabilities: []string{"recommendation"}, + AccessLevel: "low", + AuthorityLevel: AuthoritySuggestion, + CanDecrypt: []string{"suggestion_only_role"}, + }, } } @@ -106,16 +146,16 @@ func (c *Config) CanDecryptRole(targetRole string) (bool, error) { if !exists { return false, nil } - + targetRoleDef, exists := roles[targetRole] if !exists { return false, nil } - + // Simple access level check currentLevel := getAccessLevelValue(currentRole.AccessLevel) targetLevel := getAccessLevelValue(targetRoleDef.AccessLevel) - + return currentLevel >= targetLevel, nil } @@ -130,4 +170,4 @@ func getAccessLevelValue(level string) int { default: return 0 } -} \ No newline at end of file +} diff --git a/pkg/dht/dht.go b/pkg/dht/dht.go index 6e0f5ed..92840c1 100644 --- a/pkg/dht/dht.go +++ b/pkg/dht/dht.go @@ -6,33 +6,34 @@ import ( "sync" "time" + "crypto/sha256" + "github.com/ipfs/go-cid" + dht "github.com/libp2p/go-libp2p-kad-dht" "github.com/libp2p/go-libp2p/core/host" "github.com/libp2p/go-libp2p/core/peer" "github.com/libp2p/go-libp2p/core/protocol" "github.com/libp2p/go-libp2p/core/routing" - dht "github.com/libp2p/go-libp2p-kad-dht" "github.com/multiformats/go-multiaddr" "github.com/multiformats/go-multihash" - "github.com/ipfs/go-cid" - "crypto/sha256" ) // LibP2PDHT provides distributed hash table functionality for CHORUS peer discovery type LibP2PDHT struct { - host host.Host - kdht *dht.IpfsDHT - ctx context.Context - cancel context.CancelFunc - config *Config - + host host.Host + kdht *dht.IpfsDHT + ctx context.Context + cancel context.CancelFunc + config *Config + startTime time.Time + // Bootstrap state bootstrapped bool bootstrapMutex sync.RWMutex - + // Peer management knownPeers map[peer.ID]*PeerInfo peersMutex sync.RWMutex - + // Replication management replicationManager *ReplicationManager } @@ -41,30 +42,32 @@ type LibP2PDHT struct { type Config struct { // Bootstrap nodes for initial DHT discovery BootstrapPeers []multiaddr.Multiaddr - + // 
Protocol prefix for CHORUS DHT ProtocolPrefix string - + // Bootstrap timeout BootstrapTimeout time.Duration - + // Peer discovery interval DiscoveryInterval time.Duration - + // DHT mode (client, server, auto) Mode dht.ModeOpt - + // Enable automatic bootstrap AutoBootstrap bool } // PeerInfo holds information about discovered peers +const defaultProviderResultLimit = 20 + type PeerInfo struct { - ID peer.ID - Addresses []multiaddr.Multiaddr - Agent string - Role string - LastSeen time.Time + ID peer.ID + Addresses []multiaddr.Multiaddr + Agent string + Role string + LastSeen time.Time Capabilities []string } @@ -74,23 +77,28 @@ func DefaultConfig() *Config { ProtocolPrefix: "/CHORUS", BootstrapTimeout: 30 * time.Second, DiscoveryInterval: 60 * time.Second, - Mode: dht.ModeAuto, - AutoBootstrap: true, + Mode: dht.ModeAuto, + AutoBootstrap: true, } } -// NewLibP2PDHT creates a new LibP2PDHT instance +// NewDHT is a backward compatible helper that delegates to NewLibP2PDHT. +func NewDHT(ctx context.Context, host host.Host, opts ...Option) (*LibP2PDHT, error) { + return NewLibP2PDHT(ctx, host, opts...) +} + +// NewLibP2PDHT creates a new LibP2PDHT instance func NewLibP2PDHT(ctx context.Context, host host.Host, opts ...Option) (*LibP2PDHT, error) { config := DefaultConfig() for _, opt := range opts { opt(config) } - + // Create context with cancellation dhtCtx, cancel := context.WithCancel(ctx) - + // Create Kademlia DHT - kdht, err := dht.New(dhtCtx, host, + kdht, err := dht.New(dhtCtx, host, dht.Mode(config.Mode), dht.ProtocolPrefix(protocol.ID(config.ProtocolPrefix)), ) @@ -98,22 +106,23 @@ func NewLibP2PDHT(ctx context.Context, host host.Host, opts ...Option) (*LibP2PD cancel() return nil, fmt.Errorf("failed to create DHT: %w", err) } - + d := &LibP2PDHT{ host: host, kdht: kdht, ctx: dhtCtx, cancel: cancel, config: config, + startTime: time.Now(), knownPeers: make(map[peer.ID]*PeerInfo), } - + // Initialize replication manager d.replicationManager = NewReplicationManager(dhtCtx, kdht, DefaultReplicationConfig()) - + // Start background processes go d.startBackgroundTasks() - + return d, nil } @@ -178,25 +187,25 @@ func WithAutoBootstrap(auto bool) Option { func (d *LibP2PDHT) Bootstrap() error { d.bootstrapMutex.Lock() defer d.bootstrapMutex.Unlock() - + if d.bootstrapped { return nil } - + // Connect to bootstrap peers if len(d.config.BootstrapPeers) == 0 { // Use default IPFS bootstrap peers if none configured d.config.BootstrapPeers = dht.DefaultBootstrapPeers } - + // Bootstrap the DHT bootstrapCtx, cancel := context.WithTimeout(d.ctx, d.config.BootstrapTimeout) defer cancel() - + if err := d.kdht.Bootstrap(bootstrapCtx); err != nil { return fmt.Errorf("DHT bootstrap failed: %w", err) } - + // Connect to bootstrap peers var connected int for _, peerAddr := range d.config.BootstrapPeers { @@ -204,7 +213,7 @@ func (d *LibP2PDHT) Bootstrap() error { if err != nil { continue } - + connectCtx, cancel := context.WithTimeout(d.ctx, 10*time.Second) if err := d.host.Connect(connectCtx, *addrInfo); err != nil { cancel() @@ -213,11 +222,11 @@ func (d *LibP2PDHT) Bootstrap() error { cancel() connected++ } - + if connected == 0 { return fmt.Errorf("failed to connect to any bootstrap peers") } - + d.bootstrapped = true return nil } @@ -233,13 +242,13 @@ func (d *LibP2PDHT) IsBootstrapped() bool { func (d *LibP2PDHT) keyToCID(key string) (cid.Cid, error) { // Hash the key hash := sha256.Sum256([]byte(key)) - + // Create multihash mh, err := multihash.EncodeName(hash[:], "sha2-256") if err != nil { 
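+		// EncodeName should not fail for a sha2-256 digest, but propagate the error defensively.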
return cid.Undef, err } - + // Create CID return cid.NewCidV1(cid.Raw, mh), nil } @@ -249,13 +258,13 @@ func (d *LibP2PDHT) Provide(ctx context.Context, key string) error { if !d.IsBootstrapped() { return fmt.Errorf("DHT not bootstrapped") } - + // Convert key to CID keyCID, err := d.keyToCID(key) if err != nil { return fmt.Errorf("failed to create CID from key: %w", err) } - + return d.kdht.Provide(ctx, keyCID, true) } @@ -264,31 +273,32 @@ func (d *LibP2PDHT) FindProviders(ctx context.Context, key string, limit int) ([ if !d.IsBootstrapped() { return nil, fmt.Errorf("DHT not bootstrapped") } - + // Convert key to CID keyCID, err := d.keyToCID(key) if err != nil { return nil, fmt.Errorf("failed to create CID from key: %w", err) } - - // Find providers (FindProviders returns a channel and an error) - providersChan, err := d.kdht.FindProviders(ctx, keyCID) - if err != nil { - return nil, fmt.Errorf("failed to find providers: %w", err) + + maxProviders := limit + if maxProviders <= 0 { + maxProviders = defaultProviderResultLimit } - - // Collect providers from channel - providers := make([]peer.AddrInfo, 0, limit) - // TODO: Fix libp2p FindProviders channel type mismatch - // The channel appears to return int instead of peer.AddrInfo in this version - _ = providersChan // Avoid unused variable error - // for providerInfo := range providersChan { - // providers = append(providers, providerInfo) - // if len(providers) >= limit { - // break - // } - // } - + + providerCtx, cancel := context.WithCancel(ctx) + defer cancel() + + providersChan := d.kdht.FindProvidersAsync(providerCtx, keyCID, maxProviders) + providers := make([]peer.AddrInfo, 0, maxProviders) + + for providerInfo := range providersChan { + providers = append(providers, providerInfo) + if limit > 0 && len(providers) >= limit { + cancel() + break + } + } + return providers, nil } @@ -297,7 +307,7 @@ func (d *LibP2PDHT) PutValue(ctx context.Context, key string, value []byte) erro if !d.IsBootstrapped() { return fmt.Errorf("DHT not bootstrapped") } - + return d.kdht.PutValue(ctx, key, value) } @@ -306,7 +316,7 @@ func (d *LibP2PDHT) GetValue(ctx context.Context, key string) ([]byte, error) { if !d.IsBootstrapped() { return nil, fmt.Errorf("DHT not bootstrapped") } - + return d.kdht.GetValue(ctx, key) } @@ -315,7 +325,7 @@ func (d *LibP2PDHT) FindPeer(ctx context.Context, peerID peer.ID) (peer.AddrInfo if !d.IsBootstrapped() { return peer.AddrInfo{}, fmt.Errorf("DHT not bootstrapped") } - + return d.kdht.FindPeer(ctx, peerID) } @@ -329,14 +339,30 @@ func (d *LibP2PDHT) GetConnectedPeers() []peer.ID { return d.kdht.Host().Network().Peers() } +// GetStats reports basic runtime statistics for the DHT +func (d *LibP2PDHT) GetStats() DHTStats { + stats := DHTStats{ + TotalPeers: len(d.GetConnectedPeers()), + Uptime: time.Since(d.startTime), + } + + if d.replicationManager != nil { + if metrics := d.replicationManager.GetMetrics(); metrics != nil { + stats.TotalKeys = int(metrics.TotalKeys) + } + } + + return stats +} + // RegisterPeer registers a peer with capability information func (d *LibP2PDHT) RegisterPeer(peerID peer.ID, agent, role string, capabilities []string) { d.peersMutex.Lock() defer d.peersMutex.Unlock() - + // Get peer addresses from host peerInfo := d.host.Peerstore().PeerInfo(peerID) - + d.knownPeers[peerID] = &PeerInfo{ ID: peerID, Addresses: peerInfo.Addrs, @@ -351,12 +377,12 @@ func (d *LibP2PDHT) RegisterPeer(peerID peer.ID, agent, role string, capabilitie func (d *LibP2PDHT) GetKnownPeers() map[peer.ID]*PeerInfo { 
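+	// Copy under the read lock so callers receive a snapshot and cannot mutate the internal peer registry.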
d.peersMutex.RLock() defer d.peersMutex.RUnlock() - + result := make(map[peer.ID]*PeerInfo) for id, info := range d.knownPeers { result[id] = info } - + return result } @@ -371,7 +397,7 @@ func (d *LibP2PDHT) FindPeersByRole(ctx context.Context, role string) ([]*PeerIn } } d.peersMutex.RUnlock() - + // Also search DHT for role-based keys roleKey := fmt.Sprintf("CHORUS:role:%s", role) providers, err := d.FindProviders(ctx, roleKey, 10) @@ -379,11 +405,11 @@ func (d *LibP2PDHT) FindPeersByRole(ctx context.Context, role string) ([]*PeerIn // Return local peers even if DHT search fails return localPeers, nil } - + // Convert providers to PeerInfo var result []*PeerInfo result = append(result, localPeers...) - + for _, provider := range providers { // Skip if we already have this peer found := false @@ -402,7 +428,7 @@ func (d *LibP2PDHT) FindPeersByRole(ctx context.Context, role string) ([]*PeerIn }) } } - + return result, nil } @@ -424,10 +450,10 @@ func (d *LibP2PDHT) startBackgroundTasks() { if d.config.AutoBootstrap { go d.autoBootstrap() } - + // Start periodic peer discovery go d.periodicDiscovery() - + // Start peer cleanup go d.peerCleanup() } @@ -436,7 +462,7 @@ func (d *LibP2PDHT) startBackgroundTasks() { func (d *LibP2PDHT) autoBootstrap() { ticker := time.NewTicker(30 * time.Second) defer ticker.Stop() - + for { select { case <-d.ctx.Done(): @@ -456,7 +482,7 @@ func (d *LibP2PDHT) autoBootstrap() { func (d *LibP2PDHT) periodicDiscovery() { ticker := time.NewTicker(d.config.DiscoveryInterval) defer ticker.Stop() - + for { select { case <-d.ctx.Done(): @@ -473,13 +499,13 @@ func (d *LibP2PDHT) periodicDiscovery() { func (d *LibP2PDHT) performDiscovery() { ctx, cancel := context.WithTimeout(d.ctx, 30*time.Second) defer cancel() - + // Look for general CHORUS peers providers, err := d.FindProviders(ctx, "CHORUS:peer", 10) if err != nil { return } - + // Update known peers d.peersMutex.Lock() for _, provider := range providers { @@ -498,7 +524,7 @@ func (d *LibP2PDHT) performDiscovery() { func (d *LibP2PDHT) peerCleanup() { ticker := time.NewTicker(5 * time.Minute) defer ticker.Stop() - + for { select { case <-d.ctx.Done(): @@ -513,9 +539,9 @@ func (d *LibP2PDHT) peerCleanup() { func (d *LibP2PDHT) cleanupStalePeers() { d.peersMutex.Lock() defer d.peersMutex.Unlock() - + staleThreshold := time.Now().Add(-time.Hour) // 1 hour threshold - + for peerID, peerInfo := range d.knownPeers { if peerInfo.LastSeen.Before(staleThreshold) { // Check if peer is still connected @@ -526,7 +552,7 @@ func (d *LibP2PDHT) cleanupStalePeers() { break } } - + if !connected { delete(d.knownPeers, peerID) } @@ -589,11 +615,11 @@ func (d *LibP2PDHT) EnableReplication(config *ReplicationConfig) error { if d.replicationManager != nil { return fmt.Errorf("replication already enabled") } - + if config == nil { config = DefaultReplicationConfig() } - + d.replicationManager = NewReplicationManager(d.ctx, d.kdht, config) return nil } @@ -603,11 +629,11 @@ func (d *LibP2PDHT) DisableReplication() error { if d.replicationManager == nil { return nil } - + if err := d.replicationManager.Stop(); err != nil { return fmt.Errorf("failed to stop replication manager: %w", err) } - + d.replicationManager = nil return nil } @@ -617,13 +643,18 @@ func (d *LibP2PDHT) IsReplicationEnabled() bool { return d.replicationManager != nil } +// ReplicationManager returns the underlying replication manager if enabled. 
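+// The result is nil when replication has been disabled, so callers should guard the
+// dereference. Illustrative usage (a sketch, not part of this patch):
+//
+//	if rm := d.ReplicationManager(); rm != nil {
+//		_ = rm.GetMetrics()
+//	}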
+func (d *LibP2PDHT) ReplicationManager() *ReplicationManager { + return d.replicationManager +} + // Close shuts down the DHT func (d *LibP2PDHT) Close() error { // Stop replication manager first if d.replicationManager != nil { d.replicationManager.Stop() } - + d.cancel() return d.kdht.Close() } @@ -633,10 +664,10 @@ func (d *LibP2PDHT) RefreshRoutingTable() error { if !d.IsBootstrapped() { return fmt.Errorf("DHT not bootstrapped") } - + // RefreshRoutingTable() returns a channel with errors, not a direct error errChan := d.kdht.RefreshRoutingTable() - + // Wait for the first error (if any) from the channel select { case err := <-errChan: @@ -654,4 +685,4 @@ func (d *LibP2PDHT) GetDHTSize() int { // Host returns the underlying libp2p host func (d *LibP2PDHT) Host() host.Host { return d.host -} \ No newline at end of file +} diff --git a/pkg/dht/dht_test.go b/pkg/dht/dht_test.go index 3752842..ee9b610 100644 --- a/pkg/dht/dht_test.go +++ b/pkg/dht/dht_test.go @@ -2,546 +2,155 @@ package dht import ( "context" + "strings" "testing" "time" - "github.com/libp2p/go-libp2p" - "github.com/libp2p/go-libp2p/core/host" + libp2p "github.com/libp2p/go-libp2p" + dhtmode "github.com/libp2p/go-libp2p-kad-dht" "github.com/libp2p/go-libp2p/core/test" - dht "github.com/libp2p/go-libp2p-kad-dht" - "github.com/multiformats/go-multiaddr" ) +type harness struct { + ctx context.Context + host libp2pHost + dht *LibP2PDHT +} + +type libp2pHost interface { + Close() error +} + +func newHarness(t *testing.T, opts ...Option) *harness { + t.Helper() + + ctx, cancel := context.WithCancel(context.Background()) + + host, err := libp2p.New(libp2p.ListenAddrStrings("/ip4/127.0.0.1/tcp/0")) + if err != nil { + cancel() + t.Fatalf("failed to create libp2p host: %v", err) + } + + options := append([]Option{WithAutoBootstrap(false)}, opts...) + d, err := NewLibP2PDHT(ctx, host, options...) 
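+	// On construction failure release the host and context here; t.Cleanup has not been registered yet.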
+ if err != nil { + host.Close() + cancel() + t.Fatalf("failed to create DHT: %v", err) + } + + t.Cleanup(func() { + d.Close() + host.Close() + cancel() + }) + + return &harness{ctx: ctx, host: host, dht: d} +} + func TestDefaultConfig(t *testing.T) { - config := DefaultConfig() - - if config.ProtocolPrefix != "/CHORUS" { - t.Errorf("expected protocol prefix '/CHORUS', got %s", config.ProtocolPrefix) + cfg := DefaultConfig() + + if cfg.ProtocolPrefix != "/CHORUS" { + t.Fatalf("expected protocol prefix '/CHORUS', got %s", cfg.ProtocolPrefix) } - - if config.BootstrapTimeout != 30*time.Second { - t.Errorf("expected bootstrap timeout 30s, got %v", config.BootstrapTimeout) + + if cfg.BootstrapTimeout != 30*time.Second { + t.Fatalf("expected bootstrap timeout 30s, got %v", cfg.BootstrapTimeout) } - - if config.Mode != dht.ModeAuto { - t.Errorf("expected mode auto, got %v", config.Mode) + + if cfg.Mode != dhtmode.ModeAuto { + t.Fatalf("expected mode auto, got %v", cfg.Mode) } - - if !config.AutoBootstrap { - t.Error("expected auto bootstrap to be enabled") + + if !cfg.AutoBootstrap { + t.Fatal("expected auto bootstrap to be enabled") } } -func TestNewDHT(t *testing.T) { - ctx := context.Background() - - // Create a test host - host, err := libp2p.New() - if err != nil { - t.Fatalf("failed to create test host: %v", err) - } - defer host.Close() - - // Test with default options - d, err := NewDHT(ctx, host) - if err != nil { - t.Fatalf("failed to create DHT: %v", err) - } - defer d.Close() - - if d.host != host { - t.Error("host not set correctly") - } - - if d.config.ProtocolPrefix != "/CHORUS" { - t.Errorf("expected protocol prefix '/CHORUS', got %s", d.config.ProtocolPrefix) - } -} - -func TestDHTWithOptions(t *testing.T) { - ctx := context.Background() - - host, err := libp2p.New() - if err != nil { - t.Fatalf("failed to create test host: %v", err) - } - defer host.Close() - - // Test with custom options - d, err := NewDHT(ctx, host, +func TestWithOptionsOverridesDefaults(t *testing.T) { + h := newHarness(t, WithProtocolPrefix("/custom"), - WithMode(dht.ModeClient), - WithBootstrapTimeout(60*time.Second), - WithDiscoveryInterval(120*time.Second), - WithAutoBootstrap(false), + WithDiscoveryInterval(2*time.Minute), + WithBootstrapTimeout(45*time.Second), + WithMode(dhtmode.ModeClient), + WithAutoBootstrap(true), ) - if err != nil { - t.Fatalf("failed to create DHT: %v", err) + + cfg := h.dht.config + + if cfg.ProtocolPrefix != "/custom" { + t.Fatalf("expected protocol prefix '/custom', got %s", cfg.ProtocolPrefix) } - defer d.Close() - - if d.config.ProtocolPrefix != "/custom" { - t.Errorf("expected protocol prefix '/custom', got %s", d.config.ProtocolPrefix) + + if cfg.DiscoveryInterval != 2*time.Minute { + t.Fatalf("expected discovery interval 2m, got %v", cfg.DiscoveryInterval) } - - if d.config.Mode != dht.ModeClient { - t.Errorf("expected mode client, got %v", d.config.Mode) + + if cfg.BootstrapTimeout != 45*time.Second { + t.Fatalf("expected bootstrap timeout 45s, got %v", cfg.BootstrapTimeout) } - - if d.config.BootstrapTimeout != 60*time.Second { - t.Errorf("expected bootstrap timeout 60s, got %v", d.config.BootstrapTimeout) + + if cfg.Mode != dhtmode.ModeClient { + t.Fatalf("expected mode client, got %v", cfg.Mode) } - - if d.config.DiscoveryInterval != 120*time.Second { - t.Errorf("expected discovery interval 120s, got %v", d.config.DiscoveryInterval) - } - - if d.config.AutoBootstrap { - t.Error("expected auto bootstrap to be disabled") + + if !cfg.AutoBootstrap { + t.Fatal("expected 
auto bootstrap to remain enabled") } } -func TestWithBootstrapPeersFromStrings(t *testing.T) { - ctx := context.Background() - - host, err := libp2p.New() - if err != nil { - t.Fatalf("failed to create test host: %v", err) - } - defer host.Close() - - bootstrapAddrs := []string{ - "/ip4/127.0.0.1/tcp/4001/p2p/QmTest1", - "/ip4/127.0.0.1/tcp/4002/p2p/QmTest2", - } - - d, err := NewDHT(ctx, host, WithBootstrapPeersFromStrings(bootstrapAddrs)) - if err != nil { - t.Fatalf("failed to create DHT: %v", err) - } - defer d.Close() - - if len(d.config.BootstrapPeers) != 2 { - t.Errorf("expected 2 bootstrap peers, got %d", len(d.config.BootstrapPeers)) - } -} +func TestProvideRequiresBootstrap(t *testing.T) { + h := newHarness(t) -func TestWithBootstrapPeersFromStringsInvalid(t *testing.T) { - ctx := context.Background() - - host, err := libp2p.New() - if err != nil { - t.Fatalf("failed to create test host: %v", err) + err := h.dht.Provide(h.ctx, "key") + if err == nil { + t.Fatal("expected Provide to fail when not bootstrapped") } - defer host.Close() - - // Include invalid addresses - they should be filtered out - bootstrapAddrs := []string{ - "/ip4/127.0.0.1/tcp/4001/p2p/QmTest1", // valid - "invalid-address", // invalid - "/ip4/127.0.0.1/tcp/4002/p2p/QmTest2", // valid - } - - d, err := NewDHT(ctx, host, WithBootstrapPeersFromStrings(bootstrapAddrs)) - if err != nil { - t.Fatalf("failed to create DHT: %v", err) - } - defer d.Close() - - // Should have filtered out the invalid address - if len(d.config.BootstrapPeers) != 2 { - t.Errorf("expected 2 valid bootstrap peers, got %d", len(d.config.BootstrapPeers)) - } -} -func TestBootstrapWithoutPeers(t *testing.T) { - ctx := context.Background() - - host, err := libp2p.New() - if err != nil { - t.Fatalf("failed to create test host: %v", err) - } - defer host.Close() - - d, err := NewDHT(ctx, host, WithAutoBootstrap(false)) - if err != nil { - t.Fatalf("failed to create DHT: %v", err) - } - defer d.Close() - - // Bootstrap should use default IPFS peers when none configured - err = d.Bootstrap() - // This might fail in test environment without network access, but should not panic - if err != nil { - // Expected in test environment - t.Logf("Bootstrap failed as expected in test environment: %v", err) - } -} - -func TestIsBootstrapped(t *testing.T) { - ctx := context.Background() - - host, err := libp2p.New() - if err != nil { - t.Fatalf("failed to create test host: %v", err) - } - defer host.Close() - - d, err := NewDHT(ctx, host, WithAutoBootstrap(false)) - if err != nil { - t.Fatalf("failed to create DHT: %v", err) - } - defer d.Close() - - // Should not be bootstrapped initially - if d.IsBootstrapped() { - t.Error("DHT should not be bootstrapped initially") + if !strings.Contains(err.Error(), "not bootstrapped") { + t.Fatalf("expected error to indicate bootstrap requirement, got %v", err) } } func TestRegisterPeer(t *testing.T) { - ctx := context.Background() - - host, err := libp2p.New() - if err != nil { - t.Fatalf("failed to create test host: %v", err) - } - defer host.Close() - - d, err := NewDHT(ctx, host) - if err != nil { - t.Fatalf("failed to create DHT: %v", err) - } - defer d.Close() - + h := newHarness(t) + peerID := test.RandPeerIDFatal(t) - agent := "claude" - role := "frontend" - capabilities := []string{"react", "javascript"} - - d.RegisterPeer(peerID, agent, role, capabilities) - - knownPeers := d.GetKnownPeers() - if len(knownPeers) != 1 { - t.Errorf("expected 1 known peer, got %d", len(knownPeers)) + + h.dht.RegisterPeer(peerID, 
"apollo", "platform", []string{"go"}) + + peers := h.dht.GetKnownPeers() + + info, ok := peers[peerID] + if !ok { + t.Fatalf("expected peer to be tracked") } - - peerInfo, exists := knownPeers[peerID] - if !exists { - t.Error("peer not found in known peers") + + if info.Agent != "apollo" { + t.Fatalf("expected agent apollo, got %s", info.Agent) } - - if peerInfo.Agent != agent { - t.Errorf("expected agent %s, got %s", agent, peerInfo.Agent) + + if info.Role != "platform" { + t.Fatalf("expected role platform, got %s", info.Role) } - - if peerInfo.Role != role { - t.Errorf("expected role %s, got %s", role, peerInfo.Role) - } - - if len(peerInfo.Capabilities) != len(capabilities) { - t.Errorf("expected %d capabilities, got %d", len(capabilities), len(peerInfo.Capabilities)) + + if len(info.Capabilities) != 1 || info.Capabilities[0] != "go" { + t.Fatalf("expected capability go, got %v", info.Capabilities) } } -func TestGetConnectedPeers(t *testing.T) { - ctx := context.Background() - - host, err := libp2p.New() - if err != nil { - t.Fatalf("failed to create test host: %v", err) +func TestGetStatsProvidesUptime(t *testing.T) { + h := newHarness(t) + + stats := h.dht.GetStats() + + if stats.TotalPeers != 0 { + t.Fatalf("expected zero peers, got %d", stats.TotalPeers) } - defer host.Close() - - d, err := NewDHT(ctx, host) - if err != nil { - t.Fatalf("failed to create DHT: %v", err) - } - defer d.Close() - - // Initially should have no connected peers - peers := d.GetConnectedPeers() - if len(peers) != 0 { - t.Errorf("expected 0 connected peers, got %d", len(peers)) + + if stats.Uptime < 0 { + t.Fatalf("expected non-negative uptime, got %v", stats.Uptime) } } - -func TestPutAndGetValue(t *testing.T) { - ctx := context.Background() - - host, err := libp2p.New() - if err != nil { - t.Fatalf("failed to create test host: %v", err) - } - defer host.Close() - - d, err := NewDHT(ctx, host, WithAutoBootstrap(false)) - if err != nil { - t.Fatalf("failed to create DHT: %v", err) - } - defer d.Close() - - // Test without bootstrap (should fail) - key := "test-key" - value := []byte("test-value") - - err = d.PutValue(ctx, key, value) - if err == nil { - t.Error("PutValue should fail when DHT not bootstrapped") - } - - _, err = d.GetValue(ctx, key) - if err == nil { - t.Error("GetValue should fail when DHT not bootstrapped") - } -} - -func TestProvideAndFindProviders(t *testing.T) { - ctx := context.Background() - - host, err := libp2p.New() - if err != nil { - t.Fatalf("failed to create test host: %v", err) - } - defer host.Close() - - d, err := NewDHT(ctx, host, WithAutoBootstrap(false)) - if err != nil { - t.Fatalf("failed to create DHT: %v", err) - } - defer d.Close() - - // Test without bootstrap (should fail) - key := "test-service" - - err = d.Provide(ctx, key) - if err == nil { - t.Error("Provide should fail when DHT not bootstrapped") - } - - _, err = d.FindProviders(ctx, key, 10) - if err == nil { - t.Error("FindProviders should fail when DHT not bootstrapped") - } -} - -func TestFindPeer(t *testing.T) { - ctx := context.Background() - - host, err := libp2p.New() - if err != nil { - t.Fatalf("failed to create test host: %v", err) - } - defer host.Close() - - d, err := NewDHT(ctx, host, WithAutoBootstrap(false)) - if err != nil { - t.Fatalf("failed to create DHT: %v", err) - } - defer d.Close() - - // Test without bootstrap (should fail) - peerID := test.RandPeerIDFatal(t) - - _, err = d.FindPeer(ctx, peerID) - if err == nil { - t.Error("FindPeer should fail when DHT not bootstrapped") - } -} - -func 
TestFindPeersByRole(t *testing.T) { - ctx := context.Background() - - host, err := libp2p.New() - if err != nil { - t.Fatalf("failed to create test host: %v", err) - } - defer host.Close() - - d, err := NewDHT(ctx, host, WithAutoBootstrap(false)) - if err != nil { - t.Fatalf("failed to create DHT: %v", err) - } - defer d.Close() - - // Register some local peers - peerID1 := test.RandPeerIDFatal(t) - peerID2 := test.RandPeerIDFatal(t) - - d.RegisterPeer(peerID1, "claude", "frontend", []string{"react"}) - d.RegisterPeer(peerID2, "claude", "backend", []string{"go"}) - - // Find frontend peers - frontendPeers, err := d.FindPeersByRole(ctx, "frontend") - if err != nil { - t.Fatalf("failed to find peers by role: %v", err) - } - - if len(frontendPeers) != 1 { - t.Errorf("expected 1 frontend peer, got %d", len(frontendPeers)) - } - - if frontendPeers[0].ID != peerID1 { - t.Error("wrong peer returned for frontend role") - } - - // Find all peers with wildcard - allPeers, err := d.FindPeersByRole(ctx, "*") - if err != nil { - t.Fatalf("failed to find all peers: %v", err) - } - - if len(allPeers) != 2 { - t.Errorf("expected 2 peers with wildcard, got %d", len(allPeers)) - } -} - -func TestAnnounceRole(t *testing.T) { - ctx := context.Background() - - host, err := libp2p.New() - if err != nil { - t.Fatalf("failed to create test host: %v", err) - } - defer host.Close() - - d, err := NewDHT(ctx, host, WithAutoBootstrap(false)) - if err != nil { - t.Fatalf("failed to create DHT: %v", err) - } - defer d.Close() - - // Should fail when not bootstrapped - err = d.AnnounceRole(ctx, "frontend") - if err == nil { - t.Error("AnnounceRole should fail when DHT not bootstrapped") - } -} - -func TestAnnounceCapability(t *testing.T) { - ctx := context.Background() - - host, err := libp2p.New() - if err != nil { - t.Fatalf("failed to create test host: %v", err) - } - defer host.Close() - - d, err := NewDHT(ctx, host, WithAutoBootstrap(false)) - if err != nil { - t.Fatalf("failed to create DHT: %v", err) - } - defer d.Close() - - // Should fail when not bootstrapped - err = d.AnnounceCapability(ctx, "react") - if err == nil { - t.Error("AnnounceCapability should fail when DHT not bootstrapped") - } -} - -func TestGetRoutingTable(t *testing.T) { - ctx := context.Background() - - host, err := libp2p.New() - if err != nil { - t.Fatalf("failed to create test host: %v", err) - } - defer host.Close() - - d, err := NewDHT(ctx, host) - if err != nil { - t.Fatalf("failed to create DHT: %v", err) - } - defer d.Close() - - rt := d.GetRoutingTable() - if rt == nil { - t.Error("routing table should not be nil") - } -} - -func TestGetDHTSize(t *testing.T) { - ctx := context.Background() - - host, err := libp2p.New() - if err != nil { - t.Fatalf("failed to create test host: %v", err) - } - defer host.Close() - - d, err := NewDHT(ctx, host) - if err != nil { - t.Fatalf("failed to create DHT: %v", err) - } - defer d.Close() - - size := d.GetDHTSize() - // Should be 0 or small initially - if size < 0 { - t.Errorf("DHT size should be non-negative, got %d", size) - } -} - -func TestRefreshRoutingTable(t *testing.T) { - ctx := context.Background() - - host, err := libp2p.New() - if err != nil { - t.Fatalf("failed to create test host: %v", err) - } - defer host.Close() - - d, err := NewDHT(ctx, host, WithAutoBootstrap(false)) - if err != nil { - t.Fatalf("failed to create DHT: %v", err) - } - defer d.Close() - - // Should fail when not bootstrapped - err = d.RefreshRoutingTable() - if err == nil { - t.Error("RefreshRoutingTable should fail 
when DHT not bootstrapped") - } -} - -func TestHost(t *testing.T) { - ctx := context.Background() - - host, err := libp2p.New() - if err != nil { - t.Fatalf("failed to create test host: %v", err) - } - defer host.Close() - - d, err := NewDHT(ctx, host) - if err != nil { - t.Fatalf("failed to create DHT: %v", err) - } - defer d.Close() - - if d.Host() != host { - t.Error("Host() should return the same host instance") - } -} - -func TestClose(t *testing.T) { - ctx := context.Background() - - host, err := libp2p.New() - if err != nil { - t.Fatalf("failed to create test host: %v", err) - } - defer host.Close() - - d, err := NewDHT(ctx, host) - if err != nil { - t.Fatalf("failed to create DHT: %v", err) - } - - // Should close without error - err = d.Close() - if err != nil { - t.Errorf("Close() failed: %v", err) - } -} \ No newline at end of file diff --git a/pkg/dht/encrypted_storage_security_test.go b/pkg/dht/encrypted_storage_security_test.go index df966df..b182f84 100644 --- a/pkg/dht/encrypted_storage_security_test.go +++ b/pkg/dht/encrypted_storage_security_test.go @@ -2,559 +2,155 @@ package dht import ( "context" + "strings" "testing" "time" "chorus/pkg/config" ) -// TestDHTSecurityPolicyEnforcement tests security policy enforcement in DHT operations -func TestDHTSecurityPolicyEnforcement(t *testing.T) { - ctx := context.Background() - - testCases := []struct { - name string - currentRole string - operation string - ucxlAddress string - contentType string - expectSuccess bool - expectedError string - }{ - // Store operation tests +type securityTestCase struct { + name string + role string + address string + contentType string + expectSuccess bool + expectErrHint string +} + +func newTestEncryptedStorage(cfg *config.Config) *EncryptedDHTStorage { + return &EncryptedDHTStorage{ + ctx: context.Background(), + config: cfg, + nodeID: "test-node", + cache: make(map[string]*CachedEntry), + metrics: &StorageMetrics{LastUpdate: time.Now()}, + } +} + +func TestCheckStoreAccessPolicy(t *testing.T) { + cases := []securityTestCase{ { - name: "admin_can_store_all_content", - currentRole: "admin", - operation: "store", - ucxlAddress: "agent1:admin:system:security_audit", + name: "backend developer can store", + role: "backend_developer", + address: "agent1:backend_developer:api:endpoint", contentType: "decision", expectSuccess: true, }, { - name: "backend_developer_can_store_backend_content", - currentRole: "backend_developer", - operation: "store", - ucxlAddress: "agent1:backend_developer:api:endpoint_design", - contentType: "suggestion", + name: "project manager can store", + role: "project_manager", + address: "agent1:project_manager:plan:milestone", + contentType: "decision", expectSuccess: true, }, { - name: "readonly_role_cannot_store", - currentRole: "readonly_user", - operation: "store", - ucxlAddress: "agent1:readonly_user:project:observation", - contentType: "suggestion", - expectSuccess: false, - expectedError: "read-only authority", + name: "read only user cannot store", + role: "readonly_user", + address: "agent1:readonly_user:note:observation", + contentType: "note", + expectSuccess: false, + expectErrHint: "read-only authority", }, { - name: "unknown_role_cannot_store", - currentRole: "invalid_role", - operation: "store", - ucxlAddress: "agent1:invalid_role:project:task", - contentType: "decision", - expectSuccess: false, - expectedError: "unknown creator role", - }, - - // Retrieve operation tests - { - name: "any_valid_role_can_retrieve", - currentRole: "qa_engineer", - operation: 
"retrieve", - ucxlAddress: "agent1:backend_developer:api:test_data", - expectSuccess: true, - }, - { - name: "unknown_role_cannot_retrieve", - currentRole: "nonexistent_role", - operation: "retrieve", - ucxlAddress: "agent1:backend_developer:api:test_data", - expectSuccess: false, - expectedError: "unknown current role", - }, - - // Announce operation tests - { - name: "coordination_role_can_announce", - currentRole: "senior_software_architect", - operation: "announce", - ucxlAddress: "agent1:senior_software_architect:architecture:blueprint", - expectSuccess: true, - }, - { - name: "decision_role_can_announce", - currentRole: "security_expert", - operation: "announce", - ucxlAddress: "agent1:security_expert:security:policy", - expectSuccess: true, - }, - { - name: "suggestion_role_cannot_announce", - currentRole: "suggestion_only_role", - operation: "announce", - ucxlAddress: "agent1:suggestion_only_role:project:idea", - expectSuccess: false, - expectedError: "lacks authority", - }, - { - name: "readonly_role_cannot_announce", - currentRole: "readonly_user", - operation: "announce", - ucxlAddress: "agent1:readonly_user:project:observation", - expectSuccess: false, - expectedError: "lacks authority", + name: "unknown role rejected", + role: "ghost_role", + address: "agent1:ghost_role:context", + contentType: "decision", + expectSuccess: false, + expectErrHint: "unknown creator role", }, } - for _, tc := range testCases { + cfg := &config.Config{Agent: config.AgentConfig{}} + eds := newTestEncryptedStorage(cfg) + + for _, tc := range cases { t.Run(tc.name, func(t *testing.T) { - // Create test configuration - cfg := &config.Config{ - Agent: config.AgentConfig{ - ID: "test-agent", - Role: tc.currentRole, - }, - Security: config.SecurityConfig{ - KeyRotationDays: 90, - AuditLogging: true, - AuditPath: "/tmp/test-security-audit.log", - }, - } - - // Create mock encrypted storage - eds := createMockEncryptedStorage(ctx, cfg) - - var err error - switch tc.operation { - case "store": - err = eds.checkStoreAccessPolicy(tc.currentRole, tc.ucxlAddress, tc.contentType) - case "retrieve": - err = eds.checkRetrieveAccessPolicy(tc.currentRole, tc.ucxlAddress) - case "announce": - err = eds.checkAnnounceAccessPolicy(tc.currentRole, tc.ucxlAddress) - } - - if tc.expectSuccess { - if err != nil { - t.Errorf("Expected %s operation to succeed for role %s, but got error: %v", - tc.operation, tc.currentRole, err) - } - } else { - if err == nil { - t.Errorf("Expected %s operation to fail for role %s, but it succeeded", - tc.operation, tc.currentRole) - } - if tc.expectedError != "" && !containsSubstring(err.Error(), tc.expectedError) { - t.Errorf("Expected error to contain '%s', got '%s'", tc.expectedError, err.Error()) - } - } + err := eds.checkStoreAccessPolicy(tc.role, tc.address, tc.contentType) + verifySecurityExpectation(t, tc.expectSuccess, tc.expectErrHint, err) }) } } -// TestDHTAuditLogging tests comprehensive audit logging for DHT operations -func TestDHTAuditLogging(t *testing.T) { - ctx := context.Background() - - testCases := []struct { - name string - operation string - role string - ucxlAddress string - success bool - errorMsg string - expectAudit bool - }{ +func TestCheckRetrieveAccessPolicy(t *testing.T) { + cases := []securityTestCase{ { - name: "successful_store_operation", - operation: "store", - role: "backend_developer", - ucxlAddress: "agent1:backend_developer:api:user_service", - success: true, - expectAudit: true, + name: "qa engineer allowed", + role: "qa_engineer", + address: 
"agent1:backend_developer:api:tests", + expectSuccess: true, }, { - name: "failed_store_operation", - operation: "store", - role: "readonly_user", - ucxlAddress: "agent1:readonly_user:project:readonly_attempt", - success: false, - errorMsg: "read-only authority", - expectAudit: true, - }, - { - name: "successful_retrieve_operation", - operation: "retrieve", - role: "frontend_developer", - ucxlAddress: "agent1:backend_developer:api:user_data", - success: true, - expectAudit: true, - }, - { - name: "successful_announce_operation", - operation: "announce", - role: "senior_software_architect", - ucxlAddress: "agent1:senior_software_architect:architecture:system_design", - success: true, - expectAudit: true, - }, - { - name: "audit_disabled_no_logging", - operation: "store", - role: "backend_developer", - ucxlAddress: "agent1:backend_developer:api:no_audit", - success: true, - expectAudit: false, + name: "unknown role rejected", + role: "unknown", + address: "agent1:backend_developer:api:tests", + expectSuccess: false, + expectErrHint: "unknown current role", }, } - for _, tc := range testCases { + cfg := &config.Config{Agent: config.AgentConfig{}} + eds := newTestEncryptedStorage(cfg) + + for _, tc := range cases { t.Run(tc.name, func(t *testing.T) { - // Create configuration with audit logging - cfg := &config.Config{ - Agent: config.AgentConfig{ - ID: "test-agent", - Role: tc.role, - }, - Security: config.SecurityConfig{ - KeyRotationDays: 90, - AuditLogging: tc.expectAudit, - AuditPath: "/tmp/test-dht-audit.log", - }, - } - - // Create mock encrypted storage - eds := createMockEncryptedStorage(ctx, cfg) - - // Capture audit output - auditCaptured := false - - // Simulate audit operation - switch tc.operation { - case "store": - // Mock the audit function call - if tc.expectAudit && cfg.Security.AuditLogging { - eds.auditStoreOperation(tc.ucxlAddress, tc.role, "test-content", 1024, tc.success, tc.errorMsg) - auditCaptured = true - } - case "retrieve": - if tc.expectAudit && cfg.Security.AuditLogging { - eds.auditRetrieveOperation(tc.ucxlAddress, tc.role, tc.success, tc.errorMsg) - auditCaptured = true - } - case "announce": - if tc.expectAudit && cfg.Security.AuditLogging { - eds.auditAnnounceOperation(tc.ucxlAddress, tc.role, tc.success, tc.errorMsg) - auditCaptured = true - } - } - - // Verify audit logging behavior - if tc.expectAudit && !auditCaptured { - t.Errorf("Expected audit logging for %s operation but none was captured", tc.operation) - } - if !tc.expectAudit && auditCaptured { - t.Errorf("Expected no audit logging for %s operation but audit was captured", tc.operation) - } + err := eds.checkRetrieveAccessPolicy(tc.role, tc.address) + verifySecurityExpectation(t, tc.expectSuccess, tc.expectErrHint, err) }) } } -// TestSecurityConfigIntegration tests integration with SecurityConfig -func TestSecurityConfigIntegration(t *testing.T) { - ctx := context.Background() - - testConfigs := []struct { - name string - auditLogging bool - auditPath string - expectAuditWork bool - }{ +func TestCheckAnnounceAccessPolicy(t *testing.T) { + cases := []securityTestCase{ { - name: "audit_enabled_with_path", - auditLogging: true, - auditPath: "/tmp/test-audit-enabled.log", - expectAuditWork: true, + name: "architect can announce", + role: "senior_software_architect", + address: "agent1:senior_software_architect:architecture:proposal", + expectSuccess: true, }, { - name: "audit_disabled", - auditLogging: false, - auditPath: "/tmp/test-audit-disabled.log", - expectAuditWork: false, + name: "suggestion 
role cannot announce", + role: "suggestion_only_role", + address: "agent1:suggestion_only_role:idea", + expectSuccess: false, + expectErrHint: "lacks authority", }, { - name: "audit_enabled_no_path", - auditLogging: true, - auditPath: "", - expectAuditWork: false, + name: "unknown role rejected", + role: "mystery", + address: "agent1:mystery:topic", + expectSuccess: false, + expectErrHint: "unknown current role", }, } - for _, tc := range testConfigs { + cfg := &config.Config{Agent: config.AgentConfig{}} + eds := newTestEncryptedStorage(cfg) + + for _, tc := range cases { t.Run(tc.name, func(t *testing.T) { - cfg := &config.Config{ - Agent: config.AgentConfig{ - ID: "test-agent", - Role: "backend_developer", - }, - Security: config.SecurityConfig{ - KeyRotationDays: 90, - AuditLogging: tc.auditLogging, - AuditPath: tc.auditPath, - }, - } - - eds := createMockEncryptedStorage(ctx, cfg) - - // Test audit function behavior with different configurations - auditWorked := func() bool { - if !cfg.Security.AuditLogging || cfg.Security.AuditPath == "" { - return false - } - return true - }() - - if auditWorked != tc.expectAuditWork { - t.Errorf("Expected audit to work: %v, but got: %v", tc.expectAuditWork, auditWorked) - } + err := eds.checkAnnounceAccessPolicy(tc.role, tc.address) + verifySecurityExpectation(t, tc.expectSuccess, tc.expectErrHint, err) }) } } -// TestRoleAuthorityHierarchy tests role authority hierarchy enforcement -func TestRoleAuthorityHierarchy(t *testing.T) { - ctx := context.Background() - - // Test role authority levels for different operations - authorityTests := []struct { - role string - authorityLevel config.AuthorityLevel - canStore bool - canRetrieve bool - canAnnounce bool - }{ - { - role: "admin", - authorityLevel: config.AuthorityMaster, - canStore: true, - canRetrieve: true, - canAnnounce: true, - }, - { - role: "senior_software_architect", - authorityLevel: config.AuthorityDecision, - canStore: true, - canRetrieve: true, - canAnnounce: true, - }, - { - role: "security_expert", - authorityLevel: config.AuthorityCoordination, - canStore: true, - canRetrieve: true, - canAnnounce: true, - }, - { - role: "backend_developer", - authorityLevel: config.AuthoritySuggestion, - canStore: true, - canRetrieve: true, - canAnnounce: false, - }, +func verifySecurityExpectation(t *testing.T, expectSuccess bool, hint string, err error) { + t.Helper() + + if expectSuccess { + if err != nil { + t.Fatalf("expected success, got error: %v", err) + } + return } - for _, tt := range authorityTests { - t.Run(tt.role+"_authority_test", func(t *testing.T) { - cfg := &config.Config{ - Agent: config.AgentConfig{ - ID: "test-agent", - Role: tt.role, - }, - Security: config.SecurityConfig{ - KeyRotationDays: 90, - AuditLogging: true, - AuditPath: "/tmp/test-authority.log", - }, - } + if err == nil { + t.Fatal("expected error but got success") + } - eds := createMockEncryptedStorage(ctx, cfg) - - // Test store permission - storeErr := eds.checkStoreAccessPolicy(tt.role, "test:address", "content") - if tt.canStore && storeErr != nil { - t.Errorf("Role %s should be able to store but got error: %v", tt.role, storeErr) - } - if !tt.canStore && storeErr == nil { - t.Errorf("Role %s should not be able to store but operation succeeded", tt.role) - } - - // Test retrieve permission - retrieveErr := eds.checkRetrieveAccessPolicy(tt.role, "test:address") - if tt.canRetrieve && retrieveErr != nil { - t.Errorf("Role %s should be able to retrieve but got error: %v", tt.role, retrieveErr) - } - if 
!tt.canRetrieve && retrieveErr == nil { - t.Errorf("Role %s should not be able to retrieve but operation succeeded", tt.role) - } - - // Test announce permission - announceErr := eds.checkAnnounceAccessPolicy(tt.role, "test:address") - if tt.canAnnounce && announceErr != nil { - t.Errorf("Role %s should be able to announce but got error: %v", tt.role, announceErr) - } - if !tt.canAnnounce && announceErr == nil { - t.Errorf("Role %s should not be able to announce but operation succeeded", tt.role) - } - }) + if hint != "" && !strings.Contains(err.Error(), hint) { + t.Fatalf("expected error to contain %q, got %q", hint, err.Error()) } } - -// TestSecurityMetrics tests security-related metrics -func TestSecurityMetrics(t *testing.T) { - ctx := context.Background() - - cfg := &config.Config{ - Agent: config.AgentConfig{ - ID: "test-agent", - Role: "backend_developer", - }, - Security: config.SecurityConfig{ - KeyRotationDays: 90, - AuditLogging: true, - AuditPath: "/tmp/test-metrics.log", - }, - } - - eds := createMockEncryptedStorage(ctx, cfg) - - // Simulate some operations to generate metrics - for i := 0; i < 5; i++ { - eds.metrics.StoredItems++ - eds.metrics.RetrievedItems++ - eds.metrics.EncryptionOps++ - eds.metrics.DecryptionOps++ - } - - metrics := eds.GetMetrics() - - expectedMetrics := map[string]int64{ - "stored_items": 5, - "retrieved_items": 5, - "encryption_ops": 5, - "decryption_ops": 5, - } - - for metricName, expectedValue := range expectedMetrics { - if actualValue, ok := metrics[metricName]; !ok { - t.Errorf("Expected metric %s to be present in metrics", metricName) - } else if actualValue != expectedValue { - t.Errorf("Expected %s to be %d, got %v", metricName, expectedValue, actualValue) - } - } -} - -// Helper functions - -func createMockEncryptedStorage(ctx context.Context, cfg *config.Config) *EncryptedDHTStorage { - return &EncryptedDHTStorage{ - ctx: ctx, - config: cfg, - nodeID: "test-node-id", - cache: make(map[string]*CachedEntry), - metrics: &StorageMetrics{ - LastUpdate: time.Now(), - }, - } -} - -func containsSubstring(str, substr string) bool { - if len(substr) == 0 { - return true - } - if len(str) < len(substr) { - return false - } - for i := 0; i <= len(str)-len(substr); i++ { - if str[i:i+len(substr)] == substr { - return true - } - } - return false -} - -// Benchmarks for security performance - -func BenchmarkSecurityPolicyChecks(b *testing.B) { - ctx := context.Background() - cfg := &config.Config{ - Agent: config.AgentConfig{ - ID: "bench-agent", - Role: "backend_developer", - }, - Security: config.SecurityConfig{ - KeyRotationDays: 90, - AuditLogging: true, - AuditPath: "/tmp/bench-security.log", - }, - } - - eds := createMockEncryptedStorage(ctx, cfg) - - b.ResetTimer() - - b.Run("store_policy_check", func(b *testing.B) { - for i := 0; i < b.N; i++ { - eds.checkStoreAccessPolicy("backend_developer", "test:address", "content") - } - }) - - b.Run("retrieve_policy_check", func(b *testing.B) { - for i := 0; i < b.N; i++ { - eds.checkRetrieveAccessPolicy("backend_developer", "test:address") - } - }) - - b.Run("announce_policy_check", func(b *testing.B) { - for i := 0; i < b.N; i++ { - eds.checkAnnounceAccessPolicy("senior_software_architect", "test:address") - } - }) -} - -func BenchmarkAuditOperations(b *testing.B) { - ctx := context.Background() - cfg := &config.Config{ - Agent: config.AgentConfig{ - ID: "bench-agent", - Role: "backend_developer", - }, - Security: config.SecurityConfig{ - KeyRotationDays: 90, - AuditLogging: true, - AuditPath: 
"/tmp/bench-audit.log", - }, - } - - eds := createMockEncryptedStorage(ctx, cfg) - - b.ResetTimer() - - b.Run("store_audit", func(b *testing.B) { - for i := 0; i < b.N; i++ { - eds.auditStoreOperation("test:address", "backend_developer", "content", 1024, true, "") - } - }) - - b.Run("retrieve_audit", func(b *testing.B) { - for i := 0; i < b.N; i++ { - eds.auditRetrieveOperation("test:address", "backend_developer", true, "") - } - }) - - b.Run("announce_audit", func(b *testing.B) { - for i := 0; i < b.N; i++ { - eds.auditAnnounceOperation("test:address", "backend_developer", true, "") - } - }) -} \ No newline at end of file diff --git a/pkg/dht/real_dht.go b/pkg/dht/real_dht.go index 90c2dca..e46483a 100644 --- a/pkg/dht/real_dht.go +++ b/pkg/dht/real_dht.go @@ -1,14 +1,117 @@ package dht import ( + "context" + "errors" "fmt" "chorus/pkg/config" + libp2p "github.com/libp2p/go-libp2p" + "github.com/libp2p/go-libp2p/core/host" + "github.com/libp2p/go-libp2p/core/peer" + "github.com/libp2p/go-libp2p/p2p/security/noise" + "github.com/libp2p/go-libp2p/p2p/transport/tcp" + "github.com/multiformats/go-multiaddr" ) -// NewRealDHT creates a new real DHT implementation -func NewRealDHT(config *config.HybridConfig) (DHT, error) { - // TODO: Implement real DHT initialization - // For now, return an error to indicate it's not yet implemented - return nil, fmt.Errorf("real DHT implementation not yet available") -} \ No newline at end of file +// RealDHT wraps a libp2p-based DHT to satisfy the generic DHT interface. +type RealDHT struct { + cancel context.CancelFunc + host host.Host + dht *LibP2PDHT +} + +// NewRealDHT creates a new real DHT implementation backed by libp2p. +func NewRealDHT(cfg *config.HybridConfig) (DHT, error) { + if cfg == nil { + cfg = &config.HybridConfig{} + } + + ctx, cancel := context.WithCancel(context.Background()) + + listenAddr, err := multiaddr.NewMultiaddr("/ip4/0.0.0.0/tcp/0") + if err != nil { + cancel() + return nil, fmt.Errorf("failed to create listen address: %w", err) + } + + host, err := libp2p.New( + libp2p.ListenAddrs(listenAddr), + libp2p.Security(noise.ID, noise.New), + libp2p.Transport(tcp.NewTCPTransport), + libp2p.DefaultMuxers, + libp2p.EnableRelay(), + ) + if err != nil { + cancel() + return nil, fmt.Errorf("failed to create libp2p host: %w", err) + } + + opts := []Option{ + WithProtocolPrefix("/CHORUS"), + } + + if nodes := cfg.GetDHTBootstrapNodes(); len(nodes) > 0 { + opts = append(opts, WithBootstrapPeersFromStrings(nodes)) + } + + libp2pDHT, err := NewLibP2PDHT(ctx, host, opts...) + if err != nil { + host.Close() + cancel() + return nil, fmt.Errorf("failed to initialize libp2p DHT: %w", err) + } + + if err := libp2pDHT.Bootstrap(); err != nil { + libp2pDHT.Close() + host.Close() + cancel() + return nil, fmt.Errorf("failed to bootstrap DHT: %w", err) + } + + return &RealDHT{ + cancel: cancel, + host: host, + dht: libp2pDHT, + }, nil +} + +// PutValue stores a value in the DHT. +func (r *RealDHT) PutValue(ctx context.Context, key string, value []byte) error { + return r.dht.PutValue(ctx, key, value) +} + +// GetValue retrieves a value from the DHT. +func (r *RealDHT) GetValue(ctx context.Context, key string) ([]byte, error) { + return r.dht.GetValue(ctx, key) +} + +// Provide announces that this node can provide the given key. +func (r *RealDHT) Provide(ctx context.Context, key string) error { + return r.dht.Provide(ctx, key) +} + +// FindProviders locates peers that can provide the specified key. 
+func (r *RealDHT) FindProviders(ctx context.Context, key string, limit int) ([]peer.AddrInfo, error) { + return r.dht.FindProviders(ctx, key, limit) +} + +// GetStats exposes runtime metrics for the real DHT. +func (r *RealDHT) GetStats() DHTStats { + return r.dht.GetStats() +} + +// Close releases resources associated with the DHT. +func (r *RealDHT) Close() error { + r.cancel() + + var errs []error + if err := r.dht.Close(); err != nil { + errs = append(errs, err) + } + if err := r.host.Close(); err != nil { + errs = append(errs, err) + } + + return errors.Join(errs...) +} diff --git a/pkg/dht/replication_test.go b/pkg/dht/replication_test.go index e5c3499..0c1485c 100644 --- a/pkg/dht/replication_test.go +++ b/pkg/dht/replication_test.go @@ -2,159 +2,106 @@ package dht import ( "context" - "fmt" "testing" "time" ) -// TestReplicationManager tests basic replication manager functionality -func TestReplicationManager(t *testing.T) { - ctx := context.Background() - - // Create a mock DHT for testing - mockDHT := NewMockDHTInterface() - - // Create replication manager - config := DefaultReplicationConfig() - config.ReprovideInterval = 1 * time.Second // Short interval for testing - config.CleanupInterval = 1 * time.Second - - rm := NewReplicationManager(ctx, mockDHT.Mock(), config) - defer rm.Stop() - - // Test adding content - testKey := "test-content-key" - testSize := int64(1024) - testPriority := 5 - - err := rm.AddContent(testKey, testSize, testPriority) +func newReplicationManagerForTest(t *testing.T) *ReplicationManager { + t.Helper() + + cfg := &ReplicationConfig{ + ReplicationFactor: 3, + ReprovideInterval: time.Hour, + CleanupInterval: time.Hour, + ProviderTTL: 30 * time.Minute, + MaxProvidersPerKey: 5, + EnableAutoReplication: false, + EnableReprovide: false, + MaxConcurrentReplications: 1, + } + + rm := NewReplicationManager(context.Background(), nil, cfg) + t.Cleanup(func() { + if rm.reprovideTimer != nil { + rm.reprovideTimer.Stop() + } + if rm.cleanupTimer != nil { + rm.cleanupTimer.Stop() + } + rm.cancel() + }) + return rm +} + +func TestAddContentRegistersKey(t *testing.T) { + rm := newReplicationManagerForTest(t) + + if err := rm.AddContent("ucxl://example/path", 512, 1); err != nil { + t.Fatalf("expected AddContent to succeed, got error: %v", err) + } + + rm.keysMutex.RLock() + record, ok := rm.contentKeys["ucxl://example/path"] + rm.keysMutex.RUnlock() + + if !ok { + t.Fatal("expected content key to be registered") + } + + if record.Size != 512 { + t.Fatalf("expected size 512, got %d", record.Size) + } +} + +func TestRemoveContentClearsTracking(t *testing.T) { + rm := newReplicationManagerForTest(t) + + if err := rm.AddContent("ucxl://example/path", 512, 1); err != nil { + t.Fatalf("AddContent returned error: %v", err) + } + + if err := rm.RemoveContent("ucxl://example/path"); err != nil { + t.Fatalf("RemoveContent returned error: %v", err) + } + + rm.keysMutex.RLock() + _, exists := rm.contentKeys["ucxl://example/path"] + rm.keysMutex.RUnlock() + + if exists { + t.Fatal("expected content key to be removed") + } +} + +func TestGetReplicationStatusReturnsCopy(t *testing.T) { + rm := newReplicationManagerForTest(t) + + if err := rm.AddContent("ucxl://example/path", 512, 1); err != nil { + t.Fatalf("AddContent returned error: %v", err) + } + + status, err := rm.GetReplicationStatus("ucxl://example/path") if err != nil { - t.Fatalf("Failed to add content: %v", err) + t.Fatalf("GetReplicationStatus returned error: %v", err) } - - // Test getting replication status - status, 
err := rm.GetReplicationStatus(testKey) - if err != nil { - t.Fatalf("Failed to get replication status: %v", err) + + if status.Key != "ucxl://example/path" { + t.Fatalf("expected status key to match, got %s", status.Key) } - - if status.Key != testKey { - t.Errorf("Expected key %s, got %s", testKey, status.Key) + + // Mutating status should not affect internal state + status.HealthyProviders = 99 + internal, _ := rm.GetReplicationStatus("ucxl://example/path") + if internal.HealthyProviders == 99 { + t.Fatal("expected GetReplicationStatus to return a copy") } - - if status.Size != testSize { - t.Errorf("Expected size %d, got %d", testSize, status.Size) - } - - if status.Priority != testPriority { - t.Errorf("Expected priority %d, got %d", testPriority, status.Priority) - } - - // Test providing content - err = rm.ProvideContent(testKey) - if err != nil { - t.Fatalf("Failed to provide content: %v", err) - } - - // Test metrics +} + +func TestGetMetricsReturnsSnapshot(t *testing.T) { + rm := newReplicationManagerForTest(t) + metrics := rm.GetMetrics() - if metrics.TotalKeys != 1 { - t.Errorf("Expected 1 total key, got %d", metrics.TotalKeys) - } - - // Test finding providers - providers, err := rm.FindProviders(ctx, testKey, 10) - if err != nil { - t.Fatalf("Failed to find providers: %v", err) - } - - t.Logf("Found %d providers for key %s", len(providers), testKey) - - // Test removing content - err = rm.RemoveContent(testKey) - if err != nil { - t.Fatalf("Failed to remove content: %v", err) - } - - // Verify content was removed - metrics = rm.GetMetrics() - if metrics.TotalKeys != 0 { - t.Errorf("Expected 0 total keys after removal, got %d", metrics.TotalKeys) + if metrics == rm.metrics { + t.Fatal("expected GetMetrics to return a copy of metrics") } } - -// TestLibP2PDHTReplication tests DHT replication functionality -func TestLibP2PDHTReplication(t *testing.T) { - // This would normally require a real libp2p setup - // For now, just test the interface methods exist - - // Mock test - in a real implementation, you'd set up actual libp2p hosts - t.Log("DHT replication interface methods are implemented") - - // Example of how the replication would be used: - // 1. Add content for replication - // 2. Content gets automatically provided to the DHT - // 3. Other nodes can discover this node as a provider - // 4. Periodic reproviding ensures content availability - // 5. 
Replication metrics track system health -} - -// TestReplicationConfig tests replication configuration -func TestReplicationConfig(t *testing.T) { - config := DefaultReplicationConfig() - - // Test default values - if config.ReplicationFactor != 3 { - t.Errorf("Expected default replication factor 3, got %d", config.ReplicationFactor) - } - - if config.ReprovideInterval != 12*time.Hour { - t.Errorf("Expected default reprovide interval 12h, got %v", config.ReprovideInterval) - } - - if !config.EnableAutoReplication { - t.Error("Expected auto replication to be enabled by default") - } - - if !config.EnableReprovide { - t.Error("Expected reprovide to be enabled by default") - } -} - -// TestProviderInfo tests provider information tracking -func TestProviderInfo(t *testing.T) { - // Test distance calculation - key := []byte("test-key") - peerID := "test-peer-id" - - distance := calculateDistance(key, []byte(peerID)) - - // Distance should be non-zero for different inputs - if distance == 0 { - t.Error("Expected non-zero distance for different inputs") - } - - t.Logf("Distance between key and peer: %d", distance) -} - -// TestReplicationMetrics tests metrics collection -func TestReplicationMetrics(t *testing.T) { - ctx := context.Background() - mockDHT := NewMockDHTInterface() - rm := NewReplicationManager(ctx, mockDHT.Mock(), DefaultReplicationConfig()) - defer rm.Stop() - - // Add some content - for i := 0; i < 3; i++ { - key := fmt.Sprintf("test-key-%d", i) - rm.AddContent(key, int64(1000+i*100), i+1) - } - - metrics := rm.GetMetrics() - - if metrics.TotalKeys != 3 { - t.Errorf("Expected 3 total keys, got %d", metrics.TotalKeys) - } - - t.Logf("Replication metrics: %+v", metrics) -} \ No newline at end of file diff --git a/pkg/election/election.go b/pkg/election/election.go index 9ed8b6a..e9fce59 100644 --- a/pkg/election/election.go +++ b/pkg/election/election.go @@ -19,8 +19,8 @@ import ( type ElectionTrigger string const ( - TriggerHeartbeatTimeout ElectionTrigger = "admin_heartbeat_timeout" - TriggerDiscoveryFailure ElectionTrigger = "no_admin_discovered" + TriggerHeartbeatTimeout ElectionTrigger = "admin_heartbeat_timeout" + TriggerDiscoveryFailure ElectionTrigger = "no_admin_discovered" TriggerSplitBrain ElectionTrigger = "split_brain_detected" TriggerQuorumRestored ElectionTrigger = "quorum_restored" TriggerManual ElectionTrigger = "manual_trigger" @@ -30,30 +30,35 @@ const ( type ElectionState string const ( - StateIdle ElectionState = "idle" - StateDiscovering ElectionState = "discovering" - StateElecting ElectionState = "electing" + electionTopic = "CHORUS/election/v1" + adminHeartbeatTopic = "CHORUS/admin/heartbeat/v1" +) + +const ( + StateIdle ElectionState = "idle" + StateDiscovering ElectionState = "discovering" + StateElecting ElectionState = "electing" StateReconstructing ElectionState = "reconstructing_keys" - StateComplete ElectionState = "complete" + StateComplete ElectionState = "complete" ) // AdminCandidate represents a node candidate for admin role type AdminCandidate struct { - NodeID string `json:"node_id"` - PeerID peer.ID `json:"peer_id"` - Capabilities []string `json:"capabilities"` - Uptime time.Duration `json:"uptime"` - Resources ResourceMetrics `json:"resources"` - Experience time.Duration `json:"experience"` - Score float64 `json:"score"` - Metadata map[string]interface{} `json:"metadata,omitempty"` + NodeID string `json:"node_id"` + PeerID peer.ID `json:"peer_id"` + Capabilities []string `json:"capabilities"` + Uptime time.Duration `json:"uptime"` + Resources 
ResourceMetrics `json:"resources"` + Experience time.Duration `json:"experience"` + Score float64 `json:"score"` + Metadata map[string]interface{} `json:"metadata,omitempty"` } // ResourceMetrics holds node resource information for election scoring type ResourceMetrics struct { - CPUUsage float64 `json:"cpu_usage"` - MemoryUsage float64 `json:"memory_usage"` - DiskUsage float64 `json:"disk_usage"` + CPUUsage float64 `json:"cpu_usage"` + MemoryUsage float64 `json:"memory_usage"` + DiskUsage float64 `json:"disk_usage"` NetworkQuality float64 `json:"network_quality"` } @@ -68,46 +73,46 @@ type ElectionMessage struct { // ElectionManager handles admin election coordination type ElectionManager struct { - ctx context.Context - cancel context.CancelFunc - config *config.Config - host libp2p.Host - pubsub *pubsub.PubSub - nodeID string - + ctx context.Context + cancel context.CancelFunc + config *config.Config + host libp2p.Host + pubsub *pubsub.PubSub + nodeID string + // Election state - mu sync.RWMutex - state ElectionState - currentTerm int - lastHeartbeat time.Time - currentAdmin string - candidates map[string]*AdminCandidate - votes map[string]string // voter -> candidate - + mu sync.RWMutex + state ElectionState + currentTerm int + lastHeartbeat time.Time + currentAdmin string + candidates map[string]*AdminCandidate + votes map[string]string // voter -> candidate + // Timers and channels - heartbeatTimer *time.Timer - discoveryTimer *time.Timer - electionTimer *time.Timer - electionTrigger chan ElectionTrigger - + heartbeatTimer *time.Timer + discoveryTimer *time.Timer + electionTimer *time.Timer + electionTrigger chan ElectionTrigger + // Heartbeat management - heartbeatManager *HeartbeatManager - + heartbeatManager *HeartbeatManager + // Callbacks - onAdminChanged func(oldAdmin, newAdmin string) + onAdminChanged func(oldAdmin, newAdmin string) onElectionComplete func(winner string) - + startTime time.Time } // HeartbeatManager manages admin heartbeat lifecycle type HeartbeatManager struct { - mu sync.Mutex - isRunning bool - stopCh chan struct{} - ticker *time.Ticker - electionMgr *ElectionManager - logger func(msg string, args ...interface{}) + mu sync.Mutex + isRunning bool + stopCh chan struct{} + ticker *time.Ticker + electionMgr *ElectionManager + logger func(msg string, args ...interface{}) } // NewElectionManager creates a new election manager @@ -119,7 +124,7 @@ func NewElectionManager( nodeID string, ) *ElectionManager { electionCtx, cancel := context.WithCancel(ctx) - + em := &ElectionManager{ ctx: electionCtx, cancel: cancel, @@ -133,7 +138,7 @@ func NewElectionManager( electionTrigger: make(chan ElectionTrigger, 10), startTime: time.Now(), } - + // Initialize heartbeat manager em.heartbeatManager = &HeartbeatManager{ electionMgr: em, @@ -141,29 +146,32 @@ func NewElectionManager( log.Printf("[HEARTBEAT] "+msg, args...) 
}, } - + return em } // Start begins the election management system func (em *ElectionManager) Start() error { log.Printf("πŸ—³οΈ Starting election manager for node %s", em.nodeID) - - // TODO: Subscribe to election-related messages - pubsub interface needs update - // if err := em.pubsub.Subscribe("CHORUS/election/v1", em.handleElectionMessage); err != nil { - // return fmt.Errorf("failed to subscribe to election messages: %w", err) - // } - // - // if err := em.pubsub.Subscribe("CHORUS/admin/heartbeat/v1", em.handleAdminHeartbeat); err != nil { - // return fmt.Errorf("failed to subscribe to admin heartbeat: %w", err) - // } - + + if err := em.pubsub.SubscribeRawTopic(electionTopic, func(data []byte, _ peer.ID) { + em.handleElectionMessage(data) + }); err != nil { + return fmt.Errorf("failed to subscribe to election messages: %w", err) + } + + if err := em.pubsub.SubscribeRawTopic(adminHeartbeatTopic, func(data []byte, _ peer.ID) { + em.handleAdminHeartbeat(data) + }); err != nil { + return fmt.Errorf("failed to subscribe to admin heartbeat: %w", err) + } + // Start discovery process go em.startDiscoveryLoop() - + // Start election coordinator go em.electionCoordinator() - + // Start heartbeat if this node is already admin at startup if em.IsCurrentAdmin() { go func() { @@ -174,7 +182,7 @@ func (em *ElectionManager) Start() error { } }() } - + log.Printf("βœ… Election manager started") return nil } @@ -182,17 +190,17 @@ func (em *ElectionManager) Start() error { // Stop shuts down the election manager func (em *ElectionManager) Stop() { log.Printf("πŸ›‘ Stopping election manager") - + // Stop heartbeat first if em.heartbeatManager != nil { em.heartbeatManager.StopHeartbeat() } - + em.cancel() - + em.mu.Lock() defer em.mu.Unlock() - + if em.heartbeatTimer != nil { em.heartbeatTimer.Stop() } @@ -255,7 +263,7 @@ func (em *ElectionManager) GetHeartbeatStatus() map[string]interface{} { // startDiscoveryLoop starts the admin discovery loop func (em *ElectionManager) startDiscoveryLoop() { log.Printf("πŸ” Starting admin discovery loop") - + for { select { case <-em.ctx.Done(): @@ -272,19 +280,19 @@ func (em *ElectionManager) performAdminDiscovery() { currentState := em.state lastHeartbeat := em.lastHeartbeat em.mu.Unlock() - + // Only discover if we're idle or the heartbeat is stale if currentState != StateIdle { return } - + // Check if admin heartbeat has timed out if !lastHeartbeat.IsZero() && time.Since(lastHeartbeat) > em.config.Security.ElectionConfig.HeartbeatTimeout { log.Printf("⚰️ Admin heartbeat timeout detected (last: %v)", lastHeartbeat) em.TriggerElection(TriggerHeartbeatTimeout) return } - + // If we haven't heard from an admin recently, try to discover one if lastHeartbeat.IsZero() || time.Since(lastHeartbeat) > em.config.Security.ElectionConfig.DiscoveryTimeout/2 { em.sendDiscoveryRequest() @@ -298,7 +306,7 @@ func (em *ElectionManager) sendDiscoveryRequest() { NodeID: em.nodeID, Timestamp: time.Now(), } - + if err := em.publishElectionMessage(discoveryMsg); err != nil { log.Printf("❌ Failed to send admin discovery request: %v", err) } @@ -307,7 +315,7 @@ func (em *ElectionManager) sendDiscoveryRequest() { // electionCoordinator handles the main election logic func (em *ElectionManager) electionCoordinator() { log.Printf("🎯 Election coordinator started") - + for { select { case <-em.ctx.Done(): @@ -321,17 +329,17 @@ func (em *ElectionManager) electionCoordinator() { // handleElectionTrigger processes election triggers func (em *ElectionManager) handleElectionTrigger(trigger 
ElectionTrigger) { log.Printf("πŸ”₯ Processing election trigger: %s", trigger) - + em.mu.Lock() currentState := em.state em.mu.Unlock() - + // Ignore triggers if we're already in an election if currentState != StateIdle { log.Printf("⏸️ Ignoring election trigger, current state: %s", currentState) return } - + // Begin election process em.beginElection(trigger) } @@ -339,7 +347,7 @@ func (em *ElectionManager) handleElectionTrigger(trigger ElectionTrigger) { // beginElection starts a new election func (em *ElectionManager) beginElection(trigger ElectionTrigger) { log.Printf("πŸ—³οΈ Beginning election due to: %s", trigger) - + em.mu.Lock() em.state = StateElecting em.currentTerm++ @@ -347,12 +355,12 @@ func (em *ElectionManager) beginElection(trigger ElectionTrigger) { em.candidates = make(map[string]*AdminCandidate) em.votes = make(map[string]string) em.mu.Unlock() - + // Announce candidacy if this node can be admin if em.canBeAdmin() { em.announceCandidacy(term) } - + // Send election announcement electionMsg := ElectionMessage{ Type: "election_started", @@ -363,11 +371,11 @@ func (em *ElectionManager) beginElection(trigger ElectionTrigger) { "trigger": string(trigger), }, } - + if err := em.publishElectionMessage(electionMsg); err != nil { log.Printf("❌ Failed to announce election start: %v", err) } - + // Start election timeout em.startElectionTimeout(term) } @@ -386,7 +394,7 @@ func (em *ElectionManager) canBeAdmin() bool { // announceCandidacy announces this node as an election candidate func (em *ElectionManager) announceCandidacy(term int) { uptime := time.Since(em.startTime) - + candidate := &AdminCandidate{ NodeID: em.nodeID, PeerID: em.host.ID(), @@ -396,13 +404,13 @@ func (em *ElectionManager) announceCandidacy(term int) { Experience: uptime, // For now, use uptime as experience Metadata: map[string]interface{}{ "specialization": em.config.Agent.Specialization, - "models": em.config.Agent.Models, + "models": em.config.Agent.Models, }, } - + // Calculate candidate score candidate.Score = em.calculateCandidateScore(candidate) - + candidacyMsg := ElectionMessage{ Type: "candidacy_announcement", NodeID: em.nodeID, @@ -410,9 +418,9 @@ func (em *ElectionManager) announceCandidacy(term int) { Term: term, Data: candidate, } - + log.Printf("πŸ“’ Announcing candidacy (score: %.2f)", candidate.Score) - + if err := em.publishElectionMessage(candidacyMsg); err != nil { log.Printf("❌ Failed to announce candidacy: %v", err) } @@ -423,9 +431,9 @@ func (em *ElectionManager) getResourceMetrics() ResourceMetrics { // TODO: Implement actual resource collection // For now, return simulated values return ResourceMetrics{ - CPUUsage: rand.Float64() * 0.5, // 0-50% CPU - MemoryUsage: rand.Float64() * 0.7, // 0-70% Memory - DiskUsage: rand.Float64() * 0.6, // 0-60% Disk + CPUUsage: rand.Float64() * 0.5, // 0-50% CPU + MemoryUsage: rand.Float64() * 0.7, // 0-70% Memory + DiskUsage: rand.Float64() * 0.6, // 0-60% Disk NetworkQuality: 0.8 + rand.Float64()*0.2, // 80-100% Network Quality } } @@ -435,10 +443,10 @@ func (em *ElectionManager) calculateCandidateScore(candidate *AdminCandidate) fl // TODO: Add LeadershipScoring to config.ElectionConfig // scoring := em.config.Security.ElectionConfig.LeadershipScoring // Default scoring weights handled inline - + // Normalize metrics to 0-1 range uptimeScore := min(1.0, candidate.Uptime.Hours()/24.0) // Up to 24 hours gets full score - + // Capability score - higher for admin/coordination capabilities capabilityScore := 0.0 adminCapabilities := 
[]string{"admin_election", "context_curation", "key_reconstruction", "semantic_analysis", "project_manager"} @@ -455,22 +463,22 @@ func (em *ElectionManager) calculateCandidateScore(candidate *AdminCandidate) fl } } capabilityScore = min(1.0, capabilityScore) - + // Resource score - lower usage is better - resourceScore := (1.0 - candidate.Resources.CPUUsage) * 0.3 + - (1.0 - candidate.Resources.MemoryUsage) * 0.3 + - (1.0 - candidate.Resources.DiskUsage) * 0.2 + - candidate.Resources.NetworkQuality * 0.2 - + resourceScore := (1.0-candidate.Resources.CPUUsage)*0.3 + + (1.0-candidate.Resources.MemoryUsage)*0.3 + + (1.0-candidate.Resources.DiskUsage)*0.2 + + candidate.Resources.NetworkQuality*0.2 + experienceScore := min(1.0, candidate.Experience.Hours()/168.0) // Up to 1 week gets full score - + // Weighted final score (using default weights) finalScore := uptimeScore*0.3 + capabilityScore*0.2 + resourceScore*0.2 + candidate.Resources.NetworkQuality*0.15 + experienceScore*0.15 - + return finalScore } @@ -478,11 +486,11 @@ func (em *ElectionManager) calculateCandidateScore(candidate *AdminCandidate) fl func (em *ElectionManager) startElectionTimeout(term int) { em.mu.Lock() defer em.mu.Unlock() - + if em.electionTimer != nil { em.electionTimer.Stop() } - + em.electionTimer = time.AfterFunc(em.config.Security.ElectionConfig.ElectionTimeout, func() { em.completeElection(term) }) @@ -492,15 +500,15 @@ func (em *ElectionManager) startElectionTimeout(term int) { func (em *ElectionManager) completeElection(term int) { em.mu.Lock() defer em.mu.Unlock() - + // Verify this is still the current term if term != em.currentTerm { log.Printf("⏰ Election timeout for old term %d, ignoring", term) return } - + log.Printf("⏰ Election timeout reached, tallying votes") - + // Find the winning candidate winner := em.findElectionWinner() if winner == nil { @@ -513,14 +521,14 @@ func (em *ElectionManager) completeElection(term int) { }() return } - + log.Printf("πŸ† Election winner: %s (score: %.2f)", winner.NodeID, winner.Score) - + // Update admin oldAdmin := em.currentAdmin em.currentAdmin = winner.NodeID em.state = StateComplete - + // Announce the winner winnerMsg := ElectionMessage{ Type: "election_winner", @@ -529,16 +537,16 @@ func (em *ElectionManager) completeElection(term int) { Term: term, Data: winner, } - + em.mu.Unlock() // Unlock before publishing - + if err := em.publishElectionMessage(winnerMsg); err != nil { log.Printf("❌ Failed to announce election winner: %v", err) } - + // Handle heartbeat lifecycle based on admin change em.handleHeartbeatTransition(oldAdmin, winner.NodeID) - + // Trigger callbacks if em.onAdminChanged != nil { em.onAdminChanged(oldAdmin, winner.NodeID) @@ -546,7 +554,7 @@ func (em *ElectionManager) completeElection(term int) { if em.onElectionComplete != nil { em.onElectionComplete(winner.NodeID) } - + em.mu.Lock() em.state = StateIdle // Reset state for next election } @@ -556,16 +564,16 @@ func (em *ElectionManager) findElectionWinner() *AdminCandidate { if len(em.candidates) == 0 { return nil } - + // Count votes for each candidate voteCounts := make(map[string]int) totalVotes := 0 - + // Initialize vote counts for all candidates for candidateID := range em.candidates { voteCounts[candidateID] = 0 } - + // Tally actual votes for _, candidateID := range em.votes { if _, exists := em.candidates[candidateID]; exists { @@ -573,12 +581,12 @@ func (em *ElectionManager) findElectionWinner() *AdminCandidate { totalVotes++ } } - + // If no votes cast, fall back to highest scoring 
candidate if totalVotes == 0 { var winner *AdminCandidate highestScore := -1.0 - + for _, candidate := range em.candidates { if candidate.Score > highestScore { highestScore = candidate.Score @@ -587,12 +595,12 @@ func (em *ElectionManager) findElectionWinner() *AdminCandidate { } return winner } - + // Find candidate with most votes var winner *AdminCandidate maxVotes := -1 highestScore := -1.0 - + for candidateID, voteCount := range voteCounts { candidate := em.candidates[candidateID] if voteCount > maxVotes || (voteCount == maxVotes && candidate.Score > highestScore) { @@ -601,10 +609,10 @@ func (em *ElectionManager) findElectionWinner() *AdminCandidate { winner = candidate } } - - log.Printf("πŸ—³οΈ Election results: %d total votes, winner: %s with %d votes (score: %.2f)", + + log.Printf("πŸ—³οΈ Election results: %d total votes, winner: %s with %d votes (score: %.2f)", totalVotes, winner.NodeID, maxVotes, winner.Score) - + return winner } @@ -615,12 +623,12 @@ func (em *ElectionManager) handleElectionMessage(data []byte) { log.Printf("❌ Failed to unmarshal election message: %v", err) return } - + // Ignore messages from ourselves if msg.NodeID == em.nodeID { return } - + switch msg.Type { case "admin_discovery_request": em.handleAdminDiscoveryRequest(msg) @@ -643,7 +651,7 @@ func (em *ElectionManager) handleAdminDiscoveryRequest(msg ElectionMessage) { currentAdmin := em.currentAdmin state := em.state em.mu.RUnlock() - + // Only respond if we know who the current admin is and we're idle if currentAdmin != "" && state == StateIdle { responseMsg := ElectionMessage{ @@ -654,7 +662,7 @@ func (em *ElectionManager) handleAdminDiscoveryRequest(msg ElectionMessage) { "current_admin": currentAdmin, }, } - + if err := em.publishElectionMessage(responseMsg); err != nil { log.Printf("❌ Failed to send admin discovery response: %v", err) } @@ -679,7 +687,7 @@ func (em *ElectionManager) handleAdminDiscoveryResponse(msg ElectionMessage) { func (em *ElectionManager) handleElectionStarted(msg ElectionMessage) { em.mu.Lock() defer em.mu.Unlock() - + // If we receive an election start with a higher term, join the election if msg.Term > em.currentTerm { log.Printf("πŸ”„ Joining election with term %d", msg.Term) @@ -687,7 +695,7 @@ func (em *ElectionManager) handleElectionStarted(msg ElectionMessage) { em.state = StateElecting em.candidates = make(map[string]*AdminCandidate) em.votes = make(map[string]string) - + // Announce candidacy if eligible if em.canBeAdmin() { go em.announceCandidacy(msg.Term) @@ -699,25 +707,25 @@ func (em *ElectionManager) handleElectionStarted(msg ElectionMessage) { func (em *ElectionManager) handleCandidacyAnnouncement(msg ElectionMessage) { em.mu.Lock() defer em.mu.Unlock() - + // Only process if it's for the current term if msg.Term != em.currentTerm { return } - + // Convert data to candidate struct candidateData, err := json.Marshal(msg.Data) if err != nil { log.Printf("❌ Failed to marshal candidate data: %v", err) return } - + var candidate AdminCandidate if err := json.Unmarshal(candidateData, &candidate); err != nil { log.Printf("❌ Failed to unmarshal candidate: %v", err) return } - + log.Printf("πŸ“ Received candidacy from %s (score: %.2f)", candidate.NodeID, candidate.Score) em.candidates[candidate.NodeID] = &candidate } @@ -726,31 +734,31 @@ func (em *ElectionManager) handleCandidacyAnnouncement(msg ElectionMessage) { func (em *ElectionManager) handleElectionVote(msg ElectionMessage) { em.mu.Lock() defer em.mu.Unlock() - + // Extract vote data voteData, ok := 
msg.Data.(map[string]interface{}) if !ok { log.Printf("❌ Invalid vote data format from %s", msg.NodeID) return } - + candidateID, ok := voteData["candidate"].(string) if !ok { log.Printf("❌ Invalid candidate ID in vote from %s", msg.NodeID) return } - + // Validate candidate exists if _, exists := em.candidates[candidateID]; !exists { log.Printf("❌ Vote for unknown candidate %s from %s", candidateID, msg.NodeID) return } - + // Prevent duplicate voting if existingVote, exists := em.votes[msg.NodeID]; exists { log.Printf("⚠️ Node %s already voted for %s, updating to %s", msg.NodeID, existingVote, candidateID) } - + // Record the vote em.votes[msg.NodeID] = candidateID log.Printf("πŸ—³οΈ Recorded vote from %s for candidate %s", msg.NodeID, candidateID) @@ -763,24 +771,24 @@ func (em *ElectionManager) handleElectionWinner(msg ElectionMessage) { log.Printf("❌ Failed to marshal winner data: %v", err) return } - + var winner AdminCandidate if err := json.Unmarshal(candidateData, &winner); err != nil { log.Printf("❌ Failed to unmarshal winner: %v", err) return } - + em.mu.Lock() oldAdmin := em.currentAdmin em.currentAdmin = winner.NodeID em.state = StateIdle em.mu.Unlock() - + log.Printf("πŸ‘‘ New admin elected: %s", winner.NodeID) - + // Handle heartbeat lifecycle based on admin change em.handleHeartbeatTransition(oldAdmin, winner.NodeID) - + // Trigger callback if em.onAdminChanged != nil { em.onAdminChanged(oldAdmin, winner.NodeID) @@ -796,7 +804,7 @@ func (em *ElectionManager) handleHeartbeatTransition(oldAdmin, newAdmin string) log.Printf("⚠️ Error stopping heartbeat: %v", err) } } - + // If we gained admin role, start heartbeat if newAdmin == em.nodeID && oldAdmin != em.nodeID { log.Printf("πŸ”„ Gained admin role, starting heartbeat") @@ -816,15 +824,15 @@ func (em *ElectionManager) handleAdminHeartbeat(data []byte) { NodeID string `json:"node_id"` Timestamp time.Time `json:"timestamp"` } - + if err := json.Unmarshal(data, &heartbeat); err != nil { log.Printf("❌ Failed to unmarshal heartbeat: %v", err) return } - + em.mu.Lock() defer em.mu.Unlock() - + // Update admin and heartbeat timestamp if em.currentAdmin == "" || em.currentAdmin == heartbeat.NodeID { em.currentAdmin = heartbeat.NodeID @@ -838,11 +846,8 @@ func (em *ElectionManager) publishElectionMessage(msg ElectionMessage) error { if err != nil { return fmt.Errorf("failed to marshal election message: %w", err) } - - // TODO: Fix pubsub interface - // return em.pubsub.Publish("CHORUS/election/v1", data) - _ = data // Avoid unused variable - return nil + + return em.pubsub.PublishRaw(electionTopic, data) } // SendAdminHeartbeat sends admin heartbeat (only if this node is admin) @@ -850,7 +855,7 @@ func (em *ElectionManager) SendAdminHeartbeat() error { if !em.IsCurrentAdmin() { return fmt.Errorf("not current admin") } - + heartbeat := struct { NodeID string `json:"node_id"` Timestamp time.Time `json:"timestamp"` @@ -858,16 +863,13 @@ func (em *ElectionManager) SendAdminHeartbeat() error { NodeID: em.nodeID, Timestamp: time.Now(), } - + data, err := json.Marshal(heartbeat) if err != nil { return fmt.Errorf("failed to marshal heartbeat: %w", err) } - - // TODO: Fix pubsub interface - // return em.pubsub.Publish("CHORUS/admin/heartbeat/v1", data) - _ = data // Avoid unused variable - return nil + + return em.pubsub.PublishRaw(adminHeartbeatTopic, data) } // min returns the minimum of two float64 values @@ -894,26 +896,26 @@ func NewHeartbeatManager(electionMgr *ElectionManager) *HeartbeatManager { func (hm *HeartbeatManager) 
StartHeartbeat() error { hm.mu.Lock() defer hm.mu.Unlock() - + if hm.isRunning { hm.logger("Heartbeat already running") return nil } - + if !hm.electionMgr.IsCurrentAdmin() { return fmt.Errorf("not admin, cannot start heartbeat") } - + hm.logger("Starting admin heartbeat transmission") - + hm.stopCh = make(chan struct{}) interval := hm.electionMgr.config.Security.ElectionConfig.HeartbeatTimeout / 2 hm.ticker = time.NewTicker(interval) hm.isRunning = true - + // Start heartbeat goroutine go hm.heartbeatLoop() - + hm.logger("Admin heartbeat started (interval: %v)", interval) return nil } @@ -922,22 +924,22 @@ func (hm *HeartbeatManager) StartHeartbeat() error { func (hm *HeartbeatManager) StopHeartbeat() error { hm.mu.Lock() defer hm.mu.Unlock() - + if !hm.isRunning { return nil } - + hm.logger("Stopping admin heartbeat transmission") - + // Signal stop close(hm.stopCh) - + // Stop ticker if hm.ticker != nil { hm.ticker.Stop() hm.ticker = nil } - + hm.isRunning = false hm.logger("Admin heartbeat stopped") return nil @@ -958,7 +960,7 @@ func (hm *HeartbeatManager) heartbeatLoop() { hm.mu.Unlock() hm.logger("Heartbeat loop terminated") }() - + for { select { case <-hm.ticker.C: @@ -971,11 +973,11 @@ func (hm *HeartbeatManager) heartbeatLoop() { hm.logger("No longer admin, stopping heartbeat") return } - + case <-hm.stopCh: hm.logger("Heartbeat stop signal received") return - + case <-hm.electionMgr.ctx.Done(): hm.logger("Election manager context cancelled") return @@ -987,19 +989,19 @@ func (hm *HeartbeatManager) heartbeatLoop() { func (hm *HeartbeatManager) GetHeartbeatStatus() map[string]interface{} { hm.mu.Lock() defer hm.mu.Unlock() - + status := map[string]interface{}{ - "running": hm.isRunning, - "is_admin": hm.electionMgr.IsCurrentAdmin(), - "last_sent": time.Now(), // TODO: Track actual last sent time + "running": hm.isRunning, + "is_admin": hm.electionMgr.IsCurrentAdmin(), + "last_sent": time.Now(), // TODO: Track actual last sent time } - + if hm.isRunning && hm.ticker != nil { // Calculate next heartbeat time (approximate) interval := hm.electionMgr.config.Security.ElectionConfig.HeartbeatTimeout / 2 status["interval"] = interval.String() status["next_heartbeat"] = time.Now().Add(interval) } - + return status -} \ No newline at end of file +} diff --git a/pkg/election/election_test.go b/pkg/election/election_test.go index ff075ea..4d4d777 100644 --- a/pkg/election/election_test.go +++ b/pkg/election/election_test.go @@ -2,451 +2,185 @@ package election import ( "context" + "encoding/json" "testing" "time" "chorus/pkg/config" + pubsubpkg "chorus/pubsub" + libp2p "github.com/libp2p/go-libp2p" ) -func TestElectionManager_NewElectionManager(t *testing.T) { +// newTestElectionManager wires a real libp2p host and PubSub instance so the +// election manager exercises the same code paths used in production. 
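+// All resources (election manager, pubsub, host, context) are torn down via t.Cleanup so tests stay isolated.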
+func newTestElectionManager(t *testing.T) *ElectionManager { + t.Helper() + + ctx, cancel := context.WithCancel(context.Background()) + + host, err := libp2p.New(libp2p.ListenAddrStrings("/ip4/127.0.0.1/tcp/0")) + if err != nil { + cancel() + t.Fatalf("failed to create libp2p host: %v", err) + } + + ps, err := pubsubpkg.NewPubSub(ctx, host, "", "") + if err != nil { + host.Close() + cancel() + t.Fatalf("failed to create pubsub: %v", err) + } + cfg := &config.Config{ Agent: config.AgentConfig{ - ID: "test-node", + ID: host.ID().String(), + Role: "context_admin", + Capabilities: []string{"admin_election", "context_curation"}, + Models: []string{"meta/llama-3.1-8b-instruct"}, + Specialization: "coordination", }, + Security: config.SecurityConfig{}, } - em := NewElectionManager(cfg) - if em == nil { - t.Fatal("Expected NewElectionManager to return non-nil manager") - } + em := NewElectionManager(ctx, cfg, host, ps, host.ID().String()) - if em.nodeID != "test-node" { - t.Errorf("Expected nodeID to be 'test-node', got %s", em.nodeID) - } + t.Cleanup(func() { + em.Stop() + ps.Close() + host.Close() + cancel() + }) + + return em +} + +func TestNewElectionManagerInitialState(t *testing.T) { + em := newTestElectionManager(t) if em.state != StateIdle { - t.Errorf("Expected initial state to be StateIdle, got %v", em.state) + t.Fatalf("expected initial state %q, got %q", StateIdle, em.state) + } + + if em.currentTerm != 0 { + t.Fatalf("expected initial term 0, got %d", em.currentTerm) + } + + if em.nodeID == "" { + t.Fatal("expected nodeID to be populated") } } -func TestElectionManager_StartElection(t *testing.T) { - cfg := &config.Config{ - Agent: config.AgentConfig{ - ID: "test-node", - }, +func TestElectionManagerCanBeAdmin(t *testing.T) { + em := newTestElectionManager(t) + + if !em.canBeAdmin() { + t.Fatal("expected node to qualify for admin election") } - em := NewElectionManager(cfg) - - // Start election - err := em.StartElection() - if err != nil { - t.Fatalf("Failed to start election: %v", err) - } - - // Verify state changed - if em.state != StateCandidate { - t.Errorf("Expected state to be StateCandidate after starting election, got %v", em.state) - } - - // Verify we added ourselves as a candidate - em.mu.RLock() - candidate, exists := em.candidates[em.nodeID] - em.mu.RUnlock() - - if !exists { - t.Error("Expected to find ourselves as a candidate after starting election") - } - - if candidate.NodeID != em.nodeID { - t.Errorf("Expected candidate NodeID to be %s, got %s", em.nodeID, candidate.NodeID) + em.config.Agent.Capabilities = []string{"runtime_support"} + if em.canBeAdmin() { + t.Fatal("expected node without admin capabilities to be ineligible") } } -func TestElectionManager_Vote(t *testing.T) { - cfg := &config.Config{ - Agent: config.AgentConfig{ - ID: "test-node", - }, - } - - em := NewElectionManager(cfg) - - // Add a candidate first - candidate := &AdminCandidate{ - NodeID: "candidate-1", - Term: 1, - Score: 0.8, - Capabilities: []string{"admin"}, - LastSeen: time.Now(), - } - - em.mu.Lock() - em.candidates["candidate-1"] = candidate - em.mu.Unlock() - - // Vote for the candidate - err := em.Vote("candidate-1") - if err != nil { - t.Fatalf("Failed to vote: %v", err) - } - - // Verify vote was recorded - em.mu.RLock() - vote, exists := em.votes[em.nodeID] - em.mu.RUnlock() - - if !exists { - t.Error("Expected to find our vote after voting") - } - - if vote != "candidate-1" { - t.Errorf("Expected vote to be for 'candidate-1', got %s", vote) - } -} - -func 
TestElectionManager_VoteInvalidCandidate(t *testing.T) { - cfg := &config.Config{ - Agent: config.AgentConfig{ - ID: "test-node", - }, - } - - em := NewElectionManager(cfg) - - // Try to vote for non-existent candidate - err := em.Vote("non-existent") - if err == nil { - t.Error("Expected error when voting for non-existent candidate") - } -} - -func TestElectionManager_AddCandidate(t *testing.T) { - cfg := &config.Config{ - Agent: config.AgentConfig{ - ID: "test-node", - }, - } - - em := NewElectionManager(cfg) - - candidate := &AdminCandidate{ - NodeID: "new-candidate", - Term: 1, - Score: 0.7, - Capabilities: []string{"admin", "leader"}, - LastSeen: time.Now(), - } - - err := em.AddCandidate(candidate) - if err != nil { - t.Fatalf("Failed to add candidate: %v", err) - } - - // Verify candidate was added - em.mu.RLock() - stored, exists := em.candidates["new-candidate"] - em.mu.RUnlock() - - if !exists { - t.Error("Expected to find added candidate") - } - - if stored.NodeID != "new-candidate" { - t.Errorf("Expected stored candidate NodeID to be 'new-candidate', got %s", stored.NodeID) - } - - if stored.Score != 0.7 { - t.Errorf("Expected stored candidate score to be 0.7, got %f", stored.Score) - } -} - -func TestElectionManager_FindElectionWinner(t *testing.T) { - cfg := &config.Config{ - Agent: config.AgentConfig{ - ID: "test-node", - }, - } - - em := NewElectionManager(cfg) - - // Add candidates with different scores - candidates := []*AdminCandidate{ - { - NodeID: "candidate-1", - Term: 1, - Score: 0.6, - Capabilities: []string{"admin"}, - LastSeen: time.Now(), - }, - { - NodeID: "candidate-2", - Term: 1, - Score: 0.8, - Capabilities: []string{"admin", "leader"}, - LastSeen: time.Now(), - }, - { - NodeID: "candidate-3", - Term: 1, - Score: 0.7, - Capabilities: []string{"admin"}, - LastSeen: time.Now(), - }, - } +func TestFindElectionWinnerPrefersVotesThenScore(t *testing.T) { + em := newTestElectionManager(t) em.mu.Lock() - for _, candidate := range candidates { - em.candidates[candidate.NodeID] = candidate + em.candidates = map[string]*AdminCandidate{ + "candidate-1": { + NodeID: "candidate-1", + PeerID: em.host.ID(), + Score: 0.65, + }, + "candidate-2": { + NodeID: "candidate-2", + PeerID: em.host.ID(), + Score: 0.80, + }, + } + em.votes = map[string]string{ + "voter-a": "candidate-1", + "voter-b": "candidate-2", + "voter-c": "candidate-2", } - - // Add some votes - em.votes["voter-1"] = "candidate-2" - em.votes["voter-2"] = "candidate-2" - em.votes["voter-3"] = "candidate-1" em.mu.Unlock() - // Find winner winner := em.findElectionWinner() - if winner == nil { - t.Fatal("Expected findElectionWinner to return a winner") + t.Fatal("expected a winner to be selected") } - - // candidate-2 should win with most votes (2 votes) if winner.NodeID != "candidate-2" { - t.Errorf("Expected winner to be 'candidate-2', got %s", winner.NodeID) + t.Fatalf("expected candidate-2 to win, got %s", winner.NodeID) } } -func TestElectionManager_FindElectionWinnerNoVotes(t *testing.T) { - cfg := &config.Config{ - Agent: config.AgentConfig{ - ID: "test-node", - }, - } - - em := NewElectionManager(cfg) - - // Add candidates but no votes - should fall back to highest score - candidates := []*AdminCandidate{ - { - NodeID: "candidate-1", - Term: 1, - Score: 0.6, - Capabilities: []string{"admin"}, - LastSeen: time.Now(), - }, - { - NodeID: "candidate-2", - Term: 1, - Score: 0.9, // Highest score - Capabilities: []string{"admin", "leader"}, - LastSeen: time.Now(), - }, - } +func 
TestHandleElectionMessageAddsCandidate(t *testing.T) { + em := newTestElectionManager(t) em.mu.Lock() - for _, candidate := range candidates { - em.candidates[candidate.NodeID] = candidate - } + em.currentTerm = 3 + em.state = StateElecting em.mu.Unlock() - // Find winner without any votes - winner := em.findElectionWinner() - - if winner == nil { - t.Fatal("Expected findElectionWinner to return a winner") - } - - // candidate-2 should win with highest score - if winner.NodeID != "candidate-2" { - t.Errorf("Expected winner to be 'candidate-2' (highest score), got %s", winner.NodeID) - } -} - -func TestElectionManager_HandleElectionVote(t *testing.T) { - cfg := &config.Config{ - Agent: config.AgentConfig{ - ID: "test-node", - }, - } - - em := NewElectionManager(cfg) - - // Add a candidate first candidate := &AdminCandidate{ - NodeID: "candidate-1", - Term: 1, - Score: 0.8, - Capabilities: []string{"admin"}, - LastSeen: time.Now(), + NodeID: "peer-2", + PeerID: em.host.ID(), + Capabilities: []string{"admin_election"}, + Uptime: time.Second, + Score: 0.75, + } + + payload, err := json.Marshal(candidate) + if err != nil { + t.Fatalf("failed to marshal candidate: %v", err) + } + + var data map[string]interface{} + if err := json.Unmarshal(payload, &data); err != nil { + t.Fatalf("failed to unmarshal candidate payload: %v", err) } - - em.mu.Lock() - em.candidates["candidate-1"] = candidate - em.mu.Unlock() - // Create vote message msg := ElectionMessage{ - Type: MessageTypeVote, - NodeID: "voter-1", - Data: map[string]interface{}{ - "candidate": "candidate-1", - }, + Type: "candidacy_announcement", + NodeID: "peer-2", + Timestamp: time.Now(), + Term: 3, + Data: data, } - // Handle the vote - em.handleElectionVote(msg) + serialized, err := json.Marshal(msg) + if err != nil { + t.Fatalf("failed to marshal election message: %v", err) + } + + em.handleElectionMessage(serialized) - // Verify vote was recorded em.mu.RLock() - vote, exists := em.votes["voter-1"] + _, exists := em.candidates["peer-2"] em.mu.RUnlock() if !exists { - t.Error("Expected vote to be recorded after handling vote message") - } - - if vote != "candidate-1" { - t.Errorf("Expected recorded vote to be for 'candidate-1', got %s", vote) + t.Fatal("expected candidacy announcement to register candidate") } } -func TestElectionManager_HandleElectionVoteInvalidData(t *testing.T) { - cfg := &config.Config{ - Agent: config.AgentConfig{ - ID: "test-node", - }, +func TestSendAdminHeartbeatRequiresLeadership(t *testing.T) { + em := newTestElectionManager(t) + + if err := em.SendAdminHeartbeat(); err == nil { + t.Fatal("expected error when non-admin sends heartbeat") } - em := NewElectionManager(cfg) - - // Create vote message with invalid data - msg := ElectionMessage{ - Type: MessageTypeVote, - NodeID: "voter-1", - Data: "invalid-data", // Should be map[string]interface{} + if err := em.Start(); err != nil { + t.Fatalf("failed to start election manager: %v", err) } - // Handle the vote - should not crash - em.handleElectionVote(msg) - - // Verify no vote was recorded - em.mu.RLock() - _, exists := em.votes["voter-1"] - em.mu.RUnlock() - - if exists { - t.Error("Expected no vote to be recorded with invalid data") - } -} - -func TestElectionManager_CompleteElection(t *testing.T) { - cfg := &config.Config{ - Agent: config.AgentConfig{ - ID: "test-node", - }, - } - - em := NewElectionManager(cfg) - - // Set up election state em.mu.Lock() - em.state = StateCandidate - em.currentTerm = 1 + em.currentAdmin = em.nodeID em.mu.Unlock() - // Add a 
candidate - candidate := &AdminCandidate{ - NodeID: "winner", - Term: 1, - Score: 0.9, - Capabilities: []string{"admin", "leader"}, - LastSeen: time.Now(), - } - - em.mu.Lock() - em.candidates["winner"] = candidate - em.mu.Unlock() - - // Complete election - em.CompleteElection() - - // Verify state reset - em.mu.RLock() - state := em.state - em.mu.RUnlock() - - if state != StateIdle { - t.Errorf("Expected state to be StateIdle after completing election, got %v", state) + if err := em.SendAdminHeartbeat(); err != nil { + t.Fatalf("expected heartbeat to succeed for current admin, got error: %v", err) } } - -func TestElectionManager_Concurrency(t *testing.T) { - cfg := &config.Config{ - Agent: config.AgentConfig{ - ID: "test-node", - }, - } - - em := NewElectionManager(cfg) - - // Test concurrent access to vote and candidate operations - ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) - defer cancel() - - // Add a candidate - candidate := &AdminCandidate{ - NodeID: "candidate-1", - Term: 1, - Score: 0.8, - Capabilities: []string{"admin"}, - LastSeen: time.Now(), - } - - err := em.AddCandidate(candidate) - if err != nil { - t.Fatalf("Failed to add candidate: %v", err) - } - - // Run concurrent operations - done := make(chan bool, 2) - - // Concurrent voting - go func() { - defer func() { done <- true }() - for i := 0; i < 10; i++ { - select { - case <-ctx.Done(): - return - default: - em.Vote("candidate-1") // Ignore errors in concurrent test - time.Sleep(10 * time.Millisecond) - } - } - }() - - // Concurrent state checking - go func() { - defer func() { done <- true }() - for i := 0; i < 10; i++ { - select { - case <-ctx.Done(): - return - default: - em.findElectionWinner() // Just check for races - time.Sleep(10 * time.Millisecond) - } - } - }() - - // Wait for completion - for i := 0; i < 2; i++ { - select { - case <-done: - case <-ctx.Done(): - t.Fatal("Concurrent test timed out") - } - } -} \ No newline at end of file diff --git a/pkg/metrics/prometheus_metrics.go b/pkg/metrics/prometheus_metrics.go index 465b0ce..3caf0d3 100644 --- a/pkg/metrics/prometheus_metrics.go +++ b/pkg/metrics/prometheus_metrics.go @@ -2,27 +2,26 @@ package metrics import ( "context" - "fmt" "log" "net/http" "sync" "time" "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promhttp" "github.com/prometheus/client_golang/prometheus/promauto" + "github.com/prometheus/client_golang/prometheus/promhttp" ) // CHORUSMetrics provides comprehensive Prometheus metrics for the CHORUS system type CHORUSMetrics struct { - registry *prometheus.Registry - httpServer *http.Server - + registry *prometheus.Registry + httpServer *http.Server + // System metrics - systemInfo *prometheus.GaugeVec - uptime prometheus.Gauge - buildInfo *prometheus.GaugeVec - + systemInfo *prometheus.GaugeVec + uptime prometheus.Gauge + buildInfo *prometheus.GaugeVec + // P2P metrics p2pConnectedPeers prometheus.Gauge p2pMessagesSent *prometheus.CounterVec @@ -30,95 +29,98 @@ type CHORUSMetrics struct { p2pMessageLatency *prometheus.HistogramVec p2pConnectionDuration *prometheus.HistogramVec p2pPeerScore *prometheus.GaugeVec - + // DHT metrics - dhtPutOperations *prometheus.CounterVec - dhtGetOperations *prometheus.CounterVec - dhtOperationLatency *prometheus.HistogramVec - dhtProviderRecords prometheus.Gauge - dhtReplicationFactor *prometheus.GaugeVec - dhtContentKeys prometheus.Gauge - dhtCacheHits *prometheus.CounterVec - dhtCacheMisses *prometheus.CounterVec - + dhtPutOperations 
*prometheus.CounterVec + dhtGetOperations *prometheus.CounterVec + dhtOperationLatency *prometheus.HistogramVec + dhtProviderRecords prometheus.Gauge + dhtReplicationFactor *prometheus.GaugeVec + dhtContentKeys prometheus.Gauge + dhtCacheHits *prometheus.CounterVec + dhtCacheMisses *prometheus.CounterVec + // PubSub metrics - pubsubTopics prometheus.Gauge - pubsubSubscribers *prometheus.GaugeVec - pubsubMessages *prometheus.CounterVec - pubsubMessageLatency *prometheus.HistogramVec - pubsubMessageSize *prometheus.HistogramVec - + pubsubTopics prometheus.Gauge + pubsubSubscribers *prometheus.GaugeVec + pubsubMessages *prometheus.CounterVec + pubsubMessageLatency *prometheus.HistogramVec + pubsubMessageSize *prometheus.HistogramVec + // Election metrics - electionTerm prometheus.Gauge - electionState *prometheus.GaugeVec - heartbeatsSent prometheus.Counter - heartbeatsReceived prometheus.Counter - leadershipChanges prometheus.Counter - leaderUptime prometheus.Gauge - electionLatency prometheus.Histogram - + electionTerm prometheus.Gauge + electionState *prometheus.GaugeVec + heartbeatsSent prometheus.Counter + heartbeatsReceived prometheus.Counter + leadershipChanges prometheus.Counter + leaderUptime prometheus.Gauge + electionLatency prometheus.Histogram + // Health metrics - healthChecksPassed *prometheus.CounterVec - healthChecksFailed *prometheus.CounterVec - healthCheckDuration *prometheus.HistogramVec - systemHealthScore prometheus.Gauge - componentHealthScore *prometheus.GaugeVec - + healthChecksPassed *prometheus.CounterVec + healthChecksFailed *prometheus.CounterVec + healthCheckDuration *prometheus.HistogramVec + systemHealthScore prometheus.Gauge + componentHealthScore *prometheus.GaugeVec + // Task metrics - tasksActive prometheus.Gauge - tasksQueued prometheus.Gauge - tasksCompleted *prometheus.CounterVec - taskDuration *prometheus.HistogramVec - taskQueueWaitTime prometheus.Histogram - + tasksActive prometheus.Gauge + tasksQueued prometheus.Gauge + tasksCompleted *prometheus.CounterVec + taskDuration *prometheus.HistogramVec + taskQueueWaitTime prometheus.Histogram + // SLURP metrics (context generation) slurpGenerated *prometheus.CounterVec slurpGenerationTime prometheus.Histogram slurpQueueLength prometheus.Gauge slurpActiveJobs prometheus.Gauge slurpLeadershipEvents prometheus.Counter - + + // SHHH sentinel metrics + shhhFindings *prometheus.CounterVec + // UCXI metrics (protocol resolution) ucxiRequests *prometheus.CounterVec ucxiResolutionLatency prometheus.Histogram ucxiCacheHits prometheus.Counter ucxiCacheMisses prometheus.Counter ucxiContentSize prometheus.Histogram - + // Resource metrics - cpuUsage prometheus.Gauge - memoryUsage prometheus.Gauge - diskUsage *prometheus.GaugeVec - networkBytesIn prometheus.Counter - networkBytesOut prometheus.Counter - goroutines prometheus.Gauge - + cpuUsage prometheus.Gauge + memoryUsage prometheus.Gauge + diskUsage *prometheus.GaugeVec + networkBytesIn prometheus.Counter + networkBytesOut prometheus.Counter + goroutines prometheus.Gauge + // Error metrics - errors *prometheus.CounterVec - panics prometheus.Counter - - startTime time.Time - mu sync.RWMutex + errors *prometheus.CounterVec + panics prometheus.Counter + + startTime time.Time + mu sync.RWMutex } // MetricsConfig configures the metrics system type MetricsConfig struct { // HTTP server config - ListenAddr string - MetricsPath string - + ListenAddr string + MetricsPath string + // Histogram buckets LatencyBuckets []float64 SizeBuckets []float64 - + // Labels - NodeID 
string - Version string - Environment string - Cluster string - + NodeID string + Version string + Environment string + Cluster string + // Collection intervals - SystemMetricsInterval time.Duration + SystemMetricsInterval time.Duration ResourceMetricsInterval time.Duration } @@ -143,20 +145,20 @@ func NewCHORUSMetrics(config *MetricsConfig) *CHORUSMetrics { if config == nil { config = DefaultMetricsConfig() } - + registry := prometheus.NewRegistry() - + metrics := &CHORUSMetrics{ registry: registry, startTime: time.Now(), } - + // Initialize all metrics metrics.initializeMetrics(config) - + // Register with custom registry metrics.registerMetrics() - + return metrics } @@ -170,14 +172,14 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) { }, []string{"node_id", "version", "go_version", "cluster", "environment"}, ) - + m.uptime = promauto.NewGauge( prometheus.GaugeOpts{ Name: "chorus_uptime_seconds", Help: "System uptime in seconds", }, ) - + // P2P metrics m.p2pConnectedPeers = promauto.NewGauge( prometheus.GaugeOpts{ @@ -185,7 +187,7 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) { Help: "Number of connected P2P peers", }, ) - + m.p2pMessagesSent = promauto.NewCounterVec( prometheus.CounterOpts{ Name: "chorus_p2p_messages_sent_total", @@ -193,7 +195,7 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) { }, []string{"message_type", "peer_id"}, ) - + m.p2pMessagesReceived = promauto.NewCounterVec( prometheus.CounterOpts{ Name: "chorus_p2p_messages_received_total", @@ -201,7 +203,7 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) { }, []string{"message_type", "peer_id"}, ) - + m.p2pMessageLatency = promauto.NewHistogramVec( prometheus.HistogramOpts{ Name: "chorus_p2p_message_latency_seconds", @@ -210,7 +212,7 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) { }, []string{"message_type"}, ) - + // DHT metrics m.dhtPutOperations = promauto.NewCounterVec( prometheus.CounterOpts{ @@ -219,7 +221,7 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) { }, []string{"status"}, ) - + m.dhtGetOperations = promauto.NewCounterVec( prometheus.CounterOpts{ Name: "chorus_dht_get_operations_total", @@ -227,7 +229,7 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) { }, []string{"status"}, ) - + m.dhtOperationLatency = promauto.NewHistogramVec( prometheus.HistogramOpts{ Name: "chorus_dht_operation_latency_seconds", @@ -236,21 +238,21 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) { }, []string{"operation", "status"}, ) - + m.dhtProviderRecords = promauto.NewGauge( prometheus.GaugeOpts{ Name: "chorus_dht_provider_records", Help: "Number of DHT provider records", }, ) - + m.dhtContentKeys = promauto.NewGauge( prometheus.GaugeOpts{ Name: "chorus_dht_content_keys", Help: "Number of DHT content keys", }, ) - + m.dhtReplicationFactor = promauto.NewGaugeVec( prometheus.GaugeOpts{ Name: "chorus_dht_replication_factor", @@ -258,7 +260,7 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) { }, []string{"key_hash"}, ) - + // PubSub metrics m.pubsubTopics = promauto.NewGauge( prometheus.GaugeOpts{ @@ -266,7 +268,7 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) { Help: "Number of active PubSub topics", }, ) - + m.pubsubMessages = promauto.NewCounterVec( prometheus.CounterOpts{ Name: "chorus_pubsub_messages_total", @@ -274,7 +276,7 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) { }, []string{"topic", "direction", 
"message_type"}, ) - + m.pubsubMessageLatency = promauto.NewHistogramVec( prometheus.HistogramOpts{ Name: "chorus_pubsub_message_latency_seconds", @@ -283,7 +285,7 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) { }, []string{"topic"}, ) - + // Election metrics m.electionTerm = promauto.NewGauge( prometheus.GaugeOpts{ @@ -291,7 +293,7 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) { Help: "Current election term", }, ) - + m.electionState = promauto.NewGaugeVec( prometheus.GaugeOpts{ Name: "chorus_election_state", @@ -299,28 +301,28 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) { }, []string{"state"}, ) - + m.heartbeatsSent = promauto.NewCounter( prometheus.CounterOpts{ Name: "chorus_heartbeats_sent_total", Help: "Total number of heartbeats sent", }, ) - + m.heartbeatsReceived = promauto.NewCounter( prometheus.CounterOpts{ Name: "chorus_heartbeats_received_total", Help: "Total number of heartbeats received", }, ) - + m.leadershipChanges = promauto.NewCounter( prometheus.CounterOpts{ Name: "chorus_leadership_changes_total", Help: "Total number of leadership changes", }, ) - + // Health metrics m.healthChecksPassed = promauto.NewCounterVec( prometheus.CounterOpts{ @@ -329,7 +331,7 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) { }, []string{"check_name"}, ) - + m.healthChecksFailed = promauto.NewCounterVec( prometheus.CounterOpts{ Name: "chorus_health_checks_failed_total", @@ -337,14 +339,14 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) { }, []string{"check_name", "reason"}, ) - + m.systemHealthScore = promauto.NewGauge( prometheus.GaugeOpts{ Name: "chorus_system_health_score", Help: "Overall system health score (0-1)", }, ) - + m.componentHealthScore = promauto.NewGaugeVec( prometheus.GaugeOpts{ Name: "chorus_component_health_score", @@ -352,7 +354,7 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) { }, []string{"component"}, ) - + // Task metrics m.tasksActive = promauto.NewGauge( prometheus.GaugeOpts{ @@ -360,14 +362,14 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) { Help: "Number of active tasks", }, ) - + m.tasksQueued = promauto.NewGauge( prometheus.GaugeOpts{ Name: "chorus_tasks_queued", Help: "Number of queued tasks", }, ) - + m.tasksCompleted = promauto.NewCounterVec( prometheus.CounterOpts{ Name: "chorus_tasks_completed_total", @@ -375,7 +377,7 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) { }, []string{"status", "task_type"}, ) - + m.taskDuration = promauto.NewHistogramVec( prometheus.HistogramOpts{ Name: "chorus_task_duration_seconds", @@ -384,7 +386,7 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) { }, []string{"task_type", "status"}, ) - + // SLURP metrics m.slurpGenerated = promauto.NewCounterVec( prometheus.CounterOpts{ @@ -393,7 +395,7 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) { }, []string{"role", "status"}, ) - + m.slurpGenerationTime = promauto.NewHistogram( prometheus.HistogramOpts{ Name: "chorus_slurp_generation_time_seconds", @@ -401,14 +403,23 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) { Buckets: []float64{0.1, 0.5, 1.0, 2.0, 5.0, 10.0, 30.0, 60.0, 120.0}, }, ) - + m.slurpQueueLength = promauto.NewGauge( prometheus.GaugeOpts{ Name: "chorus_slurp_queue_length", Help: "Length of SLURP generation queue", }, ) - + + // SHHH metrics + m.shhhFindings = promauto.NewCounterVec( + prometheus.CounterOpts{ + Name: "chorus_shhh_findings_total", 
+ Help: "Total number of SHHH redaction findings", + }, + []string{"rule", "severity"}, + ) + // UCXI metrics m.ucxiRequests = promauto.NewCounterVec( prometheus.CounterOpts{ @@ -417,7 +428,7 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) { }, []string{"method", "status"}, ) - + m.ucxiResolutionLatency = promauto.NewHistogram( prometheus.HistogramOpts{ Name: "chorus_ucxi_resolution_latency_seconds", @@ -425,7 +436,7 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) { Buckets: config.LatencyBuckets, }, ) - + // Resource metrics m.cpuUsage = promauto.NewGauge( prometheus.GaugeOpts{ @@ -433,14 +444,14 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) { Help: "CPU usage ratio (0-1)", }, ) - + m.memoryUsage = promauto.NewGauge( prometheus.GaugeOpts{ Name: "chorus_memory_usage_bytes", Help: "Memory usage in bytes", }, ) - + m.diskUsage = promauto.NewGaugeVec( prometheus.GaugeOpts{ Name: "chorus_disk_usage_ratio", @@ -448,14 +459,14 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) { }, []string{"mount_point"}, ) - + m.goroutines = promauto.NewGauge( prometheus.GaugeOpts{ Name: "chorus_goroutines", Help: "Number of goroutines", }, ) - + // Error metrics m.errors = promauto.NewCounterVec( prometheus.CounterOpts{ @@ -464,7 +475,7 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) { }, []string{"component", "error_type"}, ) - + m.panics = promauto.NewCounter( prometheus.CounterOpts{ Name: "chorus_panics_total", @@ -482,31 +493,31 @@ func (m *CHORUSMetrics) registerMetrics() { // StartServer starts the Prometheus metrics HTTP server func (m *CHORUSMetrics) StartServer(config *MetricsConfig) error { mux := http.NewServeMux() - + // Use custom registry handler := promhttp.HandlerFor(m.registry, promhttp.HandlerOpts{ EnableOpenMetrics: true, }) mux.Handle(config.MetricsPath, handler) - + // Health endpoint mux.HandleFunc("/health", func(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusOK) w.Write([]byte("OK")) }) - + m.httpServer = &http.Server{ Addr: config.ListenAddr, Handler: mux, } - + go func() { log.Printf("Starting metrics server on %s%s", config.ListenAddr, config.MetricsPath) if err := m.httpServer.ListenAndServe(); err != nil && err != http.ErrServerClosed { log.Printf("Metrics server error: %v", err) } }() - + return nil } @@ -656,6 +667,15 @@ func (m *CHORUSMetrics) SetSLURPQueueLength(length int) { m.slurpQueueLength.Set(float64(length)) } +// SHHH Metrics Methods + +func (m *CHORUSMetrics) IncrementSHHHFindings(rule, severity string, count int) { + if m == nil || m.shhhFindings == nil || count <= 0 { + return + } + m.shhhFindings.WithLabelValues(rule, severity).Add(float64(count)) +} + // UCXI Metrics Methods func (m *CHORUSMetrics) IncrementUCXIRequests(method, status string) { @@ -708,21 +728,21 @@ func (m *CHORUSMetrics) UpdateUptime() { func (m *CHORUSMetrics) CollectMetrics(config *MetricsConfig) { systemTicker := time.NewTicker(config.SystemMetricsInterval) resourceTicker := time.NewTicker(config.ResourceMetricsInterval) - + go func() { defer systemTicker.Stop() defer resourceTicker.Stop() - + for { select { case <-systemTicker.C: m.UpdateUptime() // Collect other system metrics - + case <-resourceTicker.C: // Collect resource metrics (would integrate with actual system monitoring) // m.collectResourceMetrics() } } }() -} \ No newline at end of file +} diff --git a/pkg/shhh/doc.go b/pkg/shhh/doc.go new file mode 100644 index 0000000..6298db6 --- /dev/null +++ 
b/pkg/shhh/doc.go @@ -0,0 +1,11 @@ +// Package shhh provides the CHORUS secrets sentinel responsible for detecting +// and redacting sensitive values before they leave the runtime. The sentinel +// focuses on predictable failure modes (log emission, telemetry fan-out, +// request forwarding) and offers a composable API for registering additional +// redaction rules, emitting audit events, and tracking operational metrics. +// +// The initial implementation focuses on high-signal secrets (API keys, +// bearer/OAuth tokens, private keys) so the runtime can start integrating +// SHHH into COOEE and WHOOSH logging immediately while the broader roadmap +// items (automated redaction replay, policy driven rules) continue landing. +package shhh diff --git a/pkg/shhh/rule.go b/pkg/shhh/rule.go new file mode 100644 index 0000000..6361ddd --- /dev/null +++ b/pkg/shhh/rule.go @@ -0,0 +1,130 @@ +package shhh + +import ( + "crypto/sha256" + "encoding/base64" + "regexp" + "sort" + "strings" +) + +type compiledRule struct { + name string + regex *regexp.Regexp + replacement string + severity Severity + tags []string +} + +type matchRecord struct { + value string +} + +func (r *compiledRule) apply(in string) (string, []matchRecord) { + indices := r.regex.FindAllStringSubmatchIndex(in, -1) + if len(indices) == 0 { + return in, nil + } + + var builder strings.Builder + builder.Grow(len(in)) + + matches := make([]matchRecord, 0, len(indices)) + last := 0 + for _, loc := range indices { + start, end := loc[0], loc[1] + builder.WriteString(in[last:start]) + replaced := r.regex.ExpandString(nil, r.replacement, in, loc) + builder.Write(replaced) + matches = append(matches, matchRecord{value: in[start:end]}) + last = end + } + builder.WriteString(in[last:]) + + return builder.String(), matches +} + +func buildDefaultRuleConfigs(placeholder string) []RuleConfig { + if placeholder == "" { + placeholder = "[REDACTED]" + } + return []RuleConfig{ + { + Name: "bearer-token", + Pattern: `(?i)(authorization\s*:\s*bearer\s+)([A-Za-z0-9\-._~+/]+=*)`, + ReplacementTemplate: "$1" + placeholder, + Severity: SeverityMedium, + Tags: []string{"token", "http"}, + }, + { + Name: "api-key", + Pattern: `(?i)((?:api[_-]?key|token|secret|password)\s*[:=]\s*["']?)([A-Za-z0-9\-._~+/]{8,})(["']?)`, + ReplacementTemplate: "$1" + placeholder + "$3", + Severity: SeverityHigh, + Tags: []string{"credentials"}, + }, + { + Name: "openai-secret", + Pattern: `(sk-[A-Za-z0-9]{20,})`, + ReplacementTemplate: placeholder, + Severity: SeverityHigh, + Tags: []string{"llm", "api"}, + }, + { + Name: "oauth-refresh-token", + Pattern: `(?i)(refresh_token"?\s*[:=]\s*["']?)([A-Za-z0-9\-._~+/]{8,})(["']?)`, + ReplacementTemplate: "$1" + placeholder + "$3", + Severity: SeverityMedium, + Tags: []string{"oauth"}, + }, + { + Name: "private-key-block", + Pattern: `(?s)(-----BEGIN [^-]+ PRIVATE KEY-----)[^-]+(-----END [^-]+ PRIVATE KEY-----)`, + ReplacementTemplate: "$1\n" + placeholder + "\n$2", + Severity: SeverityHigh, + Tags: []string{"pem", "key"}, + }, + } +} + +func compileRules(cfg Config, placeholder string) ([]*compiledRule, error) { + configs := make([]RuleConfig, 0) + if !cfg.DisableDefaultRules { + configs = append(configs, buildDefaultRuleConfigs(placeholder)...) + } + configs = append(configs, cfg.CustomRules...) 
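+	// Default rules are merged ahead of custom rules; the compiled set is sorted by
+	// name below, so rules fire in alphabetical order and later rules operate on
+	// text already redacted by earlier ones.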
+ + rules := make([]*compiledRule, 0, len(configs)) + for _, rc := range configs { + if rc.Name == "" || rc.Pattern == "" { + continue + } + replacement := rc.ReplacementTemplate + if replacement == "" { + replacement = placeholder + } + re, err := regexp.Compile(rc.Pattern) + if err != nil { + return nil, err + } + compiled := &compiledRule{ + name: rc.Name, + replacement: replacement, + regex: re, + severity: rc.Severity, + tags: append([]string(nil), rc.Tags...), + } + rules = append(rules, compiled) + } + + sort.SliceStable(rules, func(i, j int) bool { + return rules[i].name < rules[j].name + }) + + return rules, nil +} + +func hashSecret(value string) string { + sum := sha256.Sum256([]byte(value)) + return base64.RawStdEncoding.EncodeToString(sum[:]) +} diff --git a/pkg/shhh/sentinel.go b/pkg/shhh/sentinel.go new file mode 100644 index 0000000..01485db --- /dev/null +++ b/pkg/shhh/sentinel.go @@ -0,0 +1,407 @@ +package shhh + +import ( + "context" + "errors" + "fmt" + "sort" + "sync" +) + +// Option configures the sentinel during construction. +type Option func(*Sentinel) + +// FindingObserver receives aggregated findings for each redaction operation. +type FindingObserver func(context.Context, []Finding) + +// WithAuditSink attaches an audit sink for per-redaction events. +func WithAuditSink(sink AuditSink) Option { + return func(s *Sentinel) { + s.audit = sink + } +} + +// WithStats allows callers to supply a shared stats collector. +func WithStats(stats *Stats) Option { + return func(s *Sentinel) { + s.stats = stats + } +} + +// WithFindingObserver registers an observer that is invoked whenever redaction +// produces findings. +func WithFindingObserver(observer FindingObserver) Option { + return func(s *Sentinel) { + if observer == nil { + return + } + s.observers = append(s.observers, observer) + } +} + +// Sentinel performs secret detection/redaction across text payloads. +type Sentinel struct { + mu sync.RWMutex + enabled bool + placeholder string + rules []*compiledRule + audit AuditSink + stats *Stats + observers []FindingObserver +} + +// NewSentinel creates a new secrets sentinel using the provided configuration. +func NewSentinel(cfg Config, opts ...Option) (*Sentinel, error) { + placeholder := cfg.RedactionPlaceholder + if placeholder == "" { + placeholder = "[REDACTED]" + } + + s := &Sentinel{ + enabled: !cfg.Disabled, + placeholder: placeholder, + stats: NewStats(), + } + for _, opt := range opts { + opt(s) + } + if s.stats == nil { + s.stats = NewStats() + } + + rules, err := compileRules(cfg, placeholder) + if err != nil { + return nil, fmt.Errorf("compile SHHH rules: %w", err) + } + if len(rules) == 0 { + return nil, errors.New("no SHHH rules configured") + } + s.rules = rules + + return s, nil +} + +// Enabled reports whether the sentinel is actively redacting. +func (s *Sentinel) Enabled() bool { + s.mu.RLock() + defer s.mu.RUnlock() + return s.enabled +} + +// Toggle enables or disables the sentinel at runtime. +func (s *Sentinel) Toggle(enabled bool) { + s.mu.Lock() + defer s.mu.Unlock() + s.enabled = enabled +} + +// SetAuditSink updates the audit sink at runtime. +func (s *Sentinel) SetAuditSink(sink AuditSink) { + s.mu.Lock() + defer s.mu.Unlock() + s.audit = sink +} + +// AddFindingObserver registers an observer after construction. 
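+// Observers are invoked synchronously, in registration order, with a shared copy
+// of the aggregated findings.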
+func (s *Sentinel) AddFindingObserver(observer FindingObserver) { + if observer == nil { + return + } + s.mu.Lock() + defer s.mu.Unlock() + s.observers = append(s.observers, observer) +} + +// StatsSnapshot returns a snapshot of the current counters. +func (s *Sentinel) StatsSnapshot() StatsSnapshot { + s.mu.RLock() + stats := s.stats + s.mu.RUnlock() + if stats == nil { + return StatsSnapshot{} + } + return stats.Snapshot() +} + +// RedactText scans the provided text and redacts any findings. +func (s *Sentinel) RedactText(ctx context.Context, text string, labels map[string]string) (string, []Finding) { + s.mu.RLock() + enabled := s.enabled + rules := s.rules + stats := s.stats + audit := s.audit + s.mu.RUnlock() + + if !enabled || len(rules) == 0 { + return text, nil + } + if stats != nil { + stats.IncScan() + } + + aggregates := make(map[string]*findingAggregate) + current := text + path := derivePath(labels) + + for _, rule := range rules { + redacted, matches := rule.apply(current) + if len(matches) == 0 { + continue + } + current = redacted + if stats != nil { + stats.AddFindings(rule.name, len(matches)) + } + recordAggregate(aggregates, rule, path, len(matches)) + + if audit != nil { + metadata := cloneLabels(labels) + for _, match := range matches { + event := AuditEvent{ + Rule: rule.name, + Severity: rule.severity, + Tags: append([]string(nil), rule.tags...), + Path: path, + Hash: hashSecret(match.value), + Metadata: metadata, + } + audit.RecordRedaction(ctx, event) + } + } + } + + findings := flattenAggregates(aggregates) + s.notifyObservers(ctx, findings) + return current, findings +} + +// RedactMap walks the map and redacts in-place. It returns the collected findings. +func (s *Sentinel) RedactMap(ctx context.Context, payload map[string]any) []Finding { + return s.RedactMapWithLabels(ctx, payload, nil) +} + +// RedactMapWithLabels allows callers to specify base labels that will be merged +// into metadata for nested structures. 
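+// The payload is mutated in place: matching string values are replaced with their
+// redacted form before the findings are returned.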
+func (s *Sentinel) RedactMapWithLabels(ctx context.Context, payload map[string]any, baseLabels map[string]string) []Finding { + if payload == nil { + return nil + } + + aggregates := make(map[string]*findingAggregate) + s.redactValue(ctx, payload, "", baseLabels, aggregates) + findings := flattenAggregates(aggregates) + s.notifyObservers(ctx, findings) + return findings +} + +func (s *Sentinel) redactValue(ctx context.Context, value any, path string, baseLabels map[string]string, agg map[string]*findingAggregate) { + switch v := value.(type) { + case map[string]interface{}: + for key, val := range v { + childPath := joinPath(path, key) + switch typed := val.(type) { + case string: + labels := mergeLabels(baseLabels, childPath) + redacted, findings := s.RedactText(ctx, typed, labels) + if redacted != typed { + v[key] = redacted + } + mergeAggregates(agg, findings) + case fmt.Stringer: + labels := mergeLabels(baseLabels, childPath) + text := typed.String() + redacted, findings := s.RedactText(ctx, text, labels) + if redacted != text { + v[key] = redacted + } + mergeAggregates(agg, findings) + default: + s.redactValue(ctx, typed, childPath, baseLabels, agg) + } + } + case []interface{}: + for idx, item := range v { + childPath := indexPath(path, idx) + switch typed := item.(type) { + case string: + labels := mergeLabels(baseLabels, childPath) + redacted, findings := s.RedactText(ctx, typed, labels) + if redacted != typed { + v[idx] = redacted + } + mergeAggregates(agg, findings) + case fmt.Stringer: + labels := mergeLabels(baseLabels, childPath) + text := typed.String() + redacted, findings := s.RedactText(ctx, text, labels) + if redacted != text { + v[idx] = redacted + } + mergeAggregates(agg, findings) + default: + s.redactValue(ctx, typed, childPath, baseLabels, agg) + } + } + case []string: + for idx, item := range v { + childPath := indexPath(path, idx) + labels := mergeLabels(baseLabels, childPath) + redacted, findings := s.RedactText(ctx, item, labels) + if redacted != item { + v[idx] = redacted + } + mergeAggregates(agg, findings) + } + } +} + +func (s *Sentinel) notifyObservers(ctx context.Context, findings []Finding) { + if len(findings) == 0 { + return + } + findingsCopy := append([]Finding(nil), findings...) + s.mu.RLock() + observers := append([]FindingObserver(nil), s.observers...) 
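+	// Release the read lock before invoking observers so callbacks cannot deadlock
+	// against the sentinel's mutex.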
+ s.mu.RUnlock() + for _, observer := range observers { + observer(ctx, findingsCopy) + } +} + +func mergeAggregates(dest map[string]*findingAggregate, findings []Finding) { + for i := range findings { + f := findings[i] + agg := dest[f.Rule] + if agg == nil { + agg = &findingAggregate{ + rule: f.Rule, + severity: f.Severity, + tags: append([]string(nil), f.Tags...), + locations: make(map[string]int), + } + dest[f.Rule] = agg + } + agg.count += f.Count + for _, loc := range f.Locations { + agg.locations[loc.Path] += loc.Count + } + } +} + +func recordAggregate(dest map[string]*findingAggregate, rule *compiledRule, path string, count int) { + agg := dest[rule.name] + if agg == nil { + agg = &findingAggregate{ + rule: rule.name, + severity: rule.severity, + tags: append([]string(nil), rule.tags...), + locations: make(map[string]int), + } + dest[rule.name] = agg + } + agg.count += count + if path != "" { + agg.locations[path] += count + } +} + +func flattenAggregates(agg map[string]*findingAggregate) []Finding { + if len(agg) == 0 { + return nil + } + keys := make([]string, 0, len(agg)) + for key := range agg { + keys = append(keys, key) + } + sort.Strings(keys) + + findings := make([]Finding, 0, len(agg)) + for _, key := range keys { + entry := agg[key] + locations := make([]Location, 0, len(entry.locations)) + if len(entry.locations) > 0 { + paths := make([]string, 0, len(entry.locations)) + for path := range entry.locations { + paths = append(paths, path) + } + sort.Strings(paths) + for _, path := range paths { + locations = append(locations, Location{Path: path, Count: entry.locations[path]}) + } + } + findings = append(findings, Finding{ + Rule: entry.rule, + Severity: entry.severity, + Tags: append([]string(nil), entry.tags...), + Count: entry.count, + Locations: locations, + }) + } + return findings +} + +func derivePath(labels map[string]string) string { + if labels == nil { + return "" + } + if path := labels["path"]; path != "" { + return path + } + if path := labels["source"]; path != "" { + return path + } + if path := labels["field"]; path != "" { + return path + } + return "" +} + +func cloneLabels(labels map[string]string) map[string]string { + if len(labels) == 0 { + return nil + } + clone := make(map[string]string, len(labels)) + for k, v := range labels { + clone[k] = v + } + return clone +} + +func joinPath(prefix, key string) string { + if prefix == "" { + return key + } + if key == "" { + return prefix + } + return prefix + "." 
+ key +} + +func indexPath(prefix string, idx int) string { + if prefix == "" { + return fmt.Sprintf("[%d]", idx) + } + return fmt.Sprintf("%s[%d]", prefix, idx) +} + +func mergeLabels(base map[string]string, path string) map[string]string { + if base == nil && path == "" { + return nil + } + labels := cloneLabels(base) + if labels == nil { + labels = make(map[string]string, 1) + } + if path != "" { + labels["path"] = path + } + return labels +} + +type findingAggregate struct { + rule string + severity Severity + tags []string + count int + locations map[string]int +} diff --git a/pkg/shhh/sentinel_test.go b/pkg/shhh/sentinel_test.go new file mode 100644 index 0000000..fe43bf7 --- /dev/null +++ b/pkg/shhh/sentinel_test.go @@ -0,0 +1,95 @@ +package shhh + +import ( + "context" + "testing" + + "github.com/stretchr/testify/require" +) + +type recordingSink struct { + events []AuditEvent +} + +func (r *recordingSink) RecordRedaction(_ context.Context, event AuditEvent) { + r.events = append(r.events, event) +} + +func TestRedactText_DefaultRules(t *testing.T) { + sentinel, err := NewSentinel(Config{}) + require.NoError(t, err) + + input := "Authorization: Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.secret" + redacted, findings := sentinel.RedactText(context.Background(), input, map[string]string{"source": "http.request.headers.authorization"}) + + require.Equal(t, "Authorization: Bearer [REDACTED]", redacted) + require.Len(t, findings, 1) + require.Equal(t, "bearer-token", findings[0].Rule) + require.Equal(t, 1, findings[0].Count) + require.NotEmpty(t, findings[0].Locations) + + snapshot := sentinel.StatsSnapshot() + require.Equal(t, uint64(1), snapshot.TotalScans) + require.Equal(t, uint64(1), snapshot.TotalFindings) + require.Equal(t, uint64(1), snapshot.PerRuleFindings["bearer-token"]) +} + +func TestRedactMap_NestedStructures(t *testing.T) { + sentinel, err := NewSentinel(Config{}) + require.NoError(t, err) + + payload := map[string]any{ + "config": map[string]any{ + "api_key": "API_KEY=1234567890ABCDEFG", + }, + "tokens": []any{ + "sk-test1234567890ABCDEF", + map[string]any{"refresh": "refresh_token=abcdef12345"}, + }, + } + + findings := sentinel.RedactMap(context.Background(), payload) + require.NotEmpty(t, findings) + + config := payload["config"].(map[string]any) + require.Equal(t, "API_KEY=[REDACTED]", config["api_key"]) + + tokens := payload["tokens"].([]any) + require.Equal(t, "[REDACTED]", tokens[0]) + + inner := tokens[1].(map[string]any) + require.Equal(t, "refresh_token=[REDACTED]", inner["refresh"]) + + total := 0 + for _, finding := range findings { + total += finding.Count + } + require.Equal(t, 3, total) +} + +func TestAuditSinkReceivesEvents(t *testing.T) { + sink := &recordingSink{} + cfg := Config{ + DisableDefaultRules: true, + CustomRules: []RuleConfig{ + { + Name: "custom-secret", + Pattern: `(secret\s*=\s*)([A-Za-z0-9]{6,})`, + ReplacementTemplate: "$1[REDACTED]", + Severity: SeverityHigh, + }, + }, + } + + sentinel, err := NewSentinel(cfg, WithAuditSink(sink)) + require.NoError(t, err) + + _, findings := sentinel.RedactText(context.Background(), "secret=mysecretvalue", map[string]string{"source": "test"}) + require.Len(t, findings, 1) + require.Equal(t, 1, findings[0].Count) + + require.Len(t, sink.events, 1) + require.Equal(t, "custom-secret", sink.events[0].Rule) + require.NotEmpty(t, sink.events[0].Hash) + require.Equal(t, "test", sink.events[0].Path) +} diff --git a/pkg/shhh/stats.go b/pkg/shhh/stats.go new file mode 100644 index 0000000..5b51cf6 --- /dev/null 
+++ b/pkg/shhh/stats.go @@ -0,0 +1,60 @@ +package shhh + +import ( + "sync" + "sync/atomic" +) + +// Stats tracks aggregate counts for the sentinel. +type Stats struct { + totalScans atomic.Uint64 + totalFindings atomic.Uint64 + perRule sync.Map // string -> *atomic.Uint64 +} + +// NewStats constructs a Stats collector. +func NewStats() *Stats { + return &Stats{} +} + +// IncScan increments the total scan counter. +func (s *Stats) IncScan() { + if s == nil { + return + } + s.totalScans.Add(1) +} + +// AddFindings records findings for a rule. +func (s *Stats) AddFindings(rule string, count int) { + if s == nil || count <= 0 { + return + } + s.totalFindings.Add(uint64(count)) + counterAny, _ := s.perRule.LoadOrStore(rule, new(atomic.Uint64)) + counter := counterAny.(*atomic.Uint64) + counter.Add(uint64(count)) +} + +// Snapshot returns a point-in-time view of the counters. +func (s *Stats) Snapshot() StatsSnapshot { + if s == nil { + return StatsSnapshot{} + } + snapshot := StatsSnapshot{ + TotalScans: s.totalScans.Load(), + TotalFindings: s.totalFindings.Load(), + PerRuleFindings: make(map[string]uint64), + } + s.perRule.Range(func(key, value any) bool { + name, ok := key.(string) + if !ok { + return true + } + if counter, ok := value.(*atomic.Uint64); ok { + snapshot.PerRuleFindings[name] = counter.Load() + } + return true + }) + return snapshot +} diff --git a/pkg/shhh/types.go b/pkg/shhh/types.go new file mode 100644 index 0000000..f40e865 --- /dev/null +++ b/pkg/shhh/types.go @@ -0,0 +1,73 @@ +package shhh + +import "context" + +// Severity represents the criticality associated with a redaction finding. +type Severity string + +const ( + // SeverityLow indicates low-impact findings (e.g. non-production credentials). + SeverityLow Severity = "low" + // SeverityMedium indicates medium impact findings (e.g. access tokens). + SeverityMedium Severity = "medium" + // SeverityHigh indicates high-impact findings (e.g. private keys). + SeverityHigh Severity = "high" +) + +// RuleConfig defines a redaction rule that SHHH should enforce. +type RuleConfig struct { + Name string `json:"name"` + Pattern string `json:"pattern"` + ReplacementTemplate string `json:"replacement_template"` + Severity Severity `json:"severity"` + Tags []string `json:"tags"` +} + +// Config controls sentinel behaviour. +type Config struct { + // Disabled toggles redaction off entirely. + Disabled bool `json:"disabled"` + // RedactionPlaceholder overrides the default placeholder value. + RedactionPlaceholder string `json:"redaction_placeholder"` + // DisableDefaultRules disables the built-in curated rule set. + DisableDefaultRules bool `json:"disable_default_rules"` + // CustomRules allows callers to append bespoke redaction patterns. + CustomRules []RuleConfig `json:"custom_rules"` +} + +// Finding represents a single rule firing during redaction. +type Finding struct { + Rule string `json:"rule"` + Severity Severity `json:"severity"` + Tags []string `json:"tags,omitempty"` + Count int `json:"count"` + Locations []Location `json:"locations,omitempty"` +} + +// Location describes where a secret was found. +type Location struct { + Path string `json:"path"` + Count int `json:"count"` +} + +// StatsSnapshot exposes aggregate counters for observability. +type StatsSnapshot struct { + TotalScans uint64 `json:"total_scans"` + TotalFindings uint64 `json:"total_findings"` + PerRuleFindings map[string]uint64 `json:"per_rule_findings"` +} + +// AuditEvent captures a single redaction occurrence for downstream sinks. 
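+// The matched secret itself is never included; Hash carries an unpadded base64
+// SHA-256 digest of the value for correlation.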
+type AuditEvent struct { + Rule string `json:"rule"` + Severity Severity `json:"severity"` + Tags []string `json:"tags,omitempty"` + Path string `json:"path,omitempty"` + Hash string `json:"hash"` + Metadata map[string]string `json:"metadata,omitempty"` +} + +// AuditSink receives redaction events for long term storage / replay. +type AuditSink interface { + RecordRedaction(ctx context.Context, event AuditEvent) +} diff --git a/pkg/ucxl/decision_publisher.go b/pkg/ucxl/decision_publisher.go index d8d0a69..ecd5747 100644 --- a/pkg/ucxl/decision_publisher.go +++ b/pkg/ucxl/decision_publisher.go @@ -13,11 +13,11 @@ import ( // DecisionPublisher handles publishing task completion decisions to encrypted DHT storage type DecisionPublisher struct { - ctx context.Context - config *config.Config - dhtStorage storage.UCXLStorage - nodeID string - agentName string + ctx context.Context + config *config.Config + dhtStorage storage.UCXLStorage + nodeID string + agentName string } // NewDecisionPublisher creates a new decision publisher @@ -39,28 +39,28 @@ func NewDecisionPublisher( // TaskDecision represents a decision made by an agent upon task completion type TaskDecision struct { - Agent string `json:"agent"` - Role string `json:"role"` - Project string `json:"project"` - Task string `json:"task"` - Decision string `json:"decision"` - Context map[string]interface{} `json:"context"` - Timestamp time.Time `json:"timestamp"` - Success bool `json:"success"` - ErrorMessage string `json:"error_message,omitempty"` - FilesModified []string `json:"files_modified,omitempty"` - LinesChanged int `json:"lines_changed,omitempty"` - TestResults *TestResults `json:"test_results,omitempty"` - Dependencies []string `json:"dependencies,omitempty"` - NextSteps []string `json:"next_steps,omitempty"` + Agent string `json:"agent"` + Role string `json:"role"` + Project string `json:"project"` + Task string `json:"task"` + Decision string `json:"decision"` + Context map[string]interface{} `json:"context"` + Timestamp time.Time `json:"timestamp"` + Success bool `json:"success"` + ErrorMessage string `json:"error_message,omitempty"` + FilesModified []string `json:"files_modified,omitempty"` + LinesChanged int `json:"lines_changed,omitempty"` + TestResults *TestResults `json:"test_results,omitempty"` + Dependencies []string `json:"dependencies,omitempty"` + NextSteps []string `json:"next_steps,omitempty"` } // TestResults captures test execution results type TestResults struct { - Passed int `json:"passed"` - Failed int `json:"failed"` - Skipped int `json:"skipped"` - Coverage float64 `json:"coverage,omitempty"` + Passed int `json:"passed"` + Failed int `json:"failed"` + Skipped int `json:"skipped"` + Coverage float64 `json:"coverage,omitempty"` FailedTests []string `json:"failed_tests,omitempty"` } @@ -74,7 +74,11 @@ func (dp *DecisionPublisher) PublishTaskDecision(decision *TaskDecision) error { decision.Role = dp.config.Agent.Role } if decision.Project == "" { - decision.Project = "default-project" // TODO: Add project field to config + if project := dp.config.Agent.Project; project != "" { + decision.Project = project + } else { + decision.Project = "chorus" + } } if decision.Timestamp.IsZero() { decision.Timestamp = time.Now() @@ -173,16 +177,16 @@ func (dp *DecisionPublisher) PublishArchitecturalDecision( nextSteps []string, ) error { taskDecision := &TaskDecision{ - Task: taskName, - Decision: decision, - Success: true, + Task: taskName, + Decision: decision, + Success: true, NextSteps: nextSteps, Context: 
map[string]interface{}{ - "decision_type": "architecture", - "rationale": rationale, - "alternatives": alternatives, - "implications": implications, - "node_id": dp.nodeID, + "decision_type": "architecture", + "rationale": rationale, + "alternatives": alternatives, + "implications": implications, + "node_id": dp.nodeID, }, } @@ -291,7 +295,7 @@ func (dp *DecisionPublisher) SubscribeToDecisions( ) error { // This is a placeholder for future pubsub implementation // For now, we'll implement a simple polling mechanism - + go func() { ticker := time.NewTicker(30 * time.Second) defer ticker.Stop() @@ -341,10 +345,10 @@ func (dp *DecisionPublisher) PublishSystemStatus( Decision: status, Success: dp.allHealthChecksPass(healthChecks), Context: map[string]interface{}{ - "decision_type": "system", - "metrics": metrics, - "health_checks": healthChecks, - "node_id": dp.nodeID, + "decision_type": "system", + "metrics": metrics, + "health_checks": healthChecks, + "node_id": dp.nodeID, }, } @@ -364,13 +368,17 @@ func (dp *DecisionPublisher) allHealthChecksPass(healthChecks map[string]bool) b // GetPublisherMetrics returns metrics about the decision publisher func (dp *DecisionPublisher) GetPublisherMetrics() map[string]interface{} { dhtMetrics := dp.dhtStorage.GetMetrics() - - return map[string]interface{}{ - "node_id": dp.nodeID, - "agent_name": dp.agentName, - "current_role": dp.config.Agent.Role, - "project": "default-project", // TODO: Add project field to config - "dht_metrics": dhtMetrics, - "last_publish": time.Now(), // This would be tracked in a real implementation + project := dp.config.Agent.Project + if project == "" { + project = "chorus" } -} \ No newline at end of file + + return map[string]interface{}{ + "node_id": dp.nodeID, + "agent_name": dp.agentName, + "current_role": dp.config.Agent.Role, + "project": project, + "dht_metrics": dhtMetrics, + "last_publish": time.Now(), // This would be tracked in a real implementation + } +} diff --git a/pubsub/pubsub.go b/pubsub/pubsub.go index 7a28473..53be0c5 100644 --- a/pubsub/pubsub.go +++ b/pubsub/pubsub.go @@ -8,9 +8,10 @@ import ( "sync" "time" + "chorus/pkg/shhh" + pubsub "github.com/libp2p/go-libp2p-pubsub" "github.com/libp2p/go-libp2p/core/host" "github.com/libp2p/go-libp2p/core/peer" - pubsub "github.com/libp2p/go-libp2p-pubsub" ) // PubSub handles publish/subscribe messaging for Bzzz coordination and HMMM meta-discussion @@ -19,36 +20,42 @@ type PubSub struct { host host.Host ctx context.Context cancel context.CancelFunc - + // Topic subscriptions - chorusTopic *pubsub.Topic - hmmmTopic *pubsub.Topic - contextTopic *pubsub.Topic - + chorusTopic *pubsub.Topic + hmmmTopic *pubsub.Topic + contextTopic *pubsub.Topic + // Message subscriptions - chorusSub *pubsub.Subscription - hmmmSub *pubsub.Subscription - contextSub *pubsub.Subscription - + chorusSub *pubsub.Subscription + hmmmSub *pubsub.Subscription + contextSub *pubsub.Subscription + // Dynamic topic management - dynamicTopics map[string]*pubsub.Topic - dynamicTopicsMux sync.RWMutex - dynamicSubs map[string]*pubsub.Subscription - dynamicSubsMux sync.RWMutex + dynamicTopics map[string]*pubsub.Topic + dynamicTopicsMux sync.RWMutex + dynamicSubs map[string]*pubsub.Subscription + dynamicSubsMux sync.RWMutex + dynamicHandlers map[string]func([]byte, peer.ID) + dynamicHandlersMux sync.RWMutex // Configuration - chorusTopicName string - hmmmTopicName string - contextTopicName string + chorusTopicName string + hmmmTopicName string + contextTopicName string // External message handler for HMMM 
messages - HmmmMessageHandler func(msg Message, from peer.ID) - + HmmmMessageHandler func(msg Message, from peer.ID) + // External message handler for Context Feedback messages ContextFeedbackHandler func(msg Message, from peer.ID) - + // Hypercore-style logging hypercoreLog HypercoreLogger + + // SHHH sentinel + redactor *shhh.Sentinel + redactorMux sync.RWMutex } // HypercoreLogger interface for dependency injection @@ -62,45 +69,45 @@ type MessageType string const ( // Bzzz coordination messages - TaskAnnouncement MessageType = "task_announcement" - TaskClaim MessageType = "task_claim" - TaskProgress MessageType = "task_progress" - TaskComplete MessageType = "task_complete" - CapabilityBcast MessageType = "capability_broadcast" // Only broadcast when capabilities change + TaskAnnouncement MessageType = "task_announcement" + TaskClaim MessageType = "task_claim" + TaskProgress MessageType = "task_progress" + TaskComplete MessageType = "task_complete" + CapabilityBcast MessageType = "capability_broadcast" // Only broadcast when capabilities change AvailabilityBcast MessageType = "availability_broadcast" // Regular availability status - + // HMMM meta-discussion messages - MetaDiscussion MessageType = "meta_discussion" // Generic type for all discussion - TaskHelpRequest MessageType = "task_help_request" // Request for assistance - TaskHelpResponse MessageType = "task_help_response" // Response to a help request - CoordinationRequest MessageType = "coordination_request" // Request for coordination - CoordinationComplete MessageType = "coordination_complete" // Coordination session completed - DependencyAlert MessageType = "dependency_alert" // Dependency detected - EscalationTrigger MessageType = "escalation_trigger" // Human escalation needed - + MetaDiscussion MessageType = "meta_discussion" // Generic type for all discussion + TaskHelpRequest MessageType = "task_help_request" // Request for assistance + TaskHelpResponse MessageType = "task_help_response" // Response to a help request + CoordinationRequest MessageType = "coordination_request" // Request for coordination + CoordinationComplete MessageType = "coordination_complete" // Coordination session completed + DependencyAlert MessageType = "dependency_alert" // Dependency detected + EscalationTrigger MessageType = "escalation_trigger" // Human escalation needed + // Role-based collaboration messages - RoleAnnouncement MessageType = "role_announcement" // Agent announces its role and capabilities - ExpertiseRequest MessageType = "expertise_request" // Request for specific expertise - ExpertiseResponse MessageType = "expertise_response" // Response offering expertise - StatusUpdate MessageType = "status_update" // Regular status updates from agents - WorkAllocation MessageType = "work_allocation" // Allocation of work to specific roles - RoleCollaboration MessageType = "role_collaboration" // Cross-role collaboration message - MentorshipRequest MessageType = "mentorship_request" // Junior role requesting mentorship - MentorshipResponse MessageType = "mentorship_response" // Senior role providing mentorship - ProjectUpdate MessageType = "project_update" // Project-level status updates - DeliverableReady MessageType = "deliverable_ready" // Notification that deliverable is complete - + RoleAnnouncement MessageType = "role_announcement" // Agent announces its role and capabilities + ExpertiseRequest MessageType = "expertise_request" // Request for specific expertise + ExpertiseResponse MessageType = "expertise_response" // Response 
offering expertise + StatusUpdate MessageType = "status_update" // Regular status updates from agents + WorkAllocation MessageType = "work_allocation" // Allocation of work to specific roles + RoleCollaboration MessageType = "role_collaboration" // Cross-role collaboration message + MentorshipRequest MessageType = "mentorship_request" // Junior role requesting mentorship + MentorshipResponse MessageType = "mentorship_response" // Senior role providing mentorship + ProjectUpdate MessageType = "project_update" // Project-level status updates + DeliverableReady MessageType = "deliverable_ready" // Notification that deliverable is complete + // RL Context Curator feedback messages - FeedbackEvent MessageType = "feedback_event" // Context feedback for RL learning - ContextRequest MessageType = "context_request" // Request context from HCFS - ContextResponse MessageType = "context_response" // Response with context data - ContextUsage MessageType = "context_usage" // Report context usage patterns - ContextRelevance MessageType = "context_relevance" // Report context relevance scoring - + FeedbackEvent MessageType = "feedback_event" // Context feedback for RL learning + ContextRequest MessageType = "context_request" // Request context from HCFS + ContextResponse MessageType = "context_response" // Response with context data + ContextUsage MessageType = "context_usage" // Report context usage patterns + ContextRelevance MessageType = "context_relevance" // Report context relevance scoring + // SLURP event integration messages - SlurpEventGenerated MessageType = "slurp_event_generated" // HMMM consensus generated SLURP event - SlurpEventAck MessageType = "slurp_event_ack" // Acknowledgment of SLURP event receipt - SlurpContextUpdate MessageType = "slurp_context_update" // Context update from SLURP system + SlurpEventGenerated MessageType = "slurp_event_generated" // HMMM consensus generated SLURP event + SlurpEventAck MessageType = "slurp_event_ack" // Acknowledgment of SLURP event receipt + SlurpContextUpdate MessageType = "slurp_context_update" // Context update from SLURP system ) // Message represents a Bzzz/Antennae message @@ -110,14 +117,14 @@ type Message struct { Timestamp time.Time `json:"timestamp"` Data map[string]interface{} `json:"data"` HopCount int `json:"hop_count,omitempty"` // For Antennae hop limiting - + // Role-based collaboration fields - FromRole string `json:"from_role,omitempty"` // Role of sender - ToRoles []string `json:"to_roles,omitempty"` // Target roles + FromRole string `json:"from_role,omitempty"` // Role of sender + ToRoles []string `json:"to_roles,omitempty"` // Target roles RequiredExpertise []string `json:"required_expertise,omitempty"` // Required expertise areas - ProjectID string `json:"project_id,omitempty"` // Associated project - Priority string `json:"priority,omitempty"` // Message priority (low, medium, high, urgent) - ThreadID string `json:"thread_id,omitempty"` // Conversation thread ID + ProjectID string `json:"project_id,omitempty"` // Associated project + Priority string `json:"priority,omitempty"` // Message priority (low, medium, high, urgent) + ThreadID string `json:"thread_id,omitempty"` // Conversation thread ID } // NewPubSub creates a new PubSub instance for Bzzz coordination and HMMM meta-discussion @@ -150,16 +157,17 @@ func NewPubSubWithLogger(ctx context.Context, h host.Host, chorusTopic, hmmmTopi } p := &PubSub{ - ps: ps, - host: h, - ctx: pubsubCtx, - cancel: cancel, - chorusTopicName: chorusTopic, + ps: ps, + host: h, + ctx: 
pubsubCtx, + cancel: cancel, + chorusTopicName: chorusTopic, hmmmTopicName: hmmmTopic, contextTopicName: contextTopic, - dynamicTopics: make(map[string]*pubsub.Topic), - dynamicSubs: make(map[string]*pubsub.Subscription), - hypercoreLog: logger, + dynamicTopics: make(map[string]*pubsub.Topic), + dynamicSubs: make(map[string]*pubsub.Subscription), + dynamicHandlers: make(map[string]func([]byte, peer.ID)), + hypercoreLog: logger, } // Join static topics @@ -177,6 +185,13 @@ func NewPubSubWithLogger(ctx context.Context, h host.Host, chorusTopic, hmmmTopi return p, nil } +// SetRedactor wires the SHHH sentinel so outbound messages are sanitized before publication. +func (p *PubSub) SetRedactor(redactor *shhh.Sentinel) { + p.redactorMux.Lock() + defer p.redactorMux.Unlock() + p.redactor = redactor +} + // SetHmmmMessageHandler sets the handler for incoming HMMM messages. func (p *PubSub) SetHmmmMessageHandler(handler func(msg Message, from peer.ID)) { p.HmmmMessageHandler = handler @@ -231,15 +246,21 @@ func (p *PubSub) joinStaticTopics() error { return nil } -// JoinDynamicTopic joins a new topic for a specific task -func (p *PubSub) JoinDynamicTopic(topicName string) error { - p.dynamicTopicsMux.Lock() - defer p.dynamicTopicsMux.Unlock() - p.dynamicSubsMux.Lock() - defer p.dynamicSubsMux.Unlock() +// subscribeDynamicTopic joins a topic and optionally assigns a raw handler. +func (p *PubSub) subscribeDynamicTopic(topicName string, handler func([]byte, peer.ID)) error { + if topicName == "" { + return fmt.Errorf("topic name cannot be empty") + } - if _, exists := p.dynamicTopics[topicName]; exists { - return nil // Already joined + p.dynamicTopicsMux.RLock() + _, exists := p.dynamicTopics[topicName] + p.dynamicTopicsMux.RUnlock() + + if exists { + p.dynamicHandlersMux.Lock() + p.dynamicHandlers[topicName] = handler + p.dynamicHandlersMux.Unlock() + return nil } topic, err := p.ps.Join(topicName) @@ -253,38 +274,68 @@ func (p *PubSub) JoinDynamicTopic(topicName string) error { return fmt.Errorf("failed to subscribe to dynamic topic %s: %w", topicName, err) } + p.dynamicTopicsMux.Lock() + if _, already := p.dynamicTopics[topicName]; already { + p.dynamicTopicsMux.Unlock() + sub.Cancel() + topic.Close() + p.dynamicHandlersMux.Lock() + p.dynamicHandlers[topicName] = handler + p.dynamicHandlersMux.Unlock() + return nil + } p.dynamicTopics[topicName] = topic - p.dynamicSubs[topicName] = sub + p.dynamicTopicsMux.Unlock() - // Start a handler for this new subscription - go p.handleDynamicMessages(sub) + p.dynamicSubsMux.Lock() + p.dynamicSubs[topicName] = sub + p.dynamicSubsMux.Unlock() + + p.dynamicHandlersMux.Lock() + p.dynamicHandlers[topicName] = handler + p.dynamicHandlersMux.Unlock() + + go p.handleDynamicMessages(topicName, sub) fmt.Printf("βœ… Joined dynamic topic: %s\n", topicName) return nil } +// JoinDynamicTopic joins a new topic for a specific task +func (p *PubSub) JoinDynamicTopic(topicName string) error { + return p.subscribeDynamicTopic(topicName, nil) +} + +// SubscribeRawTopic joins a topic and delivers raw payloads to the provided handler. 
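+// Handlers receive the payload bytes exactly as published; no Message envelope
+// decoding or hypercore logging is applied on this path.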
+func (p *PubSub) SubscribeRawTopic(topicName string, handler func([]byte, peer.ID)) error { + if handler == nil { + return fmt.Errorf("handler cannot be nil") + } + return p.subscribeDynamicTopic(topicName, handler) +} + // JoinRoleBasedTopics joins topics based on role and expertise func (p *PubSub) JoinRoleBasedTopics(role string, expertise []string, reportsTo []string) error { var topicsToJoin []string - + // Join role-specific topic if role != "" { roleTopic := fmt.Sprintf("CHORUS/roles/%s/v1", strings.ToLower(strings.ReplaceAll(role, " ", "_"))) topicsToJoin = append(topicsToJoin, roleTopic) } - + // Join expertise-specific topics for _, exp := range expertise { expertiseTopic := fmt.Sprintf("CHORUS/expertise/%s/v1", strings.ToLower(strings.ReplaceAll(exp, " ", "_"))) topicsToJoin = append(topicsToJoin, expertiseTopic) } - + // Join reporting hierarchy topics for _, supervisor := range reportsTo { supervisorTopic := fmt.Sprintf("CHORUS/hierarchy/%s/v1", strings.ToLower(strings.ReplaceAll(supervisor, " ", "_"))) topicsToJoin = append(topicsToJoin, supervisorTopic) } - + // Join all identified topics for _, topicName := range topicsToJoin { if err := p.JoinDynamicTopic(topicName); err != nil { @@ -292,7 +343,7 @@ func (p *PubSub) JoinRoleBasedTopics(role string, expertise []string, reportsTo continue } } - + fmt.Printf("🎯 Joined %d role-based topics for role: %s\n", len(topicsToJoin), role) return nil } @@ -302,7 +353,7 @@ func (p *PubSub) JoinProjectTopic(projectID string) error { if projectID == "" { return fmt.Errorf("project ID cannot be empty") } - + topicName := fmt.Sprintf("CHORUS/projects/%s/coordination/v1", projectID) return p.JoinDynamicTopic(topicName) } @@ -324,6 +375,10 @@ func (p *PubSub) LeaveDynamicTopic(topicName string) { delete(p.dynamicTopics, topicName) } + p.dynamicHandlersMux.Lock() + delete(p.dynamicHandlers, topicName) + p.dynamicHandlersMux.Unlock() + fmt.Printf("πŸ—‘οΈ Left dynamic topic: %s\n", topicName) } @@ -337,11 +392,12 @@ func (p *PubSub) PublishToDynamicTopic(topicName string, msgType MessageType, da return fmt.Errorf("not subscribed to dynamic topic: %s", topicName) } + payload := p.sanitizePayload(topicName, msgType, data) msg := Message{ Type: msgType, From: p.host.ID().String(), Timestamp: time.Now(), - Data: data, + Data: payload, } msgBytes, err := json.Marshal(msg) @@ -356,34 +412,35 @@ func (p *PubSub) PublishToDynamicTopic(topicName string, msgType MessageType, da // wrapping it in the CHORUS Message envelope. Intended for HMMM per-issue rooms // or other modules that maintain their own schemas. 
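+// Note that PublishRaw bypasses sanitizePayload, so callers are responsible for
+// redacting sensitive content before publishing.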
func (p *PubSub) PublishRaw(topicName string, payload []byte) error { - // Dynamic topic - p.dynamicTopicsMux.RLock() - if topic, exists := p.dynamicTopics[topicName]; exists { - p.dynamicTopicsMux.RUnlock() - return topic.Publish(p.ctx, payload) - } - p.dynamicTopicsMux.RUnlock() + // Dynamic topic + p.dynamicTopicsMux.RLock() + if topic, exists := p.dynamicTopics[topicName]; exists { + p.dynamicTopicsMux.RUnlock() + return topic.Publish(p.ctx, payload) + } + p.dynamicTopicsMux.RUnlock() - // Static topics by name - switch topicName { - case p.chorusTopicName: - return p.chorusTopic.Publish(p.ctx, payload) - case p.hmmmTopicName: - return p.hmmmTopic.Publish(p.ctx, payload) - case p.contextTopicName: - return p.contextTopic.Publish(p.ctx, payload) - default: - return fmt.Errorf("not subscribed to topic: %s", topicName) - } + // Static topics by name + switch topicName { + case p.chorusTopicName: + return p.chorusTopic.Publish(p.ctx, payload) + case p.hmmmTopicName: + return p.hmmmTopic.Publish(p.ctx, payload) + case p.contextTopicName: + return p.contextTopic.Publish(p.ctx, payload) + default: + return fmt.Errorf("not subscribed to topic: %s", topicName) + } } // PublishBzzzMessage publishes a message to the Bzzz coordination topic func (p *PubSub) PublishBzzzMessage(msgType MessageType, data map[string]interface{}) error { + payload := p.sanitizePayload(p.chorusTopicName, msgType, data) msg := Message{ Type: msgType, From: p.host.ID().String(), Timestamp: time.Now(), - Data: data, + Data: payload, } msgBytes, err := json.Marshal(msg) @@ -396,11 +453,12 @@ func (p *PubSub) PublishBzzzMessage(msgType MessageType, data map[string]interfa // PublishHmmmMessage publishes a message to the HMMM meta-discussion topic func (p *PubSub) PublishHmmmMessage(msgType MessageType, data map[string]interface{}) error { + payload := p.sanitizePayload(p.hmmmTopicName, msgType, data) msg := Message{ Type: msgType, From: p.host.ID().String(), Timestamp: time.Now(), - Data: data, + Data: payload, } msgBytes, err := json.Marshal(msg) @@ -425,11 +483,12 @@ func (p *PubSub) SetAntennaeMessageHandler(handler func(msg Message, from peer.I // PublishContextFeedbackMessage publishes a message to the Context Feedback topic func (p *PubSub) PublishContextFeedbackMessage(msgType MessageType, data map[string]interface{}) error { + payload := p.sanitizePayload(p.contextTopicName, msgType, data) msg := Message{ Type: msgType, From: p.host.ID().String(), Timestamp: time.Now(), - Data: data, + Data: payload, } msgBytes, err := json.Marshal(msg) @@ -442,11 +501,16 @@ func (p *PubSub) PublishContextFeedbackMessage(msgType MessageType, data map[str // PublishRoleBasedMessage publishes a role-based collaboration message func (p *PubSub) PublishRoleBasedMessage(msgType MessageType, data map[string]interface{}, opts MessageOptions) error { + topicName := p.chorusTopicName + if isRoleMessage(msgType) { + topicName = p.hmmmTopicName + } + payload := p.sanitizePayload(topicName, msgType, data) msg := Message{ Type: msgType, From: p.host.ID().String(), Timestamp: time.Now(), - Data: data, + Data: payload, FromRole: opts.FromRole, ToRoles: opts.ToRoles, RequiredExpertise: opts.RequiredExpertise, @@ -462,10 +526,8 @@ func (p *PubSub) PublishRoleBasedMessage(msgType MessageType, data map[string]in // Determine which topic to use based on message type var topic *pubsub.Topic - switch msgType { - case RoleAnnouncement, ExpertiseRequest, ExpertiseResponse, StatusUpdate, - WorkAllocation, RoleCollaboration, MentorshipRequest, 
MentorshipResponse, - ProjectUpdate, DeliverableReady: + switch { + case isRoleMessage(msgType): topic = p.hmmmTopic // Use HMMM topic for role-based messages default: topic = p.chorusTopic // Default to Bzzz topic @@ -492,14 +554,14 @@ func (p *PubSub) PublishSlurpContextUpdate(data map[string]interface{}) error { // PublishSlurpIntegrationEvent publishes a generic SLURP integration event func (p *PubSub) PublishSlurpIntegrationEvent(eventType string, discussionID string, slurpEvent map[string]interface{}) error { data := map[string]interface{}{ - "event_type": eventType, - "discussion_id": discussionID, - "slurp_event": slurpEvent, - "timestamp": time.Now(), - "source": "hmmm-slurp-integration", - "peer_id": p.host.ID().String(), + "event_type": eventType, + "discussion_id": discussionID, + "slurp_event": slurpEvent, + "timestamp": time.Now(), + "source": "hmmm-slurp-integration", + "peer_id": p.host.ID().String(), } - + return p.PublishSlurpEventGenerated(data) } @@ -604,15 +666,23 @@ func (p *PubSub) handleContextFeedbackMessages() { } } +// getDynamicHandler returns the raw handler for a topic if registered. +func (p *PubSub) getDynamicHandler(topicName string) func([]byte, peer.ID) { + p.dynamicHandlersMux.RLock() + handler := p.dynamicHandlers[topicName] + p.dynamicHandlersMux.RUnlock() + return handler +} + // handleDynamicMessages processes messages from a dynamic topic subscription -func (p *PubSub) handleDynamicMessages(sub *pubsub.Subscription) { +func (p *PubSub) handleDynamicMessages(topicName string, sub *pubsub.Subscription) { for { msg, err := sub.Next(p.ctx) if err != nil { if p.ctx.Err() != nil || err.Error() == "subscription cancelled" { return // Subscription was cancelled, exit handler } - fmt.Printf("❌ Error receiving dynamic message: %v\n", err) + fmt.Printf("❌ Error receiving dynamic message on %s: %v\n", topicName, err) continue } @@ -620,13 +690,18 @@ func (p *PubSub) handleDynamicMessages(sub *pubsub.Subscription) { continue } - var dynamicMsg Message - if err := json.Unmarshal(msg.Data, &dynamicMsg); err != nil { - fmt.Printf("❌ Failed to unmarshal dynamic message: %v\n", err) + if handler := p.getDynamicHandler(topicName); handler != nil { + handler(msg.Data, msg.ReceivedFrom) continue } - // Use the main HMMM handler for all dynamic messages + var dynamicMsg Message + if err := json.Unmarshal(msg.Data, &dynamicMsg); err != nil { + fmt.Printf("❌ Failed to unmarshal dynamic message on %s: %v\n", topicName, err) + continue + } + + // Use the main HMMM handler for all dynamic messages without custom handlers if p.HmmmMessageHandler != nil { p.HmmmMessageHandler(dynamicMsg, msg.ReceivedFrom) } @@ -636,7 +711,7 @@ func (p *PubSub) handleDynamicMessages(sub *pubsub.Subscription) { // processBzzzMessage handles different types of Bzzz coordination messages func (p *PubSub) processBzzzMessage(msg Message, from peer.ID) { fmt.Printf("🐝 Bzzz [%s] from %s: %v\n", msg.Type, from.ShortString(), msg.Data) - + // Log to hypercore if logger is available if p.hypercoreLog != nil { logData := map[string]interface{}{ @@ -647,7 +722,7 @@ func (p *PubSub) processBzzzMessage(msg Message, from peer.ID) { "data": msg.Data, "topic": "CHORUS", } - + // Map pubsub message types to hypercore log types var logType string switch msg.Type { @@ -666,7 +741,7 @@ func (p *PubSub) processBzzzMessage(msg Message, from peer.ID) { default: logType = "network_event" } - + if err := p.hypercoreLog.AppendString(logType, logData); err != nil { fmt.Printf("❌ Failed to log Bzzz message to hypercore: 
%v\n", err) } @@ -675,9 +750,9 @@ func (p *PubSub) processBzzzMessage(msg Message, from peer.ID) { // processHmmmMessage provides default handling for HMMM messages if no external handler is set func (p *PubSub) processHmmmMessage(msg Message, from peer.ID) { - fmt.Printf("🎯 Default HMMM Handler [%s] from %s: %v\n", + fmt.Printf("🎯 Default HMMM Handler [%s] from %s: %v\n", msg.Type, from.ShortString(), msg.Data) - + // Log to hypercore if logger is available if p.hypercoreLog != nil { logData := map[string]interface{}{ @@ -694,7 +769,7 @@ func (p *PubSub) processHmmmMessage(msg Message, from peer.ID) { "priority": msg.Priority, "thread_id": msg.ThreadID, } - + // Map pubsub message types to hypercore log types var logType string switch msg.Type { @@ -717,7 +792,7 @@ func (p *PubSub) processHmmmMessage(msg Message, from peer.ID) { default: logType = "collaboration" } - + if err := p.hypercoreLog.AppendString(logType, logData); err != nil { fmt.Printf("❌ Failed to log HMMM message to hypercore: %v\n", err) } @@ -726,25 +801,25 @@ func (p *PubSub) processHmmmMessage(msg Message, from peer.ID) { // processContextFeedbackMessage provides default handling for context feedback messages if no external handler is set func (p *PubSub) processContextFeedbackMessage(msg Message, from peer.ID) { - fmt.Printf("🧠 Context Feedback [%s] from %s: %v\n", + fmt.Printf("🧠 Context Feedback [%s] from %s: %v\n", msg.Type, from.ShortString(), msg.Data) - + // Log to hypercore if logger is available if p.hypercoreLog != nil { logData := map[string]interface{}{ - "message_type": string(msg.Type), - "from_peer": from.String(), - "from_short": from.ShortString(), - "timestamp": msg.Timestamp, - "data": msg.Data, - "topic": "context_feedback", - "from_role": msg.FromRole, - "to_roles": msg.ToRoles, - "project_id": msg.ProjectID, - "priority": msg.Priority, - "thread_id": msg.ThreadID, + "message_type": string(msg.Type), + "from_peer": from.String(), + "from_short": from.ShortString(), + "timestamp": msg.Timestamp, + "data": msg.Data, + "topic": "context_feedback", + "from_role": msg.FromRole, + "to_roles": msg.ToRoles, + "project_id": msg.ProjectID, + "priority": msg.Priority, + "thread_id": msg.ThreadID, } - + // Map context feedback message types to hypercore log types var logType string switch msg.Type { @@ -757,17 +832,79 @@ func (p *PubSub) processContextFeedbackMessage(msg Message, from peer.ID) { default: logType = "context_feedback" } - + if err := p.hypercoreLog.AppendString(logType, logData); err != nil { fmt.Printf("❌ Failed to log Context Feedback message to hypercore: %v\n", err) } } } +func (p *PubSub) sanitizePayload(topic string, msgType MessageType, data map[string]interface{}) map[string]interface{} { + if data == nil { + return nil + } + cloned := clonePayloadMap(data) + p.redactorMux.RLock() + redactor := p.redactor + p.redactorMux.RUnlock() + if redactor != nil { + labels := map[string]string{ + "source": "pubsub", + "topic": topic, + "message_type": string(msgType), + } + redactor.RedactMapWithLabels(context.Background(), cloned, labels) + } + return cloned +} + +func isRoleMessage(msgType MessageType) bool { + switch msgType { + case RoleAnnouncement, ExpertiseRequest, ExpertiseResponse, StatusUpdate, + WorkAllocation, RoleCollaboration, MentorshipRequest, MentorshipResponse, + ProjectUpdate, DeliverableReady: + return true + default: + return false + } +} + +func clonePayloadMap(in map[string]interface{}) map[string]interface{} { + if in == nil { + return nil + } + out := 
make(map[string]interface{}, len(in)) + for k, v := range in { + out[k] = clonePayloadValue(v) + } + return out +} + +func clonePayloadValue(v interface{}) interface{} { + switch tv := v.(type) { + case map[string]interface{}: + return clonePayloadMap(tv) + case []interface{}: + return clonePayloadSlice(tv) + case []string: + return append([]string(nil), tv...) + default: + return tv + } +} + +func clonePayloadSlice(in []interface{}) []interface{} { + out := make([]interface{}, len(in)) + for i, val := range in { + out[i] = clonePayloadValue(val) + } + return out +} + // Close shuts down the PubSub instance func (p *PubSub) Close() error { p.cancel() - + if p.chorusSub != nil { p.chorusSub.Cancel() } @@ -777,7 +914,7 @@ func (p *PubSub) Close() error { if p.contextSub != nil { p.contextSub.Cancel() } - + if p.chorusTopic != nil { p.chorusTopic.Close() } @@ -787,7 +924,13 @@ func (p *PubSub) Close() error { if p.contextTopic != nil { p.contextTopic.Close() } - + + p.dynamicSubsMux.Lock() + for _, sub := range p.dynamicSubs { + sub.Cancel() + } + p.dynamicSubsMux.Unlock() + p.dynamicTopicsMux.Lock() for _, topic := range p.dynamicTopics { topic.Close()