Harden CHORUS security and messaging stack

anthonyrawlins
2025-09-20 23:21:35 +10:00
parent 57751f277a
commit 1bb736c09a
25 changed files with 2793 additions and 2474 deletions

View File

@@ -10,7 +10,7 @@ CHORUS is the runtime that ties the CHORUS ecosystem together: libp2p mesh, DHT-
| DHT + DecisionPublisher | ✅ Running | Encrypted storage wired through `pkg/dht`; decisions written via `ucxl.DecisionPublisher`. |
| Election manager | ✅ Running | Admin election integrated with Backbeat; metrics exposed under `pkg/metrics`. |
| SLURP (context intelligence) | 🚧 Stubbed | `pkg/slurp/slurp.go` contains TODOs for resolver, temporal graphs, intelligence. Leader integration scaffolding exists but uses placeholder IDs/request forwarding. |
| SHHH (secrets sentinel) | 🚧 Sentinel live | `pkg/shhh` redacts hypercore + PubSub payloads with audit + metrics hooks (policy replay TBD). |
| HMMM routing | 🚧 Partial | PubSub topics join, but capability/role announcements and HMMM router wiring are placeholders (`internal/runtime/agent_support.go`). |

See `docs/progress/CHORUS-WHOOSH-development-plan.md` for the detailed build plan and `docs/progress/CHORUS-WHOOSH-roadmap.md` for sequencing.
@@ -33,7 +33,7 @@ You'll get a single agent container with:
- DHT storage (AGE-encrypted)
- HTTP API + health endpoints

**Missing today:** SLURP context resolution, advanced SHHH policy replay, HMMM per-issue routing. Expect log warnings/TODOs for those paths.

## Roadmap Highlights

View File

@@ -9,13 +9,19 @@ import (
"chorus/internal/logging" "chorus/internal/logging"
"chorus/pkg/config" "chorus/pkg/config"
"chorus/pubsub"
"chorus/pkg/repository"
"chorus/pkg/hmmm" "chorus/pkg/hmmm"
"chorus/pkg/repository"
"chorus/pubsub"
"github.com/google/uuid" "github.com/google/uuid"
"github.com/libp2p/go-libp2p/core/peer" "github.com/libp2p/go-libp2p/core/peer"
) )
// TaskProgressTracker is notified when tasks start and complete so availability broadcasts stay accurate.
type TaskProgressTracker interface {
AddTask(taskID string)
RemoveTask(taskID string)
}
// TaskCoordinator manages task discovery, assignment, and execution across multiple repositories // TaskCoordinator manages task discovery, assignment, and execution across multiple repositories
type TaskCoordinator struct { type TaskCoordinator struct {
pubsub *pubsub.PubSub pubsub *pubsub.PubSub
@@ -33,6 +39,7 @@ type TaskCoordinator struct {
	activeTasks map[string]*ActiveTask // taskKey -> active task
	taskLock    sync.RWMutex
	taskMatcher repository.TaskMatcher
	taskTracker TaskProgressTracker

	// Agent tracking
	nodeID string
@@ -63,7 +70,9 @@ func NewTaskCoordinator(
	cfg *config.Config,
	nodeID string,
	hmmmRouter *hmmm.Router,
	tracker TaskProgressTracker,
) *TaskCoordinator {
	coordinator := &TaskCoordinator{
		pubsub: ps,
		hlog:   hlog,
@@ -75,6 +84,7 @@ func NewTaskCoordinator(
		lastSync:     make(map[int]time.Time),
		factory:      &repository.DefaultProviderFactory{},
		taskMatcher:  &repository.DefaultTaskMatcher{},
		taskTracker:  tracker,
		nodeID:       nodeID,
		syncInterval: 30 * time.Second,
	}
@@ -185,6 +195,10 @@ func (tc *TaskCoordinator) processTask(task *repository.Task, provider repositor
	tc.agentInfo.CurrentTasks = len(tc.activeTasks)
	tc.taskLock.Unlock()

	if tc.taskTracker != nil {
		tc.taskTracker.AddTask(taskKey)
	}

	// Log task claim
	tc.hlog.Append(logging.TaskClaimed, map[string]interface{}{
		"task_number": task.Number,
@@ -334,6 +348,10 @@ func (tc *TaskCoordinator) executeTask(activeTask *ActiveTask) {
	tc.agentInfo.CurrentTasks = len(tc.activeTasks)
	tc.taskLock.Unlock()

	if tc.taskTracker != nil {
		tc.taskTracker.RemoveTask(taskKey)
	}

	// Log completion
	tc.hlog.Append(logging.TaskCompleted, map[string]interface{}{
		"task_number": activeTask.Task.Number,

View File

@@ -0,0 +1,30 @@
# Decision Record: Establish SHHH Sentinel Foundations
- **Date:** 2025-02-16
- **Status:** Accepted
- **Context:** CHORUS roadmap Phase 1 requires a secrets sentinel (`pkg/shhh`) before we wire COOEE/WHOOSH telemetry and audit plumbing. The runtime previously emitted placeholder TODOs and logged sensitive payloads without guard rails.
## Problem
- We lacked a reusable component to detect and redact secrets prior to log/telemetry fan-out.
- Without a dedicated sentinel we could not attach audit sinks or surface metrics for redaction events, blocking roadmap item `SEC-SHHH`.
## Decision
- Introduce `pkg/shhh` as the SHHH sentinel with:
- Curated default rules (API keys, bearer/OAuth tokens, private key PEM blocks, OpenAI secrets).
- Extensible configuration for custom regex rules and per-rule severity/tags.
- Optional audit sink and statistics collection for integration with COOEE/WHOOSH pipelines.
- Helpers to redact free-form text and `map[string]any` payloads used by our logging pipeline (see the usage sketch after this list).
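
A minimal usage sketch, assuming only the surface this commit wires into the runtime (`shhh.NewSentinel`, `shhh.Config`, `shhh.WithFindingObserver`, `RedactMapWithLabels`); the payload values and log output are illustrative:

```go
package main

import (
	"context"
	"log"

	"chorus/pkg/shhh"
)

func main() {
	// Curated default rules apply when no custom rules are configured.
	sentinel, err := shhh.NewSentinel(
		shhh.Config{},
		shhh.WithFindingObserver(func(ctx context.Context, findings []shhh.Finding) {
			for _, f := range findings {
				log.Printf("shhh finding: rule=%s severity=%s count=%d", f.Rule, f.Severity, f.Count)
			}
		}),
	)
	if err != nil {
		log.Fatalf("init sentinel: %v", err)
	}

	// Hypothetical payload; a real API key here would match a default rule.
	payload := map[string]any{
		"message": "rotating deploy credentials",
		"api_key": "sk-live-1234567890abcdef",
	}
	sentinel.RedactMapWithLabels(context.Background(), payload, map[string]string{
		"source": "example",
	})
	log.Printf("sanitized payload: %v", payload) // sensitive values become "[REDACTED]"
}
```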
## Rationale
- Starting with a focused set of high-signal rules gives immediate coverage for the most damaging leak classes without delaying larger SLURP/SHHH workstreams.
- The API mirrors other CHORUS subsystems (options, config structs, stats snapshots) so existing operators can plug metrics/audits without bespoke glue.
- Providing deterministic findings/locations simplifies future enforcement (e.g., WHOOSH UI badges, COOEE replay) while keeping implementation lean.
## Impact
- Runtime components can now instantiate SHHH and guarantee `[REDACTED]` placeholders for sensitive fields.
- Audit/event plumbing can be wired incrementally; hashes are emitted for replay without storing raw secrets (a sink sketch follows this list).
- Future roadmap tasks (policy driven rules, replay, UCXL evidence) can extend `pkg/shhh` rather than implementing ad-hoc redaction in each subsystem.
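
The audit contract is a single method; a hedged sketch of a sink mirroring the one the runtime registers (the `RecordRedaction` signature and the `Rule`/`Severity`/`Path` event fields match this commit's usage; the logger wiring is illustrative):

```go
package main

import (
	"context"
	"log"
	"os"

	"chorus/pkg/shhh"
)

// loggingAuditSink writes one line per redaction event; raw secret values
// never reach the sink, only rule metadata (and hashes for replay).
type loggingAuditSink struct {
	logger *log.Logger
}

func (s *loggingAuditSink) RecordRedaction(_ context.Context, event shhh.AuditEvent) {
	s.logger.Printf("shhh redaction rule=%s severity=%s path=%s", event.Rule, event.Severity, event.Path)
}

func main() {
	sentinel, err := shhh.NewSentinel(shhh.Config{})
	if err != nil {
		log.Fatalf("init sentinel: %v", err)
	}
	sentinel.SetAuditSink(&loggingAuditSink{logger: log.New(os.Stderr, "shhh ", log.LstdFlags)})
	_ = sentinel // wire into hypercore/PubSub via SetRedactor as the runtime does
}
```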
## Related Work
- Roadmap: `docs/progress/CHORUS-WHOOSH-roadmap.md` (Phase 1.2 `SEC-SHHH`).
- README coverage gap noted in `README.md` table (SHHH not implemented).

go.mod
View File

@@ -159,4 +159,4 @@ require (
	lukechampine.com/blake3 v1.2.1 // indirect
)

replace github.com/chorus-services/backbeat => ../BACKBEAT/backbeat/prototype

View File

@@ -1,6 +1,7 @@
package logging

import (
	"context"
	"crypto/sha256"
	"encoding/hex"
	"encoding/json"
@@ -8,6 +9,7 @@ import (
"sync" "sync"
"time" "time"
"chorus/pkg/shhh"
"github.com/libp2p/go-libp2p/core/peer" "github.com/libp2p/go-libp2p/core/peer"
) )
@@ -29,6 +31,8 @@ type HypercoreLog struct {
	// Replication
	replicators map[peer.ID]*Replicator

	redactor *shhh.Sentinel
}

// LogEntry represents a single entry in the distributed log
@@ -88,6 +92,13 @@ func NewHypercoreLog(peerID peer.ID) *HypercoreLog {
	}
}
// SetRedactor wires the SHHH sentinel so log payloads are sanitized before persistence.
func (h *HypercoreLog) SetRedactor(redactor *shhh.Sentinel) {
h.mutex.Lock()
defer h.mutex.Unlock()
h.redactor = redactor
}
// AppendString is a convenience method for string log types (to match interface)
func (h *HypercoreLog) AppendString(logType string, data map[string]interface{}) error {
	_, err := h.Append(LogType(logType), data)
@@ -101,12 +112,14 @@ func (h *HypercoreLog) Append(logType LogType, data map[string]interface{}) (*Lo
	index := uint64(len(h.entries))
sanitized := h.redactData(logType, data)
	entry := LogEntry{
		Index:     index,
		Timestamp: time.Now(),
		Author:    h.peerID.String(),
		Type:      logType,
		Data:      sanitized,
		PrevHash:  h.headHash,
	}
@@ -332,6 +345,65 @@ func (h *HypercoreLog) calculateEntryHash(entry LogEntry) (string, error) {
	return hex.EncodeToString(hash[:]), nil
}
func (h *HypercoreLog) redactData(logType LogType, data map[string]interface{}) map[string]interface{} {
cloned := cloneLogMap(data)
if cloned == nil {
return nil
}
if h.redactor != nil {
labels := map[string]string{
"source": "hypercore",
"log_type": string(logType),
}
h.redactor.RedactMapWithLabels(context.Background(), cloned, labels)
}
return cloned
}
func cloneLogMap(in map[string]interface{}) map[string]interface{} {
if in == nil {
return nil
}
out := make(map[string]interface{}, len(in))
for k, v := range in {
out[k] = cloneLogValue(v)
}
return out
}
func cloneLogValue(v interface{}) interface{} {
	// Note: `any` is an alias for `interface{}`, so separate cases for
	// map[string]any and []any would be duplicate cases and fail to compile.
	switch tv := v.(type) {
	case map[string]interface{}:
		return cloneLogMap(tv)
	case []interface{}:
		return cloneLogSlice(tv)
	case []string:
		return append([]string(nil), tv...)
	default:
		return tv
	}
}
func cloneLogSlice(in []interface{}) []interface{} {
out := make([]interface{}, len(in))
for i, val := range in {
out[i] = cloneLogValue(val)
}
return out
}
// createSignature creates a simplified signature for the entry
func (h *HypercoreLog) createSignature(entry LogEntry) string {
	// In production, this would use proper cryptographic signatures

View File

@@ -2,9 +2,11 @@ package runtime
import (
	"context"
	"fmt"
	"time"

	"chorus/internal/logging"
	"chorus/pkg/dht"
	"chorus/pkg/health"
	"chorus/pkg/shutdown"
	"chorus/pubsub"
@@ -126,16 +128,79 @@ func (r *SharedRuntime) statusReporter() {
// announceCapabilitiesOnChange announces capabilities when they change
func (r *SharedRuntime) announceCapabilitiesOnChange() {
	if r.PubSub == nil {
		r.Logger.Warn("⚠️ Capability broadcast skipped: PubSub not initialized")
		return
	}
r.Logger.Info("📢 Broadcasting agent capabilities to network")
activeTaskCount := 0
if r.TaskTracker != nil {
activeTaskCount = len(r.TaskTracker.GetActiveTasks())
}
announcement := map[string]interface{}{
"agent_id": r.Config.Agent.ID,
"node_id": r.Node.ID().ShortString(),
"version": AppVersion,
"capabilities": r.Config.Agent.Capabilities,
"expertise": r.Config.Agent.Expertise,
"models": r.Config.Agent.Models,
"specialization": r.Config.Agent.Specialization,
"max_tasks": r.Config.Agent.MaxTasks,
"current_tasks": activeTaskCount,
"timestamp": time.Now().Unix(),
"availability": "ready",
}
if err := r.PubSub.PublishBzzzMessage(pubsub.CapabilityBcast, announcement); err != nil {
r.Logger.Error("❌ Failed to broadcast capabilities: %v", err)
return
}
r.Logger.Info("✅ Capabilities broadcast published")
// TODO: Watch for live capability changes (role updates, model changes) and re-broadcast
}

// announceRoleOnStartup announces role when the agent starts
func (r *SharedRuntime) announceRoleOnStartup() {
	role := r.Config.Agent.Role
	if role == "" {
		r.Logger.Info("🎭 No agent role configured; skipping role announcement")
		return
	}

	if r.PubSub == nil {
		r.Logger.Warn("⚠️ Role announcement skipped: PubSub not initialized")
		return
	}
r.Logger.Info("🎭 Announcing agent role to collaboration mesh")
announcement := map[string]interface{}{
"agent_id": r.Config.Agent.ID,
"node_id": r.Node.ID().ShortString(),
"role": role,
"expertise": r.Config.Agent.Expertise,
"capabilities": r.Config.Agent.Capabilities,
"reports_to": r.Config.Agent.ReportsTo,
"specialization": r.Config.Agent.Specialization,
"timestamp": time.Now().Unix(),
}
opts := pubsub.MessageOptions{
FromRole: role,
Priority: "medium",
ThreadID: fmt.Sprintf("role:%s", role),
}
if err := r.PubSub.PublishRoleBasedMessage(pubsub.RoleAnnouncement, announcement, opts); err != nil {
r.Logger.Error("❌ Failed to announce role: %v", err)
return
}
r.Logger.Info("✅ Role announcement published")
}

func (r *SharedRuntime) setupHealthChecks(healthManager *health.Manager) {
@@ -170,12 +235,89 @@ func (r *SharedRuntime) setupHealthChecks(healthManager *health.Manager) {
		healthManager.RegisterCheck(backbeatCheck)
	}

	// Register enhanced health instrumentation when core subsystems are available
	if r.PubSub == nil {
		r.Logger.Warn("⚠️ Skipping enhanced health checks: PubSub not initialized")
		return
	}
if r.ElectionManager == nil {
r.Logger.Warn("⚠️ Skipping enhanced health checks: election manager not ready")
return
}
var replication *dht.ReplicationManager
if r.DHTNode != nil {
replication = r.DHTNode.ReplicationManager()
}
enhanced := health.NewEnhancedHealthChecks(
healthManager,
r.ElectionManager,
r.DHTNode,
r.PubSub,
replication,
&simpleLogger{logger: r.Logger},
)
r.EnhancedHealth = enhanced
r.Logger.Info("🩺 Enhanced health checks registered")
}

func (r *SharedRuntime) setupGracefulShutdown(shutdownManager *shutdown.Manager, healthManager *health.Manager) {
	if shutdownManager == nil {
		r.Logger.Warn("⚠️ Shutdown manager not initialized; graceful teardown skipped")
		return
	}
if r.HTTPServer != nil {
httpComponent := shutdown.NewGenericComponent("http-api-server", 10, true).
SetShutdownFunc(func(ctx context.Context) error {
return r.HTTPServer.Stop()
})
shutdownManager.Register(httpComponent)
}
if healthManager != nil {
healthComponent := shutdown.NewGenericComponent("health-manager", 15, true).
SetShutdownFunc(func(ctx context.Context) error {
return healthManager.Stop()
})
shutdownManager.Register(healthComponent)
}
if r.UCXIServer != nil {
ucxiComponent := shutdown.NewGenericComponent("ucxi-server", 20, true).
SetShutdownFunc(func(ctx context.Context) error {
return r.UCXIServer.Stop()
})
shutdownManager.Register(ucxiComponent)
}
if r.PubSub != nil {
shutdownManager.Register(shutdown.NewPubSubComponent("pubsub", r.PubSub.Close, 30))
}
if r.DHTNode != nil {
dhtComponent := shutdown.NewGenericComponent("dht-node", 35, true).
SetCloser(r.DHTNode.Close)
shutdownManager.Register(dhtComponent)
}
if r.Node != nil {
shutdownManager.Register(shutdown.NewP2PNodeComponent("p2p-node", r.Node.Close, 40))
}
if r.ElectionManager != nil {
shutdownManager.Register(shutdown.NewElectionManagerComponent("election-manager", r.ElectionManager.Stop, 45))
}
if r.BackbeatIntegration != nil {
backbeatComponent := shutdown.NewGenericComponent("backbeat-integration", 50, true).
SetShutdownFunc(func(ctx context.Context) error {
return r.BackbeatIntegration.Stop()
})
shutdownManager.Register(backbeatComponent)
}
r.Logger.Info("🛡️ Graceful shutdown components registered") r.Logger.Info("🛡️ Graceful shutdown components registered")
} }

View File

@@ -21,8 +21,10 @@ import (
"chorus/pkg/dht" "chorus/pkg/dht"
"chorus/pkg/election" "chorus/pkg/election"
"chorus/pkg/health" "chorus/pkg/health"
"chorus/pkg/shutdown" "chorus/pkg/metrics"
"chorus/pkg/prompt" "chorus/pkg/prompt"
"chorus/pkg/shhh"
"chorus/pkg/shutdown"
"chorus/pkg/ucxi" "chorus/pkg/ucxi"
"chorus/pkg/ucxl" "chorus/pkg/ucxl"
"chorus/pubsub" "chorus/pubsub"
@@ -119,8 +121,11 @@ type SharedRuntime struct {
	HTTPServer      *api.HTTPServer
	UCXIServer      *ucxi.Server
	HealthManager   *health.Manager
	EnhancedHealth  *health.EnhancedHealthChecks
	ShutdownManager *shutdown.Manager
	TaskTracker     *SimpleTaskTracker
	Metrics         *metrics.CHORUSMetrics
	Shhh            *shhh.Sentinel
}
// Initialize sets up all shared P2P infrastructure components
@@ -166,6 +171,21 @@ func Initialize(appMode string) (*SharedRuntime, error) {
	}
	runtime.Logger.Info("✅ AI provider configured successfully")
// Initialize metrics collector
runtime.Metrics = metrics.NewCHORUSMetrics(nil)
// Initialize SHHH sentinel
sentinel, err := shhh.NewSentinel(
shhh.Config{},
shhh.WithFindingObserver(runtime.handleShhhFindings),
)
if err != nil {
return nil, fmt.Errorf("failed to initialize SHHH sentinel: %v", err)
}
sentinel.SetAuditSink(&shhhAuditSink{logger: runtime.Logger})
runtime.Shhh = sentinel
runtime.Logger.Info("🛡️ SHHH sentinel initialized")
	// Initialize BACKBEAT integration
	var backbeatIntegration *backbeat.Integration
	backbeatIntegration, err = backbeat.NewIntegration(cfg, cfg.Agent.ID, runtime.Logger)
@@ -198,6 +218,9 @@ func Initialize(appMode string) (*SharedRuntime, error) {
	// Initialize Hypercore-style logger for P2P coordination
	hlog := logging.NewHypercoreLog(node.ID())
if runtime.Shhh != nil {
hlog.SetRedactor(runtime.Shhh)
}
	hlog.Append(logging.PeerJoined, map[string]interface{}{"status": "started"})
	runtime.HypercoreLog = hlog
	runtime.Logger.Info("📝 Hypercore logger initialized")
@@ -214,6 +237,9 @@ func Initialize(appMode string) (*SharedRuntime, error) {
	if err != nil {
		return nil, fmt.Errorf("failed to create PubSub: %v", err)
	}
if runtime.Shhh != nil {
ps.SetRedactor(runtime.Shhh)
}
	runtime.PubSub = ps
	runtime.Logger.Info("📡 PubSub system initialized")
@@ -456,6 +482,19 @@ func (r *SharedRuntime) initializeDHTStorage() error {
}

func (r *SharedRuntime) initializeServices() error {
// Create simple task tracker ahead of coordinator so broadcasts stay accurate
taskTracker := &SimpleTaskTracker{
maxTasks: r.Config.Agent.MaxTasks,
activeTasks: make(map[string]bool),
}
// Connect decision publisher to task tracker if available
if r.DecisionPublisher != nil {
taskTracker.decisionPublisher = r.DecisionPublisher
r.Logger.Info("📤 Task completion decisions will be published to DHT")
}
r.TaskTracker = taskTracker
	// === Task Coordination Integration ===
	taskCoordinator := coordinator.NewTaskCoordinator(
		r.Context,
@@ -464,6 +503,7 @@ func (r *SharedRuntime) initializeServices() error {
		r.Config,
		r.Node.ID().ShortString(),
		nil, // HMMM router placeholder
taskTracker,
	)

	taskCoordinator.Start()
@@ -515,23 +555,29 @@ func (r *SharedRuntime) initializeServices() error {
r.Logger.Info("⚪ UCXI server disabled") r.Logger.Info("⚪ UCXI server disabled")
} }
r.UCXIServer = ucxiServer r.UCXIServer = ucxiServer
	return nil
}
func (r *SharedRuntime) handleShhhFindings(ctx context.Context, findings []shhh.Finding) {
if r == nil || r.Metrics == nil {
return
}
for _, finding := range findings {
r.Metrics.IncrementSHHHFindings(finding.Rule, string(finding.Severity), finding.Count)
}
}
type shhhAuditSink struct {
logger *SimpleLogger
}
func (s *shhhAuditSink) RecordRedaction(_ context.Context, event shhh.AuditEvent) {
if s == nil || s.logger == nil {
return
}
s.logger.Warn("🔒 SHHH redaction applied (rule=%s severity=%s path=%s)", event.Rule, event.Severity, event.Path)
}
// initializeAIProvider configures the reasoning engine with the appropriate AI provider
func initializeAIProvider(cfg *config.Config, logger *SimpleLogger) error {
	// Set the AI provider

View File

@@ -34,6 +34,7 @@ type AgentConfig struct {
	Capabilities []string `yaml:"capabilities"`
	Models       []string `yaml:"models"`
	Role         string   `yaml:"role"`
	Project      string   `yaml:"project"`
	Expertise    []string `yaml:"expertise"`
	ReportsTo    string   `yaml:"reports_to"`
	Deliverables []string `yaml:"deliverables"`
@@ -149,6 +150,7 @@ func LoadFromEnvironment() (*Config, error) {
	Capabilities: getEnvArrayOrDefault("CHORUS_CAPABILITIES", []string{"general_development", "task_coordination"}),
	Models:       getEnvArrayOrDefault("CHORUS_MODELS", []string{"meta/llama-3.1-8b-instruct"}),
	Role:         getEnvOrDefault("CHORUS_ROLE", ""),
	Project:      getEnvOrDefault("CHORUS_PROJECT", "chorus"),
	Expertise:    getEnvArrayOrDefault("CHORUS_EXPERTISE", []string{}),
	ReportsTo:    getEnvOrDefault("CHORUS_REPORTS_TO", ""),
	Deliverables: getEnvArrayOrDefault("CHORUS_DELIVERABLES", []string{}),

View File

@@ -96,6 +96,46 @@ func GetPredefinedRoles() map[string]*RoleDefinition {
			AuthorityLevel: AuthorityAdmin,
			CanDecrypt:     []string{"security_engineer", "project_manager", "backend_developer", "frontend_developer", "devops_engineer"},
		},
"security_expert": {
Name: "security_expert",
Description: "Advanced security analysis and policy work",
Capabilities: []string{"security", "policy", "response"},
AccessLevel: "high",
AuthorityLevel: AuthorityAdmin,
CanDecrypt: []string{"security_expert", "security_engineer", "project_manager"},
},
"senior_software_architect": {
Name: "senior_software_architect",
Description: "Architecture governance and system design",
Capabilities: []string{"architecture", "design", "coordination"},
AccessLevel: "high",
AuthorityLevel: AuthorityAdmin,
CanDecrypt: []string{"senior_software_architect", "project_manager", "backend_developer", "frontend_developer"},
},
"qa_engineer": {
Name: "qa_engineer",
Description: "Quality assurance and testing",
Capabilities: []string{"testing", "validation"},
AccessLevel: "medium",
AuthorityLevel: AuthorityFull,
CanDecrypt: []string{"qa_engineer", "backend_developer", "frontend_developer"},
},
"readonly_user": {
Name: "readonly_user",
Description: "Read-only observer with audit access",
Capabilities: []string{"observation"},
AccessLevel: "low",
AuthorityLevel: AuthorityReadOnly,
CanDecrypt: []string{"readonly_user"},
},
"suggestion_only_role": {
Name: "suggestion_only_role",
Description: "Can propose suggestions but not execute",
Capabilities: []string{"recommendation"},
AccessLevel: "low",
AuthorityLevel: AuthoritySuggestion,
CanDecrypt: []string{"suggestion_only_role"},
},
	}
}

View File

@@ -6,15 +6,15 @@ import (
"sync" "sync"
"time" "time"
"crypto/sha256"
"github.com/ipfs/go-cid"
dht "github.com/libp2p/go-libp2p-kad-dht"
"github.com/libp2p/go-libp2p/core/host" "github.com/libp2p/go-libp2p/core/host"
"github.com/libp2p/go-libp2p/core/peer" "github.com/libp2p/go-libp2p/core/peer"
"github.com/libp2p/go-libp2p/core/protocol" "github.com/libp2p/go-libp2p/core/protocol"
"github.com/libp2p/go-libp2p/core/routing" "github.com/libp2p/go-libp2p/core/routing"
dht "github.com/libp2p/go-libp2p-kad-dht"
"github.com/multiformats/go-multiaddr" "github.com/multiformats/go-multiaddr"
"github.com/multiformats/go-multihash" "github.com/multiformats/go-multihash"
"github.com/ipfs/go-cid"
"crypto/sha256"
) )
// LibP2PDHT provides distributed hash table functionality for CHORUS peer discovery
@@ -24,6 +24,7 @@ type LibP2PDHT struct {
	ctx       context.Context
	cancel    context.CancelFunc
	config    *Config
	startTime time.Time

	// Bootstrap state
	bootstrapped bool
@@ -59,6 +60,8 @@ type Config struct {
}

const defaultProviderResultLimit = 20

// PeerInfo holds information about discovered peers
type PeerInfo struct {
ID peer.ID ID peer.ID
Addresses []multiaddr.Multiaddr Addresses []multiaddr.Multiaddr
@@ -79,6 +82,11 @@ func DefaultConfig() *Config {
	}
}
// NewDHT is a backward compatible helper that delegates to NewLibP2PDHT.
func NewDHT(ctx context.Context, host host.Host, opts ...Option) (*LibP2PDHT, error) {
return NewLibP2PDHT(ctx, host, opts...)
}
// NewLibP2PDHT creates a new LibP2PDHT instance
func NewLibP2PDHT(ctx context.Context, host host.Host, opts ...Option) (*LibP2PDHT, error) {
	config := DefaultConfig()
@@ -105,6 +113,7 @@ func NewLibP2PDHT(ctx context.Context, host host.Host, opts ...Option) (*LibP2PD
		ctx:        dhtCtx,
		cancel:     cancel,
		config:     config,
		startTime:  time.Now(),
		knownPeers: make(map[peer.ID]*PeerInfo),
	}
@@ -271,23 +280,24 @@ func (d *LibP2PDHT) FindProviders(ctx context.Context, key string, limit int) ([
return nil, fmt.Errorf("failed to create CID from key: %w", err) return nil, fmt.Errorf("failed to create CID from key: %w", err)
} }
	maxProviders := limit
	if maxProviders <= 0 {
		maxProviders = defaultProviderResultLimit
	}

	providerCtx, cancel := context.WithCancel(ctx)
	defer cancel()

	providersChan := d.kdht.FindProvidersAsync(providerCtx, keyCID, maxProviders)
	providers := make([]peer.AddrInfo, 0, maxProviders)

	for providerInfo := range providersChan {
		providers = append(providers, providerInfo)
		if limit > 0 && len(providers) >= limit {
			cancel()
			break
		}
	}
	return providers, nil
}
@@ -329,6 +339,22 @@ func (d *LibP2PDHT) GetConnectedPeers() []peer.ID {
	return d.kdht.Host().Network().Peers()
}
// GetStats reports basic runtime statistics for the DHT
func (d *LibP2PDHT) GetStats() DHTStats {
stats := DHTStats{
TotalPeers: len(d.GetConnectedPeers()),
Uptime: time.Since(d.startTime),
}
if d.replicationManager != nil {
if metrics := d.replicationManager.GetMetrics(); metrics != nil {
stats.TotalKeys = int(metrics.TotalKeys)
}
}
return stats
}
// RegisterPeer registers a peer with capability information
func (d *LibP2PDHT) RegisterPeer(peerID peer.ID, agent, role string, capabilities []string) {
	d.peersMutex.Lock()
@@ -617,6 +643,11 @@ func (d *LibP2PDHT) IsReplicationEnabled() bool {
	return d.replicationManager != nil
}
// ReplicationManager returns the underlying replication manager if enabled.
func (d *LibP2PDHT) ReplicationManager() *ReplicationManager {
return d.replicationManager
}
// Close shuts down the DHT
func (d *LibP2PDHT) Close() error {
	// Stop replication manager first

View File

@@ -2,546 +2,155 @@ package dht
import (
	"context"
	"strings"
	"testing"
	"time"

	libp2p "github.com/libp2p/go-libp2p"
	dhtmode "github.com/libp2p/go-libp2p-kad-dht"
	"github.com/libp2p/go-libp2p/core/test"
)

type harness struct {
	ctx  context.Context
	host libp2pHost
	dht  *LibP2PDHT
}

type libp2pHost interface {
	Close() error
}

func newHarness(t *testing.T, opts ...Option) *harness {
	t.Helper()

	ctx, cancel := context.WithCancel(context.Background())

	host, err := libp2p.New(libp2p.ListenAddrStrings("/ip4/127.0.0.1/tcp/0"))
	if err != nil {
		cancel()
		t.Fatalf("failed to create libp2p host: %v", err)
	}

	options := append([]Option{WithAutoBootstrap(false)}, opts...)

	d, err := NewLibP2PDHT(ctx, host, options...)
	if err != nil {
		host.Close()
		cancel()
		t.Fatalf("failed to create DHT: %v", err)
	}

	t.Cleanup(func() {
		d.Close()
		host.Close()
		cancel()
	})

	return &harness{ctx: ctx, host: host, dht: d}
}

func TestDefaultConfig(t *testing.T) {
	cfg := DefaultConfig()

	if cfg.ProtocolPrefix != "/CHORUS" {
		t.Fatalf("expected protocol prefix '/CHORUS', got %s", cfg.ProtocolPrefix)
	}
	if cfg.BootstrapTimeout != 30*time.Second {
		t.Fatalf("expected bootstrap timeout 30s, got %v", cfg.BootstrapTimeout)
	}
	if cfg.Mode != dhtmode.ModeAuto {
		t.Fatalf("expected mode auto, got %v", cfg.Mode)
	}
	if !cfg.AutoBootstrap {
		t.Fatal("expected auto bootstrap to be enabled")
	}
}

func TestWithOptionsOverridesDefaults(t *testing.T) {
	h := newHarness(t,
		WithProtocolPrefix("/custom"),
		WithDiscoveryInterval(2*time.Minute),
		WithBootstrapTimeout(45*time.Second),
		WithMode(dhtmode.ModeClient),
		WithAutoBootstrap(true),
	)

	cfg := h.dht.config

	if cfg.ProtocolPrefix != "/custom" {
		t.Fatalf("expected protocol prefix '/custom', got %s", cfg.ProtocolPrefix)
	}
	if cfg.DiscoveryInterval != 2*time.Minute {
		t.Fatalf("expected discovery interval 2m, got %v", cfg.DiscoveryInterval)
	}
	if cfg.BootstrapTimeout != 45*time.Second {
		t.Fatalf("expected bootstrap timeout 45s, got %v", cfg.BootstrapTimeout)
	}
	if cfg.Mode != dhtmode.ModeClient {
		t.Fatalf("expected mode client, got %v", cfg.Mode)
	}
	if !cfg.AutoBootstrap {
		t.Fatal("expected auto bootstrap to remain enabled")
	}
}

func TestProvideRequiresBootstrap(t *testing.T) {
	h := newHarness(t)

	err := h.dht.Provide(h.ctx, "key")
	if err == nil {
		t.Fatal("expected Provide to fail when not bootstrapped")
	}
	if !strings.Contains(err.Error(), "not bootstrapped") {
		t.Fatalf("expected error to indicate bootstrap requirement, got %v", err)
	}
}

func TestRegisterPeer(t *testing.T) {
	h := newHarness(t)

	peerID := test.RandPeerIDFatal(t)

	h.dht.RegisterPeer(peerID, "apollo", "platform", []string{"go"})

	peers := h.dht.GetKnownPeers()

	info, ok := peers[peerID]
	if !ok {
		t.Fatalf("expected peer to be tracked")
	}
	if info.Agent != "apollo" {
		t.Fatalf("expected agent apollo, got %s", info.Agent)
	}
	if info.Role != "platform" {
		t.Fatalf("expected role platform, got %s", info.Role)
	}
	if len(info.Capabilities) != 1 || info.Capabilities[0] != "go" {
		t.Fatalf("expected capability go, got %v", info.Capabilities)
	}
}

func TestGetStatsProvidesUptime(t *testing.T) {
	h := newHarness(t)

	stats := h.dht.GetStats()

	if stats.TotalPeers != 0 {
		t.Fatalf("expected zero peers, got %d", stats.TotalPeers)
	}
	if stats.Uptime < 0 {
		t.Fatalf("expected non-negative uptime, got %v", stats.Uptime)
	}
}

View File

@@ -2,559 +2,155 @@ package dht
import ( import (
"context" "context"
"strings"
"testing" "testing"
"time" "time"
"chorus/pkg/config" "chorus/pkg/config"
) )
// TestDHTSecurityPolicyEnforcement tests security policy enforcement in DHT operations type securityTestCase struct {
func TestDHTSecurityPolicyEnforcement(t *testing.T) {
ctx := context.Background()
testCases := []struct {
name string name string
currentRole string role string
operation string address string
ucxlAddress string
contentType string contentType string
expectSuccess bool expectSuccess bool
expectedError string expectErrHint string
}{
// Store operation tests
{
name: "admin_can_store_all_content",
currentRole: "admin",
operation: "store",
ucxlAddress: "agent1:admin:system:security_audit",
contentType: "decision",
expectSuccess: true,
},
{
name: "backend_developer_can_store_backend_content",
currentRole: "backend_developer",
operation: "store",
ucxlAddress: "agent1:backend_developer:api:endpoint_design",
contentType: "suggestion",
expectSuccess: true,
},
{
name: "readonly_role_cannot_store",
currentRole: "readonly_user",
operation: "store",
ucxlAddress: "agent1:readonly_user:project:observation",
contentType: "suggestion",
expectSuccess: false,
expectedError: "read-only authority",
},
{
name: "unknown_role_cannot_store",
currentRole: "invalid_role",
operation: "store",
ucxlAddress: "agent1:invalid_role:project:task",
contentType: "decision",
expectSuccess: false,
expectedError: "unknown creator role",
},
// Retrieve operation tests
{
name: "any_valid_role_can_retrieve",
currentRole: "qa_engineer",
operation: "retrieve",
ucxlAddress: "agent1:backend_developer:api:test_data",
expectSuccess: true,
},
{
name: "unknown_role_cannot_retrieve",
currentRole: "nonexistent_role",
operation: "retrieve",
ucxlAddress: "agent1:backend_developer:api:test_data",
expectSuccess: false,
expectedError: "unknown current role",
},
// Announce operation tests
{
name: "coordination_role_can_announce",
currentRole: "senior_software_architect",
operation: "announce",
ucxlAddress: "agent1:senior_software_architect:architecture:blueprint",
expectSuccess: true,
},
{
name: "decision_role_can_announce",
currentRole: "security_expert",
operation: "announce",
ucxlAddress: "agent1:security_expert:security:policy",
expectSuccess: true,
},
{
name: "suggestion_role_cannot_announce",
currentRole: "suggestion_only_role",
operation: "announce",
ucxlAddress: "agent1:suggestion_only_role:project:idea",
expectSuccess: false,
expectedError: "lacks authority",
},
{
name: "readonly_role_cannot_announce",
currentRole: "readonly_user",
operation: "announce",
ucxlAddress: "agent1:readonly_user:project:observation",
expectSuccess: false,
expectedError: "lacks authority",
},
} }
for _, tc := range testCases { func newTestEncryptedStorage(cfg *config.Config) *EncryptedDHTStorage {
t.Run(tc.name, func(t *testing.T) {
// Create test configuration
cfg := &config.Config{
Agent: config.AgentConfig{
ID: "test-agent",
Role: tc.currentRole,
},
Security: config.SecurityConfig{
KeyRotationDays: 90,
AuditLogging: true,
AuditPath: "/tmp/test-security-audit.log",
},
}
// Create mock encrypted storage
eds := createMockEncryptedStorage(ctx, cfg)
var err error
switch tc.operation {
case "store":
err = eds.checkStoreAccessPolicy(tc.currentRole, tc.ucxlAddress, tc.contentType)
case "retrieve":
err = eds.checkRetrieveAccessPolicy(tc.currentRole, tc.ucxlAddress)
case "announce":
err = eds.checkAnnounceAccessPolicy(tc.currentRole, tc.ucxlAddress)
}
if tc.expectSuccess {
if err != nil {
t.Errorf("Expected %s operation to succeed for role %s, but got error: %v",
tc.operation, tc.currentRole, err)
}
} else {
if err == nil {
t.Errorf("Expected %s operation to fail for role %s, but it succeeded",
tc.operation, tc.currentRole)
}
if tc.expectedError != "" && !containsSubstring(err.Error(), tc.expectedError) {
t.Errorf("Expected error to contain '%s', got '%s'", tc.expectedError, err.Error())
}
}
})
}
}
// TestDHTAuditLogging tests comprehensive audit logging for DHT operations
func TestDHTAuditLogging(t *testing.T) {
ctx := context.Background()
testCases := []struct {
name string
operation string
role string
ucxlAddress string
success bool
errorMsg string
expectAudit bool
}{
{
name: "successful_store_operation",
operation: "store",
role: "backend_developer",
ucxlAddress: "agent1:backend_developer:api:user_service",
success: true,
expectAudit: true,
},
{
name: "failed_store_operation",
operation: "store",
role: "readonly_user",
ucxlAddress: "agent1:readonly_user:project:readonly_attempt",
success: false,
errorMsg: "read-only authority",
expectAudit: true,
},
{
name: "successful_retrieve_operation",
operation: "retrieve",
role: "frontend_developer",
ucxlAddress: "agent1:backend_developer:api:user_data",
success: true,
expectAudit: true,
},
{
name: "successful_announce_operation",
operation: "announce",
role: "senior_software_architect",
ucxlAddress: "agent1:senior_software_architect:architecture:system_design",
success: true,
expectAudit: true,
},
{
name: "audit_disabled_no_logging",
operation: "store",
role: "backend_developer",
ucxlAddress: "agent1:backend_developer:api:no_audit",
success: true,
expectAudit: false,
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
// Create configuration with audit logging
cfg := &config.Config{
Agent: config.AgentConfig{
ID: "test-agent",
Role: tc.role,
},
Security: config.SecurityConfig{
KeyRotationDays: 90,
AuditLogging: tc.expectAudit,
AuditPath: "/tmp/test-dht-audit.log",
},
}
// Create mock encrypted storage
eds := createMockEncryptedStorage(ctx, cfg)
// Capture audit output
auditCaptured := false
// Simulate audit operation
switch tc.operation {
case "store":
// Mock the audit function call
if tc.expectAudit && cfg.Security.AuditLogging {
eds.auditStoreOperation(tc.ucxlAddress, tc.role, "test-content", 1024, tc.success, tc.errorMsg)
auditCaptured = true
}
case "retrieve":
if tc.expectAudit && cfg.Security.AuditLogging {
eds.auditRetrieveOperation(tc.ucxlAddress, tc.role, tc.success, tc.errorMsg)
auditCaptured = true
}
case "announce":
if tc.expectAudit && cfg.Security.AuditLogging {
eds.auditAnnounceOperation(tc.ucxlAddress, tc.role, tc.success, tc.errorMsg)
auditCaptured = true
}
}
// Verify audit logging behavior
if tc.expectAudit && !auditCaptured {
t.Errorf("Expected audit logging for %s operation but none was captured", tc.operation)
}
if !tc.expectAudit && auditCaptured {
t.Errorf("Expected no audit logging for %s operation but audit was captured", tc.operation)
}
})
}
}
// TestSecurityConfigIntegration tests integration with SecurityConfig
func TestSecurityConfigIntegration(t *testing.T) {
ctx := context.Background()
testConfigs := []struct {
name string
auditLogging bool
auditPath string
expectAuditWork bool
}{
{
name: "audit_enabled_with_path",
auditLogging: true,
auditPath: "/tmp/test-audit-enabled.log",
expectAuditWork: true,
},
{
name: "audit_disabled",
auditLogging: false,
auditPath: "/tmp/test-audit-disabled.log",
expectAuditWork: false,
},
{
name: "audit_enabled_no_path",
auditLogging: true,
auditPath: "",
expectAuditWork: false,
},
}
for _, tc := range testConfigs {
t.Run(tc.name, func(t *testing.T) {
cfg := &config.Config{
Agent: config.AgentConfig{
ID: "test-agent",
Role: "backend_developer",
},
Security: config.SecurityConfig{
KeyRotationDays: 90,
AuditLogging: tc.auditLogging,
AuditPath: tc.auditPath,
},
}
eds := createMockEncryptedStorage(ctx, cfg)
// Test audit function behavior with different configurations
auditWorked := func() bool {
if !cfg.Security.AuditLogging || cfg.Security.AuditPath == "" {
return false
}
return true
}()
if auditWorked != tc.expectAuditWork {
t.Errorf("Expected audit to work: %v, but got: %v", tc.expectAuditWork, auditWorked)
}
})
}
}
// TestRoleAuthorityHierarchy tests role authority hierarchy enforcement
func TestRoleAuthorityHierarchy(t *testing.T) {
ctx := context.Background()
// Test role authority levels for different operations
authorityTests := []struct {
role string
authorityLevel config.AuthorityLevel
canStore bool
canRetrieve bool
canAnnounce bool
}{
{
role: "admin",
authorityLevel: config.AuthorityMaster,
canStore: true,
canRetrieve: true,
canAnnounce: true,
},
{
role: "senior_software_architect",
authorityLevel: config.AuthorityDecision,
canStore: true,
canRetrieve: true,
canAnnounce: true,
},
{
role: "security_expert",
authorityLevel: config.AuthorityCoordination,
canStore: true,
canRetrieve: true,
canAnnounce: true,
},
{
role: "backend_developer",
authorityLevel: config.AuthoritySuggestion,
canStore: true,
canRetrieve: true,
canAnnounce: false,
},
}
for _, tt := range authorityTests {
t.Run(tt.role+"_authority_test", func(t *testing.T) {
cfg := &config.Config{
Agent: config.AgentConfig{
ID: "test-agent",
Role: tt.role,
},
Security: config.SecurityConfig{
KeyRotationDays: 90,
AuditLogging: true,
AuditPath: "/tmp/test-authority.log",
},
}
eds := createMockEncryptedStorage(ctx, cfg)
// Test store permission
storeErr := eds.checkStoreAccessPolicy(tt.role, "test:address", "content")
if tt.canStore && storeErr != nil {
t.Errorf("Role %s should be able to store but got error: %v", tt.role, storeErr)
}
if !tt.canStore && storeErr == nil {
t.Errorf("Role %s should not be able to store but operation succeeded", tt.role)
}
// Test retrieve permission
retrieveErr := eds.checkRetrieveAccessPolicy(tt.role, "test:address")
if tt.canRetrieve && retrieveErr != nil {
t.Errorf("Role %s should be able to retrieve but got error: %v", tt.role, retrieveErr)
}
if !tt.canRetrieve && retrieveErr == nil {
t.Errorf("Role %s should not be able to retrieve but operation succeeded", tt.role)
}
// Test announce permission
announceErr := eds.checkAnnounceAccessPolicy(tt.role, "test:address")
if tt.canAnnounce && announceErr != nil {
t.Errorf("Role %s should be able to announce but got error: %v", tt.role, announceErr)
}
if !tt.canAnnounce && announceErr == nil {
t.Errorf("Role %s should not be able to announce but operation succeeded", tt.role)
}
})
}
}
// TestSecurityMetrics tests security-related metrics
func TestSecurityMetrics(t *testing.T) {
ctx := context.Background()
cfg := &config.Config{
Agent: config.AgentConfig{
ID: "test-agent",
Role: "backend_developer",
},
Security: config.SecurityConfig{
KeyRotationDays: 90,
AuditLogging: true,
AuditPath: "/tmp/test-metrics.log",
},
}
eds := createMockEncryptedStorage(ctx, cfg)
// Simulate some operations to generate metrics
for i := 0; i < 5; i++ {
eds.metrics.StoredItems++
eds.metrics.RetrievedItems++
eds.metrics.EncryptionOps++
eds.metrics.DecryptionOps++
}
metrics := eds.GetMetrics()
expectedMetrics := map[string]int64{
"stored_items": 5,
"retrieved_items": 5,
"encryption_ops": 5,
"decryption_ops": 5,
}
for metricName, expectedValue := range expectedMetrics {
if actualValue, ok := metrics[metricName]; !ok {
t.Errorf("Expected metric %s to be present in metrics", metricName)
} else if actualValue != expectedValue {
t.Errorf("Expected %s to be %d, got %v", metricName, expectedValue, actualValue)
}
}
}
// Helper functions
func createMockEncryptedStorage(ctx context.Context, cfg *config.Config) *EncryptedDHTStorage {
	return &EncryptedDHTStorage{
		ctx:     ctx,
		config:  cfg,
		nodeID:  "test-node-id",
		cache:   make(map[string]*CachedEntry),
		metrics: &StorageMetrics{LastUpdate: time.Now()},
	}
}

// newTestEncryptedStorage builds storage for tests that do not need a custom context.
func newTestEncryptedStorage(cfg *config.Config) *EncryptedDHTStorage {
	return &EncryptedDHTStorage{
		ctx:     context.Background(),
		config:  cfg,
		nodeID:  "test-node",
		cache:   make(map[string]*CachedEntry),
		metrics: &StorageMetrics{LastUpdate: time.Now()},
	}
}
func TestCheckStoreAccessPolicy(t *testing.T) {
cases := []securityTestCase{
{
name: "backend developer can store",
role: "backend_developer",
address: "agent1:backend_developer:api:endpoint",
contentType: "decision",
expectSuccess: true,
		},
		{
			name:          "project manager can store",
			role:          "project_manager",
			address:       "agent1:project_manager:plan:milestone",
			contentType:   "decision",
			expectSuccess: true,
		},
		{
			name:          "read only user cannot store",
			role:          "readonly_user",
			address:       "agent1:readonly_user:note:observation",
			contentType:   "note",
			expectSuccess: false,
			expectErrHint: "read-only authority",
		},
		{
			name:          "unknown role rejected",
			role:          "ghost_role",
			address:       "agent1:ghost_role:context",
			contentType:   "decision",
			expectSuccess: false,
			expectErrHint: "unknown creator role",
		},
	}

	cfg := &config.Config{Agent: config.AgentConfig{}}
	eds := newTestEncryptedStorage(cfg)

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			err := eds.checkStoreAccessPolicy(tc.role, tc.address, tc.contentType)
			verifySecurityExpectation(t, tc.expectSuccess, tc.expectErrHint, err)
		})
	}
}
func TestCheckRetrieveAccessPolicy(t *testing.T) {
	cases := []securityTestCase{
		{
			name:          "qa engineer allowed",
			role:          "qa_engineer",
			address:       "agent1:backend_developer:api:tests",
			expectSuccess: true,
		},
		{
			name:          "unknown role rejected",
			role:          "unknown",
			address:       "agent1:backend_developer:api:tests",
			expectSuccess: false,
			expectErrHint: "unknown current role",
		},
	}

	cfg := &config.Config{Agent: config.AgentConfig{}}
	eds := newTestEncryptedStorage(cfg)

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			err := eds.checkRetrieveAccessPolicy(tc.role, tc.address)
			verifySecurityExpectation(t, tc.expectSuccess, tc.expectErrHint, err)
		})
	}
}
func TestCheckAnnounceAccessPolicy(t *testing.T) {
cases := []securityTestCase{
{
name: "architect can announce",
role: "senior_software_architect",
address: "agent1:senior_software_architect:architecture:proposal",
expectSuccess: true,
},
{
name: "suggestion role cannot announce",
role: "suggestion_only_role",
address: "agent1:suggestion_only_role:idea",
expectSuccess: false,
expectErrHint: "lacks authority",
},
{
name: "unknown role rejected",
role: "mystery",
address: "agent1:mystery:topic",
expectSuccess: false,
expectErrHint: "unknown current role",
},
}
cfg := &config.Config{Agent: config.AgentConfig{}}
eds := newTestEncryptedStorage(cfg)
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
err := eds.checkAnnounceAccessPolicy(tc.role, tc.address)
verifySecurityExpectation(t, tc.expectSuccess, tc.expectErrHint, err)
})
}
}
func verifySecurityExpectation(t *testing.T, expectSuccess bool, hint string, err error) {
t.Helper()
if expectSuccess {
if err != nil {
t.Fatalf("expected success, got error: %v", err)
}
return
}
if err == nil {
t.Fatal("expected error but got success")
}
if hint != "" && !strings.Contains(err.Error(), hint) {
t.Fatalf("expected error to contain %q, got %q", hint, err.Error())
}
}

View File

@@ -1,14 +1,117 @@
package dht

import (
	"context"
	"errors"
	"fmt"

	"chorus/pkg/config"

	libp2p "github.com/libp2p/go-libp2p"
	"github.com/libp2p/go-libp2p/core/host"
	"github.com/libp2p/go-libp2p/core/peer"
	"github.com/libp2p/go-libp2p/p2p/security/noise"
	"github.com/libp2p/go-libp2p/p2p/transport/tcp"
	"github.com/multiformats/go-multiaddr"
)

// RealDHT wraps a libp2p-based DHT to satisfy the generic DHT interface.
type RealDHT struct {
	cancel context.CancelFunc
	host   host.Host
	dht    *LibP2PDHT
}
// NewRealDHT creates a new real DHT implementation backed by libp2p.
func NewRealDHT(cfg *config.HybridConfig) (DHT, error) {
if cfg == nil {
cfg = &config.HybridConfig{}
}
ctx, cancel := context.WithCancel(context.Background())
listenAddr, err := multiaddr.NewMultiaddr("/ip4/0.0.0.0/tcp/0")
if err != nil {
cancel()
return nil, fmt.Errorf("failed to create listen address: %w", err)
}
host, err := libp2p.New(
libp2p.ListenAddrs(listenAddr),
libp2p.Security(noise.ID, noise.New),
libp2p.Transport(tcp.NewTCPTransport),
libp2p.DefaultMuxers,
libp2p.EnableRelay(),
)
if err != nil {
cancel()
return nil, fmt.Errorf("failed to create libp2p host: %w", err)
}
opts := []Option{
WithProtocolPrefix("/CHORUS"),
}
if nodes := cfg.GetDHTBootstrapNodes(); len(nodes) > 0 {
opts = append(opts, WithBootstrapPeersFromStrings(nodes))
}
libp2pDHT, err := NewLibP2PDHT(ctx, host, opts...)
if err != nil {
host.Close()
cancel()
return nil, fmt.Errorf("failed to initialize libp2p DHT: %w", err)
}
if err := libp2pDHT.Bootstrap(); err != nil {
libp2pDHT.Close()
host.Close()
cancel()
return nil, fmt.Errorf("failed to bootstrap DHT: %w", err)
}
return &RealDHT{
cancel: cancel,
host: host,
dht: libp2pDHT,
}, nil
}
// PutValue stores a value in the DHT.
func (r *RealDHT) PutValue(ctx context.Context, key string, value []byte) error {
return r.dht.PutValue(ctx, key, value)
}
// GetValue retrieves a value from the DHT.
func (r *RealDHT) GetValue(ctx context.Context, key string) ([]byte, error) {
return r.dht.GetValue(ctx, key)
}
// Provide announces that this node can provide the given key.
func (r *RealDHT) Provide(ctx context.Context, key string) error {
return r.dht.Provide(ctx, key)
}
// FindProviders locates peers that can provide the specified key.
func (r *RealDHT) FindProviders(ctx context.Context, key string, limit int) ([]peer.AddrInfo, error) {
return r.dht.FindProviders(ctx, key, limit)
}
// GetStats exposes runtime metrics for the real DHT.
func (r *RealDHT) GetStats() DHTStats {
return r.dht.GetStats()
}
// Close releases resources associated with the DHT.
func (r *RealDHT) Close() error {
r.cancel()
var errs []error
if err := r.dht.Close(); err != nil {
errs = append(errs, err)
}
if err := r.host.Close(); err != nil {
errs = append(errs, err)
}
return errors.Join(errs...)
}

View File

@@ -2,159 +2,106 @@ package dht
import (
	"context"
	"testing"
	"time"
)

func newReplicationManagerForTest(t *testing.T) *ReplicationManager {
	t.Helper()

	cfg := &ReplicationConfig{
		ReplicationFactor:         3,
		ReprovideInterval:         time.Hour,
		CleanupInterval:           time.Hour,
		ProviderTTL:               30 * time.Minute,
		MaxProvidersPerKey:        5,
		EnableAutoReplication:     false,
		EnableReprovide:           false,
		MaxConcurrentReplications: 1,
	}

	rm := NewReplicationManager(context.Background(), nil, cfg)
	t.Cleanup(func() {
		if rm.reprovideTimer != nil {
			rm.reprovideTimer.Stop()
		}
		if rm.cleanupTimer != nil {
			rm.cleanupTimer.Stop()
		}
		rm.cancel()
	})
	return rm
}

func TestAddContentRegistersKey(t *testing.T) {
	rm := newReplicationManagerForTest(t)

	if err := rm.AddContent("ucxl://example/path", 512, 1); err != nil {
		t.Fatalf("expected AddContent to succeed, got error: %v", err)
	}

	rm.keysMutex.RLock()
	record, ok := rm.contentKeys["ucxl://example/path"]
	rm.keysMutex.RUnlock()
	if !ok {
		t.Fatal("expected content key to be registered")
	}
	if record.Size != 512 {
		t.Fatalf("expected size 512, got %d", record.Size)
	}
}
func TestRemoveContentClearsTracking(t *testing.T) {
rm := newReplicationManagerForTest(t)
if err := rm.AddContent("ucxl://example/path", 512, 1); err != nil {
t.Fatalf("AddContent returned error: %v", err)
}
if err := rm.RemoveContent("ucxl://example/path"); err != nil {
t.Fatalf("RemoveContent returned error: %v", err)
}
rm.keysMutex.RLock()
_, exists := rm.contentKeys["ucxl://example/path"]
rm.keysMutex.RUnlock()
if exists {
t.Fatal("expected content key to be removed")
}
}
func TestGetReplicationStatusReturnsCopy(t *testing.T) {
rm := newReplicationManagerForTest(t)
if err := rm.AddContent("ucxl://example/path", 512, 1); err != nil {
t.Fatalf("AddContent returned error: %v", err)
}
status, err := rm.GetReplicationStatus("ucxl://example/path")
	if err != nil {
		t.Fatalf("GetReplicationStatus returned error: %v", err)
	}
	if status.Key != "ucxl://example/path" {
		t.Fatalf("expected status key to match, got %s", status.Key)
	}

	// Mutating status should not affect internal state
	status.HealthyProviders = 99
	internal, _ := rm.GetReplicationStatus("ucxl://example/path")
	if internal.HealthyProviders == 99 {
		t.Fatal("expected GetReplicationStatus to return a copy")
	}
}
func TestGetMetricsReturnsSnapshot(t *testing.T) {
	rm := newReplicationManagerForTest(t)

	metrics := rm.GetMetrics()
	if metrics == rm.metrics {
		t.Fatal("expected GetMetrics to return a copy of metrics")
	}
}

View File

@@ -29,6 +29,11 @@ const (
// ElectionState represents the current election state
type ElectionState string

const (
	electionTopic       = "CHORUS/election/v1"
	adminHeartbeatTopic = "CHORUS/admin/heartbeat/v1"
)

const (
	StateIdle        ElectionState = "idle"
	StateDiscovering ElectionState = "discovering"
@@ -149,14 +154,17 @@ func NewElectionManager(
func (em *ElectionManager) Start() error {
	log.Printf("🗳️ Starting election manager for node %s", em.nodeID)

	if err := em.pubsub.SubscribeRawTopic(electionTopic, func(data []byte, _ peer.ID) {
		em.handleElectionMessage(data)
	}); err != nil {
		return fmt.Errorf("failed to subscribe to election messages: %w", err)
	}

	if err := em.pubsub.SubscribeRawTopic(adminHeartbeatTopic, func(data []byte, _ peer.ID) {
		em.handleAdminHeartbeat(data)
	}); err != nil {
		return fmt.Errorf("failed to subscribe to admin heartbeat: %w", err)
	}

	// Start discovery process
	go em.startDiscoveryLoop()
@@ -839,10 +847,7 @@ func (em *ElectionManager) publishElectionMessage(msg ElectionMessage) error {
return fmt.Errorf("failed to marshal election message: %w", err) return fmt.Errorf("failed to marshal election message: %w", err)
} }
// TODO: Fix pubsub interface return em.pubsub.PublishRaw(electionTopic, data)
// return em.pubsub.Publish("CHORUS/election/v1", data)
_ = data // Avoid unused variable
return nil
} }
// SendAdminHeartbeat sends admin heartbeat (only if this node is admin)
@@ -864,10 +869,7 @@ func (em *ElectionManager) SendAdminHeartbeat() error {
return fmt.Errorf("failed to marshal heartbeat: %w", err) return fmt.Errorf("failed to marshal heartbeat: %w", err)
} }
// TODO: Fix pubsub interface return em.pubsub.PublishRaw(adminHeartbeatTopic, data)
// return em.pubsub.Publish("CHORUS/admin/heartbeat/v1", data)
_ = data // Avoid unused variable
return nil
} }
// min returns the minimum of two float64 values

View File

@@ -2,451 +2,185 @@ package election
import (
	"context"
	"encoding/json"
	"testing"
	"time"

	"chorus/pkg/config"
	pubsubpkg "chorus/pubsub"

	libp2p "github.com/libp2p/go-libp2p"
)

// newTestElectionManager wires a real libp2p host and PubSub instance so the
// election manager exercises the same code paths used in production.
func newTestElectionManager(t *testing.T) *ElectionManager {
	t.Helper()

	ctx, cancel := context.WithCancel(context.Background())

	host, err := libp2p.New(libp2p.ListenAddrStrings("/ip4/127.0.0.1/tcp/0"))
	if err != nil {
		cancel()
		t.Fatalf("failed to create libp2p host: %v", err)
	}

	ps, err := pubsubpkg.NewPubSub(ctx, host, "", "")
	if err != nil {
		host.Close()
		cancel()
		t.Fatalf("failed to create pubsub: %v", err)
	}

	cfg := &config.Config{
		Agent: config.AgentConfig{
			ID:             host.ID().String(),
			Role:           "context_admin",
			Capabilities:   []string{"admin_election", "context_curation"},
			Models:         []string{"meta/llama-3.1-8b-instruct"},
			Specialization: "coordination",
		},
		Security: config.SecurityConfig{},
	}

	em := NewElectionManager(ctx, cfg, host, ps, host.ID().String())

	t.Cleanup(func() {
		em.Stop()
		ps.Close()
		host.Close()
		cancel()
	})

	return em
}

func TestNewElectionManagerInitialState(t *testing.T) {
	em := newTestElectionManager(t)

	if em.state != StateIdle {
		t.Fatalf("expected initial state %q, got %q", StateIdle, em.state)
	}
	if em.currentTerm != 0 {
		t.Fatalf("expected initial term 0, got %d", em.currentTerm)
	}
	if em.nodeID == "" {
		t.Fatal("expected nodeID to be populated")
	}
}

func TestElectionManagerCanBeAdmin(t *testing.T) {
	em := newTestElectionManager(t)

	if !em.canBeAdmin() {
		t.Fatal("expected node to qualify for admin election")
	}

	em.config.Agent.Capabilities = []string{"runtime_support"}
	if em.canBeAdmin() {
		t.Fatal("expected node without admin capabilities to be ineligible")
	}
}

func TestFindElectionWinnerPrefersVotesThenScore(t *testing.T) {
	em := newTestElectionManager(t)

	em.mu.Lock()
	em.candidates = map[string]*AdminCandidate{
		"candidate-1": {
			NodeID: "candidate-1",
			PeerID: em.host.ID(),
			Score:  0.65,
		},
		"candidate-2": {
			NodeID: "candidate-2",
			PeerID: em.host.ID(),
			Score:  0.80,
		},
	}
	em.votes = map[string]string{
		"voter-a": "candidate-1",
		"voter-b": "candidate-2",
		"voter-c": "candidate-2",
	}
	em.mu.Unlock()

	winner := em.findElectionWinner()
	if winner == nil {
		t.Fatal("expected a winner to be selected")
	}
	if winner.NodeID != "candidate-2" {
		t.Fatalf("expected candidate-2 to win, got %s", winner.NodeID)
	}
}

func TestHandleElectionMessageAddsCandidate(t *testing.T) {
	em := newTestElectionManager(t)

	em.mu.Lock()
	em.currentTerm = 3
	em.state = StateElecting
	em.mu.Unlock()

	candidate := &AdminCandidate{
		NodeID:       "peer-2",
		PeerID:       em.host.ID(),
		Capabilities: []string{"admin_election"},
		Uptime:       time.Second,
		Score:        0.75,
	}

	payload, err := json.Marshal(candidate)
	if err != nil {
		t.Fatalf("failed to marshal candidate: %v", err)
	}
	var data map[string]interface{}
	if err := json.Unmarshal(payload, &data); err != nil {
		t.Fatalf("failed to unmarshal candidate payload: %v", err)
	}

	msg := ElectionMessage{
		Type:      "candidacy_announcement",
		NodeID:    "peer-2",
		Timestamp: time.Now(),
		Term:      3,
		Data:      data,
	}

	serialized, err := json.Marshal(msg)
	if err != nil {
		t.Fatalf("failed to marshal election message: %v", err)
	}

	em.handleElectionMessage(serialized)

	em.mu.RLock()
	_, exists := em.candidates["peer-2"]
	em.mu.RUnlock()
	if !exists {
		t.Fatal("expected candidacy announcement to register candidate")
	}
}

func TestSendAdminHeartbeatRequiresLeadership(t *testing.T) {
	em := newTestElectionManager(t)

	if err := em.SendAdminHeartbeat(); err == nil {
		t.Fatal("expected error when non-admin sends heartbeat")
	}

	if err := em.Start(); err != nil {
		t.Fatalf("failed to start election manager: %v", err)
	}

	em.mu.Lock()
	em.currentAdmin = em.nodeID
	em.mu.Unlock()

	if err := em.SendAdminHeartbeat(); err != nil {
		t.Fatalf("expected heartbeat to succeed for current admin, got error: %v", err)
	}
}

View File

@@ -2,15 +2,14 @@ package metrics
import (
	"context"
	"log"
	"net/http"
	"sync"
	"time"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
	"github.com/prometheus/client_golang/prometheus/promhttp"
)
// CHORUSMetrics provides comprehensive Prometheus metrics for the CHORUS system
@@ -78,6 +77,9 @@ type CHORUSMetrics struct {
	slurpActiveJobs       prometheus.Gauge
	slurpLeadershipEvents prometheus.Counter
// SHHH sentinel metrics
shhhFindings *prometheus.CounterVec
	// UCXI metrics (protocol resolution)
	ucxiRequests          *prometheus.CounterVec
	ucxiResolutionLatency prometheus.Histogram
@@ -409,6 +411,15 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) {
		},
	)
// SHHH metrics
m.shhhFindings = promauto.NewCounterVec(
prometheus.CounterOpts{
Name: "chorus_shhh_findings_total",
Help: "Total number of SHHH redaction findings",
},
[]string{"rule", "severity"},
)
	// UCXI metrics
	m.ucxiRequests = promauto.NewCounterVec(
		prometheus.CounterOpts{
@@ -656,6 +667,15 @@ func (m *CHORUSMetrics) SetSLURPQueueLength(length int) {
	m.slurpQueueLength.Set(float64(length))
}
// SHHH Metrics Methods
func (m *CHORUSMetrics) IncrementSHHHFindings(rule, severity string, count int) {
if m == nil || m.shhhFindings == nil || count <= 0 {
return
}
m.shhhFindings.WithLabelValues(rule, severity).Add(float64(count))
}
// UCXI Metrics Methods
func (m *CHORUSMetrics) IncrementUCXIRequests(method, status string) {

pkg/shhh/doc.go Normal file
View File

@@ -0,0 +1,11 @@
// Package shhh provides the CHORUS secrets sentinel responsible for detecting
// and redacting sensitive values before they leave the runtime. The sentinel
// focuses on predictable failure modes (log emission, telemetry fan-out,
// request forwarding) and offers a composable API for registering additional
// redaction rules, emitting audit events, and tracking operational metrics.
//
// The initial implementation focuses on high-signal secrets (API keys,
// bearer/OAuth tokens, private keys) so the runtime can start integrating
// SHHH into COOEE and WHOOSH logging immediately while the broader roadmap
// items (automated redaction replay, policy driven rules) continue landing.
package shhh
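
For orientation, a minimal caller sketch — the custom rule and the standalone program are illustrative only, while Config, RuleConfig, NewSentinel, and RedactText are the API defined in this package:

package main

import (
	"context"
	"fmt"
	"log"

	"chorus/pkg/shhh"
)

func main() {
	// Default rules stay enabled; one hypothetical custom rule is added for illustration.
	cfg := shhh.Config{
		CustomRules: []shhh.RuleConfig{{
			Name:                "internal-token", // not a package default
			Pattern:             `(X-Internal-Token:\s*)(\S+)`,
			ReplacementTemplate: "$1[REDACTED]",
			Severity:            shhh.SeverityMedium,
		}},
	}

	sentinel, err := shhh.NewSentinel(cfg)
	if err != nil {
		log.Fatal(err)
	}

	clean, findings := sentinel.RedactText(context.Background(),
		"X-Internal-Token: abc123", map[string]string{"source": "http.headers"})
	fmt.Println(clean)         // X-Internal-Token: [REDACTED]
	fmt.Println(len(findings)) // 1
}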

pkg/shhh/rule.go Normal file
View File

@@ -0,0 +1,130 @@
package shhh
import (
"crypto/sha256"
"encoding/base64"
"regexp"
"sort"
"strings"
)
type compiledRule struct {
name string
regex *regexp.Regexp
replacement string
severity Severity
tags []string
}
type matchRecord struct {
value string
}
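// apply rewrites every match of the rule in the input string and returns the
// redacted result plus the raw matched values, which callers hash into audit
// events rather than storing in plaintext.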
func (r *compiledRule) apply(in string) (string, []matchRecord) {
indices := r.regex.FindAllStringSubmatchIndex(in, -1)
if len(indices) == 0 {
return in, nil
}
var builder strings.Builder
builder.Grow(len(in))
matches := make([]matchRecord, 0, len(indices))
last := 0
for _, loc := range indices {
start, end := loc[0], loc[1]
builder.WriteString(in[last:start])
replaced := r.regex.ExpandString(nil, r.replacement, in, loc)
builder.Write(replaced)
matches = append(matches, matchRecord{value: in[start:end]})
last = end
}
builder.WriteString(in[last:])
return builder.String(), matches
}
func buildDefaultRuleConfigs(placeholder string) []RuleConfig {
if placeholder == "" {
placeholder = "[REDACTED]"
}
return []RuleConfig{
{
Name: "bearer-token",
Pattern: `(?i)(authorization\s*:\s*bearer\s+)([A-Za-z0-9\-._~+/]+=*)`,
ReplacementTemplate: "$1" + placeholder,
Severity: SeverityMedium,
Tags: []string{"token", "http"},
},
{
Name: "api-key",
Pattern: `(?i)((?:api[_-]?key|token|secret|password)\s*[:=]\s*["']?)([A-Za-z0-9\-._~+/]{8,})(["']?)`,
ReplacementTemplate: "$1" + placeholder + "$3",
Severity: SeverityHigh,
Tags: []string{"credentials"},
},
{
Name: "openai-secret",
Pattern: `(sk-[A-Za-z0-9]{20,})`,
ReplacementTemplate: placeholder,
Severity: SeverityHigh,
Tags: []string{"llm", "api"},
},
{
Name: "oauth-refresh-token",
Pattern: `(?i)(refresh_token"?\s*[:=]\s*["']?)([A-Za-z0-9\-._~+/]{8,})(["']?)`,
ReplacementTemplate: "$1" + placeholder + "$3",
Severity: SeverityMedium,
Tags: []string{"oauth"},
},
{
Name: "private-key-block",
Pattern: `(?s)(-----BEGIN [^-]+ PRIVATE KEY-----)[^-]+(-----END [^-]+ PRIVATE KEY-----)`,
ReplacementTemplate: "$1\n" + placeholder + "\n$2",
Severity: SeverityHigh,
Tags: []string{"pem", "key"},
},
}
}
func compileRules(cfg Config, placeholder string) ([]*compiledRule, error) {
configs := make([]RuleConfig, 0)
if !cfg.DisableDefaultRules {
configs = append(configs, buildDefaultRuleConfigs(placeholder)...)
}
configs = append(configs, cfg.CustomRules...)
rules := make([]*compiledRule, 0, len(configs))
for _, rc := range configs {
if rc.Name == "" || rc.Pattern == "" {
continue
}
replacement := rc.ReplacementTemplate
if replacement == "" {
replacement = placeholder
}
re, err := regexp.Compile(rc.Pattern)
if err != nil {
return nil, err
}
compiled := &compiledRule{
name: rc.Name,
replacement: replacement,
regex: re,
severity: rc.Severity,
tags: append([]string(nil), rc.Tags...),
}
rules = append(rules, compiled)
}
sort.SliceStable(rules, func(i, j int) bool {
return rules[i].name < rules[j].name
})
return rules, nil
}
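// hashSecret fingerprints a matched secret with SHA-256 so audit sinks can
// correlate repeat occurrences without ever persisting the plaintext value.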
func hashSecret(value string) string {
sum := sha256.Sum256([]byte(value))
return base64.RawStdEncoding.EncodeToString(sum[:])
}

pkg/shhh/sentinel.go Normal file
View File

@@ -0,0 +1,407 @@
package shhh
import (
"context"
"errors"
"fmt"
"sort"
"sync"
)
// Option configures the sentinel during construction.
type Option func(*Sentinel)
// FindingObserver receives aggregated findings for each redaction operation.
type FindingObserver func(context.Context, []Finding)
// WithAuditSink attaches an audit sink for per-redaction events.
func WithAuditSink(sink AuditSink) Option {
return func(s *Sentinel) {
s.audit = sink
}
}
// WithStats allows callers to supply a shared stats collector.
func WithStats(stats *Stats) Option {
return func(s *Sentinel) {
s.stats = stats
}
}
// WithFindingObserver registers an observer that is invoked whenever redaction
// produces findings.
func WithFindingObserver(observer FindingObserver) Option {
return func(s *Sentinel) {
if observer == nil {
return
}
s.observers = append(s.observers, observer)
}
}
// Sentinel performs secret detection/redaction across text payloads.
type Sentinel struct {
mu sync.RWMutex
enabled bool
placeholder string
rules []*compiledRule
audit AuditSink
stats *Stats
observers []FindingObserver
}
// NewSentinel creates a new secrets sentinel using the provided configuration.
func NewSentinel(cfg Config, opts ...Option) (*Sentinel, error) {
placeholder := cfg.RedactionPlaceholder
if placeholder == "" {
placeholder = "[REDACTED]"
}
s := &Sentinel{
enabled: !cfg.Disabled,
placeholder: placeholder,
stats: NewStats(),
}
for _, opt := range opts {
opt(s)
}
if s.stats == nil {
s.stats = NewStats()
}
rules, err := compileRules(cfg, placeholder)
if err != nil {
return nil, fmt.Errorf("compile SHHH rules: %w", err)
}
if len(rules) == 0 {
return nil, errors.New("no SHHH rules configured")
}
s.rules = rules
return s, nil
}
// Enabled reports whether the sentinel is actively redacting.
func (s *Sentinel) Enabled() bool {
s.mu.RLock()
defer s.mu.RUnlock()
return s.enabled
}
// Toggle enables or disables the sentinel at runtime.
func (s *Sentinel) Toggle(enabled bool) {
s.mu.Lock()
defer s.mu.Unlock()
s.enabled = enabled
}
// SetAuditSink updates the audit sink at runtime.
func (s *Sentinel) SetAuditSink(sink AuditSink) {
s.mu.Lock()
defer s.mu.Unlock()
s.audit = sink
}
// AddFindingObserver registers an observer after construction.
func (s *Sentinel) AddFindingObserver(observer FindingObserver) {
if observer == nil {
return
}
s.mu.Lock()
defer s.mu.Unlock()
s.observers = append(s.observers, observer)
}
// StatsSnapshot returns a snapshot of the current counters.
func (s *Sentinel) StatsSnapshot() StatsSnapshot {
s.mu.RLock()
stats := s.stats
s.mu.RUnlock()
if stats == nil {
return StatsSnapshot{}
}
return stats.Snapshot()
}
// RedactText scans the provided text and redacts any findings.
func (s *Sentinel) RedactText(ctx context.Context, text string, labels map[string]string) (string, []Finding) {
s.mu.RLock()
enabled := s.enabled
rules := s.rules
stats := s.stats
audit := s.audit
s.mu.RUnlock()
if !enabled || len(rules) == 0 {
return text, nil
}
if stats != nil {
stats.IncScan()
}
aggregates := make(map[string]*findingAggregate)
current := text
path := derivePath(labels)
for _, rule := range rules {
redacted, matches := rule.apply(current)
if len(matches) == 0 {
continue
}
current = redacted
if stats != nil {
stats.AddFindings(rule.name, len(matches))
}
recordAggregate(aggregates, rule, path, len(matches))
if audit != nil {
metadata := cloneLabels(labels)
for _, match := range matches {
event := AuditEvent{
Rule: rule.name,
Severity: rule.severity,
Tags: append([]string(nil), rule.tags...),
Path: path,
Hash: hashSecret(match.value),
Metadata: metadata,
}
audit.RecordRedaction(ctx, event)
}
}
}
findings := flattenAggregates(aggregates)
s.notifyObservers(ctx, findings)
return current, findings
}
// RedactMap walks the map and redacts in-place. It returns the collected findings.
func (s *Sentinel) RedactMap(ctx context.Context, payload map[string]any) []Finding {
return s.RedactMapWithLabels(ctx, payload, nil)
}
// RedactMapWithLabels allows callers to specify base labels that will be merged
// into metadata for nested structures.
func (s *Sentinel) RedactMapWithLabels(ctx context.Context, payload map[string]any, baseLabels map[string]string) []Finding {
if payload == nil {
return nil
}
aggregates := make(map[string]*findingAggregate)
s.redactValue(ctx, payload, "", baseLabels, aggregates)
findings := flattenAggregates(aggregates)
s.notifyObservers(ctx, findings)
return findings
}
func (s *Sentinel) redactValue(ctx context.Context, value any, path string, baseLabels map[string]string, agg map[string]*findingAggregate) {
switch v := value.(type) {
case map[string]interface{}:
for key, val := range v {
childPath := joinPath(path, key)
switch typed := val.(type) {
case string:
labels := mergeLabels(baseLabels, childPath)
redacted, findings := s.RedactText(ctx, typed, labels)
if redacted != typed {
v[key] = redacted
}
mergeAggregates(agg, findings)
case fmt.Stringer:
labels := mergeLabels(baseLabels, childPath)
text := typed.String()
redacted, findings := s.RedactText(ctx, text, labels)
if redacted != text {
v[key] = redacted
}
mergeAggregates(agg, findings)
default:
s.redactValue(ctx, typed, childPath, baseLabels, agg)
}
}
case []interface{}:
for idx, item := range v {
childPath := indexPath(path, idx)
switch typed := item.(type) {
case string:
labels := mergeLabels(baseLabels, childPath)
redacted, findings := s.RedactText(ctx, typed, labels)
if redacted != typed {
v[idx] = redacted
}
mergeAggregates(agg, findings)
case fmt.Stringer:
labels := mergeLabels(baseLabels, childPath)
text := typed.String()
redacted, findings := s.RedactText(ctx, text, labels)
if redacted != text {
v[idx] = redacted
}
mergeAggregates(agg, findings)
default:
s.redactValue(ctx, typed, childPath, baseLabels, agg)
}
}
case []string:
for idx, item := range v {
childPath := indexPath(path, idx)
labels := mergeLabels(baseLabels, childPath)
redacted, findings := s.RedactText(ctx, item, labels)
if redacted != item {
v[idx] = redacted
}
mergeAggregates(agg, findings)
}
}
}
func (s *Sentinel) notifyObservers(ctx context.Context, findings []Finding) {
if len(findings) == 0 {
return
}
findingsCopy := append([]Finding(nil), findings...)
s.mu.RLock()
observers := append([]FindingObserver(nil), s.observers...)
s.mu.RUnlock()
for _, observer := range observers {
observer(ctx, findingsCopy)
}
}
func mergeAggregates(dest map[string]*findingAggregate, findings []Finding) {
for i := range findings {
f := findings[i]
agg := dest[f.Rule]
if agg == nil {
agg = &findingAggregate{
rule: f.Rule,
severity: f.Severity,
tags: append([]string(nil), f.Tags...),
locations: make(map[string]int),
}
dest[f.Rule] = agg
}
agg.count += f.Count
for _, loc := range f.Locations {
agg.locations[loc.Path] += loc.Count
}
}
}
func recordAggregate(dest map[string]*findingAggregate, rule *compiledRule, path string, count int) {
agg := dest[rule.name]
if agg == nil {
agg = &findingAggregate{
rule: rule.name,
severity: rule.severity,
tags: append([]string(nil), rule.tags...),
locations: make(map[string]int),
}
dest[rule.name] = agg
}
agg.count += count
if path != "" {
agg.locations[path] += count
}
}
func flattenAggregates(agg map[string]*findingAggregate) []Finding {
if len(agg) == 0 {
return nil
}
keys := make([]string, 0, len(agg))
for key := range agg {
keys = append(keys, key)
}
sort.Strings(keys)
findings := make([]Finding, 0, len(agg))
for _, key := range keys {
entry := agg[key]
locations := make([]Location, 0, len(entry.locations))
if len(entry.locations) > 0 {
paths := make([]string, 0, len(entry.locations))
for path := range entry.locations {
paths = append(paths, path)
}
sort.Strings(paths)
for _, path := range paths {
locations = append(locations, Location{Path: path, Count: entry.locations[path]})
}
}
findings = append(findings, Finding{
Rule: entry.rule,
Severity: entry.severity,
Tags: append([]string(nil), entry.tags...),
Count: entry.count,
Locations: locations,
})
}
return findings
}
func derivePath(labels map[string]string) string {
if labels == nil {
return ""
}
if path := labels["path"]; path != "" {
return path
}
if path := labels["source"]; path != "" {
return path
}
if path := labels["field"]; path != "" {
return path
}
return ""
}
func cloneLabels(labels map[string]string) map[string]string {
if len(labels) == 0 {
return nil
}
clone := make(map[string]string, len(labels))
for k, v := range labels {
clone[k] = v
}
return clone
}
func joinPath(prefix, key string) string {
if prefix == "" {
return key
}
if key == "" {
return prefix
}
return prefix + "." + key
}
func indexPath(prefix string, idx int) string {
if prefix == "" {
return fmt.Sprintf("[%d]", idx)
}
return fmt.Sprintf("%s[%d]", prefix, idx)
}
func mergeLabels(base map[string]string, path string) map[string]string {
if base == nil && path == "" {
return nil
}
labels := cloneLabels(base)
if labels == nil {
labels = make(map[string]string, 1)
}
if path != "" {
labels["path"] = path
}
return labels
}
type findingAggregate struct {
rule string
severity Severity
tags []string
count int
locations map[string]int
}
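
The finding-observer hook is the natural bridge into the chorus_shhh_findings_total counters added in pkg/metrics above. A sketch of that wiring, assuming an existing *metrics.CHORUSMetrics named m and a constructed sentinel (this glue is not part of this commit):

	sentinel.AddFindingObserver(func(ctx context.Context, findings []shhh.Finding) {
		for _, f := range findings {
			// Rule, Severity, and Count come straight off each aggregated finding.
			m.IncrementSHHHFindings(f.Rule, string(f.Severity), f.Count)
		}
	})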

pkg/shhh/sentinel_test.go Normal file
View File

@@ -0,0 +1,95 @@
package shhh
import (
"context"
"testing"
"github.com/stretchr/testify/require"
)
type recordingSink struct {
events []AuditEvent
}
func (r *recordingSink) RecordRedaction(_ context.Context, event AuditEvent) {
r.events = append(r.events, event)
}
func TestRedactText_DefaultRules(t *testing.T) {
sentinel, err := NewSentinel(Config{})
require.NoError(t, err)
input := "Authorization: Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.secret"
redacted, findings := sentinel.RedactText(context.Background(), input, map[string]string{"source": "http.request.headers.authorization"})
require.Equal(t, "Authorization: Bearer [REDACTED]", redacted)
require.Len(t, findings, 1)
require.Equal(t, "bearer-token", findings[0].Rule)
require.Equal(t, 1, findings[0].Count)
require.NotEmpty(t, findings[0].Locations)
snapshot := sentinel.StatsSnapshot()
require.Equal(t, uint64(1), snapshot.TotalScans)
require.Equal(t, uint64(1), snapshot.TotalFindings)
require.Equal(t, uint64(1), snapshot.PerRuleFindings["bearer-token"])
}
func TestRedactMap_NestedStructures(t *testing.T) {
sentinel, err := NewSentinel(Config{})
require.NoError(t, err)
payload := map[string]any{
"config": map[string]any{
"api_key": "API_KEY=1234567890ABCDEFG",
},
"tokens": []any{
"sk-test1234567890ABCDEF",
map[string]any{"refresh": "refresh_token=abcdef12345"},
},
}
findings := sentinel.RedactMap(context.Background(), payload)
require.NotEmpty(t, findings)
config := payload["config"].(map[string]any)
require.Equal(t, "API_KEY=[REDACTED]", config["api_key"])
tokens := payload["tokens"].([]any)
require.Equal(t, "[REDACTED]", tokens[0])
inner := tokens[1].(map[string]any)
require.Equal(t, "refresh_token=[REDACTED]", inner["refresh"])
total := 0
for _, finding := range findings {
total += finding.Count
}
require.Equal(t, 3, total)
}
func TestAuditSinkReceivesEvents(t *testing.T) {
sink := &recordingSink{}
cfg := Config{
DisableDefaultRules: true,
CustomRules: []RuleConfig{
{
Name: "custom-secret",
Pattern: `(secret\s*=\s*)([A-Za-z0-9]{6,})`,
ReplacementTemplate: "$1[REDACTED]",
Severity: SeverityHigh,
},
},
}
sentinel, err := NewSentinel(cfg, WithAuditSink(sink))
require.NoError(t, err)
_, findings := sentinel.RedactText(context.Background(), "secret=mysecretvalue", map[string]string{"source": "test"})
require.Len(t, findings, 1)
require.Equal(t, 1, findings[0].Count)
require.Len(t, sink.events, 1)
require.Equal(t, "custom-secret", sink.events[0].Rule)
require.NotEmpty(t, sink.events[0].Hash)
require.Equal(t, "test", sink.events[0].Path)
}

pkg/shhh/stats.go Normal file
View File

@@ -0,0 +1,60 @@
package shhh
import (
"sync"
"sync/atomic"
)
// Stats tracks aggregate counts for the sentinel.
type Stats struct {
totalScans atomic.Uint64
totalFindings atomic.Uint64
perRule sync.Map // string -> *atomic.Uint64
}
// NewStats constructs a Stats collector.
func NewStats() *Stats {
return &Stats{}
}
// IncScan increments the total scan counter.
func (s *Stats) IncScan() {
if s == nil {
return
}
s.totalScans.Add(1)
}
// AddFindings records findings for a rule.
func (s *Stats) AddFindings(rule string, count int) {
if s == nil || count <= 0 {
return
}
s.totalFindings.Add(uint64(count))
counterAny, _ := s.perRule.LoadOrStore(rule, new(atomic.Uint64))
counter := counterAny.(*atomic.Uint64)
counter.Add(uint64(count))
}
// Snapshot returns a point-in-time view of the counters.
func (s *Stats) Snapshot() StatsSnapshot {
if s == nil {
return StatsSnapshot{}
}
snapshot := StatsSnapshot{
TotalScans: s.totalScans.Load(),
TotalFindings: s.totalFindings.Load(),
PerRuleFindings: make(map[string]uint64),
}
s.perRule.Range(func(key, value any) bool {
name, ok := key.(string)
if !ok {
return true
}
if counter, ok := value.(*atomic.Uint64); ok {
snapshot.PerRuleFindings[name] = counter.Load()
}
return true
})
return snapshot
}

pkg/shhh/types.go Normal file
View File

@@ -0,0 +1,73 @@
package shhh
import "context"
// Severity represents the criticality associated with a redaction finding.
type Severity string
const (
// SeverityLow indicates low-impact findings (e.g. non-production credentials).
SeverityLow Severity = "low"
// SeverityMedium indicates medium impact findings (e.g. access tokens).
SeverityMedium Severity = "medium"
// SeverityHigh indicates high-impact findings (e.g. private keys).
SeverityHigh Severity = "high"
)
// RuleConfig defines a redaction rule that SHHH should enforce.
type RuleConfig struct {
Name string `json:"name"`
Pattern string `json:"pattern"`
ReplacementTemplate string `json:"replacement_template"`
Severity Severity `json:"severity"`
Tags []string `json:"tags"`
}
// Config controls sentinel behaviour.
type Config struct {
// Disabled toggles redaction off entirely.
Disabled bool `json:"disabled"`
// RedactionPlaceholder overrides the default placeholder value.
RedactionPlaceholder string `json:"redaction_placeholder"`
// DisableDefaultRules disables the built-in curated rule set.
DisableDefaultRules bool `json:"disable_default_rules"`
// CustomRules allows callers to append bespoke redaction patterns.
CustomRules []RuleConfig `json:"custom_rules"`
}
// Finding represents a single rule firing during redaction.
type Finding struct {
Rule string `json:"rule"`
Severity Severity `json:"severity"`
Tags []string `json:"tags,omitempty"`
Count int `json:"count"`
Locations []Location `json:"locations,omitempty"`
}
// Location describes where a secret was found.
type Location struct {
Path string `json:"path"`
Count int `json:"count"`
}
// StatsSnapshot exposes aggregate counters for observability.
type StatsSnapshot struct {
TotalScans uint64 `json:"total_scans"`
TotalFindings uint64 `json:"total_findings"`
PerRuleFindings map[string]uint64 `json:"per_rule_findings"`
}
// AuditEvent captures a single redaction occurrence for downstream sinks.
type AuditEvent struct {
Rule string `json:"rule"`
Severity Severity `json:"severity"`
Tags []string `json:"tags,omitempty"`
Path string `json:"path,omitempty"`
Hash string `json:"hash"`
Metadata map[string]string `json:"metadata,omitempty"`
}
// AuditSink receives redaction events for long term storage / replay.
type AuditSink interface {
RecordRedaction(ctx context.Context, event AuditEvent)
}

View File

@@ -74,7 +74,11 @@ func (dp *DecisionPublisher) PublishTaskDecision(decision *TaskDecision) error {
		decision.Role = dp.config.Agent.Role
	}
	if decision.Project == "" {
		if project := dp.config.Agent.Project; project != "" {
			decision.Project = project
		} else {
			decision.Project = "chorus"
		}
	}
	if decision.Timestamp.IsZero() {
		decision.Timestamp = time.Now()
@@ -364,12 +368,16 @@ func (dp *DecisionPublisher) allHealthChecksPass(healthChecks map[string]bool) b
// GetPublisherMetrics returns metrics about the decision publisher
func (dp *DecisionPublisher) GetPublisherMetrics() map[string]interface{} {
	dhtMetrics := dp.dhtStorage.GetMetrics()
project := dp.config.Agent.Project
if project == "" {
project = "chorus"
}
	return map[string]interface{}{
		"node_id":      dp.nodeID,
		"agent_name":   dp.agentName,
		"current_role": dp.config.Agent.Role,
		"project":      project,
		"dht_metrics":  dhtMetrics,
		"last_publish": time.Now(), // This would be tracked in a real implementation
	}

View File

@@ -8,9 +8,10 @@ import (
"sync" "sync"
"time" "time"
"chorus/pkg/shhh"
pubsub "github.com/libp2p/go-libp2p-pubsub"
"github.com/libp2p/go-libp2p/core/host" "github.com/libp2p/go-libp2p/core/host"
"github.com/libp2p/go-libp2p/core/peer" "github.com/libp2p/go-libp2p/core/peer"
pubsub "github.com/libp2p/go-libp2p-pubsub"
) )
// PubSub handles publish/subscribe messaging for Bzzz coordination and HMMM meta-discussion
@@ -35,6 +36,8 @@ type PubSub struct {
	dynamicTopicsMux sync.RWMutex
	dynamicSubs      map[string]*pubsub.Subscription
	dynamicSubsMux   sync.RWMutex
dynamicHandlers map[string]func([]byte, peer.ID)
dynamicHandlersMux sync.RWMutex
	// Configuration
	chorusTopicName string
@@ -49,6 +52,10 @@ type PubSub struct {
	// Hypercore-style logging
	hypercoreLog HypercoreLogger
// SHHH sentinel
redactor *shhh.Sentinel
redactorMux sync.RWMutex
}

// HypercoreLogger interface for dependency injection
@@ -159,6 +166,7 @@ func NewPubSubWithLogger(ctx context.Context, h host.Host, chorusTopic, hmmmTopi
		contextTopicName: contextTopic,
		dynamicTopics:    make(map[string]*pubsub.Topic),
		dynamicSubs:      make(map[string]*pubsub.Subscription),
dynamicHandlers: make(map[string]func([]byte, peer.ID)),
		hypercoreLog: logger,
} }
@@ -177,6 +185,13 @@ func NewPubSubWithLogger(ctx context.Context, h host.Host, chorusTopic, hmmmTopi
	return p, nil
}
// SetRedactor wires the SHHH sentinel so outbound messages are sanitized before publication.
func (p *PubSub) SetRedactor(redactor *shhh.Sentinel) {
p.redactorMux.Lock()
defer p.redactorMux.Unlock()
p.redactor = redactor
}
// SetHmmmMessageHandler sets the handler for incoming HMMM messages.
func (p *PubSub) SetHmmmMessageHandler(handler func(msg Message, from peer.ID)) {
	p.HmmmMessageHandler = handler
@@ -231,15 +246,21 @@ func (p *PubSub) joinStaticTopics() error {
	return nil
}
// subscribeDynamicTopic joins a topic and optionally assigns a raw handler.
func (p *PubSub) subscribeDynamicTopic(topicName string, handler func([]byte, peer.ID)) error {
	if topicName == "" {
		return fmt.Errorf("topic name cannot be empty")
	}

	p.dynamicTopicsMux.RLock()
	_, exists := p.dynamicTopics[topicName]
	p.dynamicTopicsMux.RUnlock()
	if exists {
		p.dynamicHandlersMux.Lock()
		p.dynamicHandlers[topicName] = handler
		p.dynamicHandlersMux.Unlock()
		return nil
	}

	topic, err := p.ps.Join(topicName)
@@ -253,16 +274,46 @@ func (p *PubSub) JoinDynamicTopic(topicName string) error {
 		return fmt.Errorf("failed to subscribe to dynamic topic %s: %w", topicName, err)
 	}
 
+	p.dynamicTopicsMux.Lock()
+	if _, already := p.dynamicTopics[topicName]; already {
+		p.dynamicTopicsMux.Unlock()
+		sub.Cancel()
+		topic.Close()
+		p.dynamicHandlersMux.Lock()
+		p.dynamicHandlers[topicName] = handler
+		p.dynamicHandlersMux.Unlock()
+		return nil
+	}
 	p.dynamicTopics[topicName] = topic
-	p.dynamicSubs[topicName] = sub
+	p.dynamicTopicsMux.Unlock()
 
-	// Start a handler for this new subscription
-	go p.handleDynamicMessages(sub)
+	p.dynamicSubsMux.Lock()
+	p.dynamicSubs[topicName] = sub
+	p.dynamicSubsMux.Unlock()
+
+	p.dynamicHandlersMux.Lock()
+	p.dynamicHandlers[topicName] = handler
+	p.dynamicHandlersMux.Unlock()
+
+	go p.handleDynamicMessages(topicName, sub)
 
 	fmt.Printf("✅ Joined dynamic topic: %s\n", topicName)
 	return nil
 }
+
+// JoinDynamicTopic joins a new topic for a specific task
+func (p *PubSub) JoinDynamicTopic(topicName string) error {
+	return p.subscribeDynamicTopic(topicName, nil)
+}
+
+// SubscribeRawTopic joins a topic and delivers raw payloads to the provided handler.
+func (p *PubSub) SubscribeRawTopic(topicName string, handler func([]byte, peer.ID)) error {
+	if handler == nil {
+		return fmt.Errorf("handler cannot be nil")
+	}
+	return p.subscribeDynamicTopic(topicName, handler)
+}
 
 // JoinRoleBasedTopics joins topics based on role and expertise
 func (p *PubSub) JoinRoleBasedTopics(role string, expertise []string, reportsTo []string) error {
 	var topicsToJoin []string
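
A usage sketch for the new raw path — the topic name and handler body are illustrative only:

err := ps.SubscribeRawTopic("hmmm/issue-42", func(raw []byte, from peer.ID) {
	// raw is the exact wire payload; no Message envelope is assumed
	fmt.Printf("HMMM payload from %s: %d bytes\n", from, len(raw))
})

Re-subscribing to an already-joined topic only swaps the registered handler (including back to nil via JoinDynamicTopic), so callers can upgrade a plain join to a raw subscription without leaving the topic first.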
@@ -324,6 +375,10 @@ func (p *PubSub) LeaveDynamicTopic(topicName string) {
 		delete(p.dynamicTopics, topicName)
 	}
 
+	p.dynamicHandlersMux.Lock()
+	delete(p.dynamicHandlers, topicName)
+	p.dynamicHandlersMux.Unlock()
+
 	fmt.Printf("🗑️ Left dynamic topic: %s\n", topicName)
 }
@@ -337,11 +392,12 @@ func (p *PubSub) PublishToDynamicTopic(topicName string, msgType MessageType, data map[string]interface{}) error {
 		return fmt.Errorf("not subscribed to dynamic topic: %s", topicName)
 	}
 
+	payload := p.sanitizePayload(topicName, msgType, data)
 	msg := Message{
 		Type:      msgType,
 		From:      p.host.ID().String(),
 		Timestamp: time.Now(),
-		Data:      data,
+		Data:      payload,
 	}
 
 	msgBytes, err := json.Marshal(msg)
@@ -379,11 +435,12 @@ func (p *PubSub) PublishRaw(topicName string, payload []byte) error {
 
 // PublishBzzzMessage publishes a message to the Bzzz coordination topic
 func (p *PubSub) PublishBzzzMessage(msgType MessageType, data map[string]interface{}) error {
+	payload := p.sanitizePayload(p.chorusTopicName, msgType, data)
 	msg := Message{
 		Type:      msgType,
 		From:      p.host.ID().String(),
 		Timestamp: time.Now(),
-		Data:      data,
+		Data:      payload,
 	}
 
 	msgBytes, err := json.Marshal(msg)
@@ -396,11 +453,12 @@ func (p *PubSub) PublishBzzzMessage(msgType MessageType, data map[string]interface{}) error {
 
 // PublishHmmmMessage publishes a message to the HMMM meta-discussion topic
 func (p *PubSub) PublishHmmmMessage(msgType MessageType, data map[string]interface{}) error {
+	payload := p.sanitizePayload(p.hmmmTopicName, msgType, data)
 	msg := Message{
 		Type:      msgType,
 		From:      p.host.ID().String(),
 		Timestamp: time.Now(),
-		Data:      data,
+		Data:      payload,
 	}
 
 	msgBytes, err := json.Marshal(msg)
@@ -425,11 +483,12 @@ func (p *PubSub) SetAntennaeMessageHandler(handler func(msg Message, from peer.ID)) {
 
 // PublishContextFeedbackMessage publishes a message to the Context Feedback topic
 func (p *PubSub) PublishContextFeedbackMessage(msgType MessageType, data map[string]interface{}) error {
+	payload := p.sanitizePayload(p.contextTopicName, msgType, data)
 	msg := Message{
 		Type:      msgType,
 		From:      p.host.ID().String(),
 		Timestamp: time.Now(),
-		Data:      data,
+		Data:      payload,
 	}
 
 	msgBytes, err := json.Marshal(msg)
@@ -442,11 +501,16 @@ func (p *PubSub) PublishContextFeedbackMessage(msgType MessageType, data map[string]interface{}) error {
 
 // PublishRoleBasedMessage publishes a role-based collaboration message
 func (p *PubSub) PublishRoleBasedMessage(msgType MessageType, data map[string]interface{}, opts MessageOptions) error {
+	topicName := p.chorusTopicName
+	if isRoleMessage(msgType) {
+		topicName = p.hmmmTopicName
+	}
+	payload := p.sanitizePayload(topicName, msgType, data)
 	msg := Message{
 		Type:              msgType,
 		From:              p.host.ID().String(),
 		Timestamp:         time.Now(),
-		Data:              data,
+		Data:              payload,
 		FromRole:          opts.FromRole,
 		ToRoles:           opts.ToRoles,
 		RequiredExpertise: opts.RequiredExpertise,
@@ -462,10 +526,8 @@ func (p *PubSub) PublishRoleBasedMessage(msgType MessageType, data map[string]interface{}, opts MessageOptions) error {
 	// Determine which topic to use based on message type
 	var topic *pubsub.Topic
-	switch msgType {
-	case RoleAnnouncement, ExpertiseRequest, ExpertiseResponse, StatusUpdate,
-		WorkAllocation, RoleCollaboration, MentorshipRequest, MentorshipResponse,
-		ProjectUpdate, DeliverableReady:
+	switch {
+	case isRoleMessage(msgType):
 		topic = p.hmmmTopic // Use HMMM topic for role-based messages
 	default:
 		topic = p.chorusTopic // Default to Bzzz topic
@@ -604,15 +666,23 @@ func (p *PubSub) handleContextFeedbackMessages() {
 	}
 }
 
+// getDynamicHandler returns the raw handler for a topic if registered.
+func (p *PubSub) getDynamicHandler(topicName string) func([]byte, peer.ID) {
+	p.dynamicHandlersMux.RLock()
+	handler := p.dynamicHandlers[topicName]
+	p.dynamicHandlersMux.RUnlock()
+	return handler
+}
+
 // handleDynamicMessages processes messages from a dynamic topic subscription
-func (p *PubSub) handleDynamicMessages(sub *pubsub.Subscription) {
+func (p *PubSub) handleDynamicMessages(topicName string, sub *pubsub.Subscription) {
 	for {
 		msg, err := sub.Next(p.ctx)
 		if err != nil {
 			if p.ctx.Err() != nil || err.Error() == "subscription cancelled" {
 				return // Subscription was cancelled, exit handler
 			}
-			fmt.Printf("❌ Error receiving dynamic message: %v\n", err)
+			fmt.Printf("❌ Error receiving dynamic message on %s: %v\n", topicName, err)
 			continue
 		}
@@ -620,13 +690,18 @@ func (p *PubSub) handleDynamicMessages(sub *pubsub.Subscription) {
 			continue
 		}
 
-		var dynamicMsg Message
-		if err := json.Unmarshal(msg.Data, &dynamicMsg); err != nil {
-			fmt.Printf("❌ Failed to unmarshal dynamic message: %v\n", err)
+		if handler := p.getDynamicHandler(topicName); handler != nil {
+			handler(msg.Data, msg.ReceivedFrom)
 			continue
 		}
 
-		// Use the main HMMM handler for all dynamic messages
+		var dynamicMsg Message
+		if err := json.Unmarshal(msg.Data, &dynamicMsg); err != nil {
+			fmt.Printf("❌ Failed to unmarshal dynamic message on %s: %v\n", topicName, err)
+			continue
+		}
+
+		// Use the main HMMM handler for all dynamic messages without custom handlers
 		if p.HmmmMessageHandler != nil {
 			p.HmmmMessageHandler(dynamicMsg, msg.ReceivedFrom)
 		}
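
Dispatch precedence is worth noting: a registered raw handler consumes the payload and short-circuits the loop, so raw topics never pass through `json.Unmarshal` or the shared `HmmmMessageHandler`; all other dynamic topics keep the legacy behavior of being decoded into a `Message` and forwarded to the HMMM handler.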
@@ -764,6 +839,68 @@ func (p *PubSub) processContextFeedbackMessage(msg Message, from peer.ID) {
 	}
 }
+
+func (p *PubSub) sanitizePayload(topic string, msgType MessageType, data map[string]interface{}) map[string]interface{} {
+	if data == nil {
+		return nil
+	}
+
+	cloned := clonePayloadMap(data)
+
+	p.redactorMux.RLock()
+	redactor := p.redactor
+	p.redactorMux.RUnlock()
+	if redactor != nil {
+		labels := map[string]string{
+			"source":       "pubsub",
+			"topic":        topic,
+			"message_type": string(msgType),
+		}
+		redactor.RedactMapWithLabels(context.Background(), cloned, labels)
+	}
+
+	return cloned
+}
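
To illustrate the copy-then-redact contract (what the redacted value looks like on the wire depends on SHHH policy and is not shown in this diff; the payload below is illustrative):

data := map[string]interface{}{
	"status": "working",
	"env":    map[string]interface{}{"api_token": "sk-example-not-real"},
}
_ = ps.PublishBzzzMessage(StatusUpdate, data)
// The wire copy went through sanitizePayload; the caller's data map is untouched.

Redaction runs on a deep clone, so publishers can safely reuse or log their own payload maps after the call.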
+
+func isRoleMessage(msgType MessageType) bool {
+	switch msgType {
+	case RoleAnnouncement, ExpertiseRequest, ExpertiseResponse, StatusUpdate,
+		WorkAllocation, RoleCollaboration, MentorshipRequest, MentorshipResponse,
+		ProjectUpdate, DeliverableReady:
+		return true
+	default:
+		return false
+	}
+}
+
+func clonePayloadMap(in map[string]interface{}) map[string]interface{} {
+	if in == nil {
+		return nil
+	}
+	out := make(map[string]interface{}, len(in))
+	for k, v := range in {
+		out[k] = clonePayloadValue(v)
+	}
+	return out
+}
+
+func clonePayloadValue(v interface{}) interface{} {
+	switch tv := v.(type) {
+	case map[string]interface{}:
+		return clonePayloadMap(tv)
+	case []interface{}:
+		return clonePayloadSlice(tv)
+	case []string:
+		return append([]string(nil), tv...)
+	default:
+		return tv
+	}
+}
+
+func clonePayloadSlice(in []interface{}) []interface{} {
+	out := make([]interface{}, len(in))
+	for i, val := range in {
+		out[i] = clonePayloadValue(val)
+	}
+	return out
+}
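
A small demonstration of what the clone does and does not protect, read directly from the helpers above:

orig := map[string]interface{}{
	"nested": map[string]interface{}{"token": "secret"},
}
cloned := clonePayloadMap(orig)
cloned["nested"].(map[string]interface{})["token"] = "[REDACTED]"
// orig still reads "secret": nested maps, []interface{} and []string are copied

Values of any other type ([]byte, pointers, custom structs) fall through the `default` arm and are shared with the original, so a redactor that mutated such values in place would also touch the caller's copy.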
 
 // Close shuts down the PubSub instance
 func (p *PubSub) Close() error {
 	p.cancel()
 
@@ -788,6 +925,12 @@ func (p *PubSub) Close() error {
 		p.contextTopic.Close()
 	}
 
+	p.dynamicSubsMux.Lock()
+	for _, sub := range p.dynamicSubs {
+		sub.Cancel()
+	}
+	p.dynamicSubsMux.Unlock()
+
 	p.dynamicTopicsMux.Lock()
 	for _, topic := range p.dynamicTopics {
 		topic.Close()