Harden CHORUS security and messaging stack
This commit is contained in:
@@ -10,7 +10,7 @@ CHORUS is the runtime that ties the CHORUS ecosystem together: libp2p mesh, DHT-
|
||||
| DHT + DecisionPublisher | ✅ Running | Encrypted storage wired through `pkg/dht`; decisions written via `ucxl.DecisionPublisher`. |
|
||||
| Election manager | ✅ Running | Admin election integrated with Backbeat; metrics exposed under `pkg/metrics`. |
|
||||
| SLURP (context intelligence) | 🚧 Stubbed | `pkg/slurp/slurp.go` contains TODOs for resolver, temporal graphs, intelligence. Leader integration scaffolding exists but uses placeholder IDs/request forwarding. |
|
||||
| SHHH (secrets sentinel) | ❌ Not implemented | No `pkg/shhh` module yet; redaction hooks are pending. |
|
||||
| SHHH (secrets sentinel) | 🚧 Sentinel live | `pkg/shhh` redacts hypercore + PubSub payloads with audit + metrics hooks (policy replay TBD). |
|
||||
| HMMM routing | 🚧 Partial | PubSub topics join, but capability/role announcements and HMMM router wiring are placeholders (`internal/runtime/agent_support.go`). |
|
||||
|
||||
See `docs/progress/CHORUS-WHOOSH-development-plan.md` for the detailed build plan and `docs/progress/CHORUS-WHOOSH-roadmap.md` for sequencing.
|
||||
@@ -33,7 +33,7 @@ You’ll get a single agent container with:
|
||||
- DHT storage (AGE-encrypted)
|
||||
- HTTP API + health endpoints
|
||||
|
||||
**Missing today:** SLURP context resolution, SHHH redaction, HMMM per-issue routing. Expect log warnings/TODOs for those paths.
|
||||
**Missing today:** SLURP context resolution, advanced SHHH policy replay, HMMM per-issue routing. Expect log warnings/TODOs for those paths.
|
||||
|
||||
## Roadmap Highlights
|
||||
|
||||
|
||||
@@ -9,50 +9,57 @@ import (
|
||||
|
||||
"chorus/internal/logging"
|
||||
"chorus/pkg/config"
|
||||
"chorus/pubsub"
|
||||
"chorus/pkg/repository"
|
||||
"chorus/pkg/hmmm"
|
||||
"chorus/pkg/repository"
|
||||
"chorus/pubsub"
|
||||
"github.com/google/uuid"
|
||||
"github.com/libp2p/go-libp2p/core/peer"
|
||||
)
|
||||
|
||||
// TaskProgressTracker is notified when tasks start and complete so availability broadcasts stay accurate.
|
||||
type TaskProgressTracker interface {
|
||||
AddTask(taskID string)
|
||||
RemoveTask(taskID string)
|
||||
}
|
||||
|
||||
// TaskCoordinator manages task discovery, assignment, and execution across multiple repositories
|
||||
type TaskCoordinator struct {
|
||||
pubsub *pubsub.PubSub
|
||||
hlog *logging.HypercoreLog
|
||||
ctx context.Context
|
||||
config *config.Config
|
||||
hmmmRouter *hmmm.Router
|
||||
pubsub *pubsub.PubSub
|
||||
hlog *logging.HypercoreLog
|
||||
ctx context.Context
|
||||
config *config.Config
|
||||
hmmmRouter *hmmm.Router
|
||||
|
||||
// Repository management
|
||||
providers map[int]repository.TaskProvider // projectID -> provider
|
||||
providerLock sync.RWMutex
|
||||
factory repository.ProviderFactory
|
||||
providers map[int]repository.TaskProvider // projectID -> provider
|
||||
providerLock sync.RWMutex
|
||||
factory repository.ProviderFactory
|
||||
|
||||
// Task management
|
||||
activeTasks map[string]*ActiveTask // taskKey -> active task
|
||||
taskLock sync.RWMutex
|
||||
taskMatcher repository.TaskMatcher
|
||||
activeTasks map[string]*ActiveTask // taskKey -> active task
|
||||
taskLock sync.RWMutex
|
||||
taskMatcher repository.TaskMatcher
|
||||
taskTracker TaskProgressTracker
|
||||
|
||||
// Agent tracking
|
||||
nodeID string
|
||||
agentInfo *repository.AgentInfo
|
||||
nodeID string
|
||||
agentInfo *repository.AgentInfo
|
||||
|
||||
// Sync settings
|
||||
syncInterval time.Duration
|
||||
lastSync map[int]time.Time
|
||||
syncLock sync.RWMutex
|
||||
syncInterval time.Duration
|
||||
lastSync map[int]time.Time
|
||||
syncLock sync.RWMutex
|
||||
}
|
||||
|
||||
// ActiveTask represents a task currently being worked on
|
||||
type ActiveTask struct {
|
||||
Task *repository.Task
|
||||
Provider repository.TaskProvider
|
||||
ProjectID int
|
||||
ClaimedAt time.Time
|
||||
Status string // claimed, working, completed, failed
|
||||
AgentID string
|
||||
Results map[string]interface{}
|
||||
Task *repository.Task
|
||||
Provider repository.TaskProvider
|
||||
ProjectID int
|
||||
ClaimedAt time.Time
|
||||
Status string // claimed, working, completed, failed
|
||||
AgentID string
|
||||
Results map[string]interface{}
|
||||
}
|
||||
|
||||
// NewTaskCoordinator creates a new task coordinator
|
||||
@@ -63,7 +70,9 @@ func NewTaskCoordinator(
|
||||
cfg *config.Config,
|
||||
nodeID string,
|
||||
hmmmRouter *hmmm.Router,
|
||||
tracker TaskProgressTracker,
|
||||
) *TaskCoordinator {
|
||||
|
||||
coordinator := &TaskCoordinator{
|
||||
pubsub: ps,
|
||||
hlog: hlog,
|
||||
@@ -75,6 +84,7 @@ func NewTaskCoordinator(
|
||||
lastSync: make(map[int]time.Time),
|
||||
factory: &repository.DefaultProviderFactory{},
|
||||
taskMatcher: &repository.DefaultTaskMatcher{},
|
||||
taskTracker: tracker,
|
||||
nodeID: nodeID,
|
||||
syncInterval: 30 * time.Second,
|
||||
}
|
||||
@@ -185,13 +195,17 @@ func (tc *TaskCoordinator) processTask(task *repository.Task, provider repositor
|
||||
tc.agentInfo.CurrentTasks = len(tc.activeTasks)
|
||||
tc.taskLock.Unlock()
|
||||
|
||||
if tc.taskTracker != nil {
|
||||
tc.taskTracker.AddTask(taskKey)
|
||||
}
|
||||
|
||||
// Log task claim
|
||||
tc.hlog.Append(logging.TaskClaimed, map[string]interface{}{
|
||||
"task_number": task.Number,
|
||||
"repository": task.Repository,
|
||||
"title": task.Title,
|
||||
"task_number": task.Number,
|
||||
"repository": task.Repository,
|
||||
"title": task.Title,
|
||||
"required_role": task.RequiredRole,
|
||||
"priority": task.Priority,
|
||||
"priority": task.Priority,
|
||||
})
|
||||
|
||||
// Announce task claim
|
||||
@@ -212,11 +226,11 @@ func (tc *TaskCoordinator) processTask(task *repository.Task, provider repositor
|
||||
}
|
||||
if err := tc.hmmmRouter.Publish(tc.ctx, seedMsg); err != nil {
|
||||
fmt.Printf("⚠️ Failed to seed HMMM room for task %d: %v\n", task.Number, err)
|
||||
tc.hlog.AppendString("system_error", map[string]interface{}{
|
||||
"error": "hmmm_seed_failed",
|
||||
"task_number": task.Number,
|
||||
"repository": task.Repository,
|
||||
"message": err.Error(),
|
||||
tc.hlog.AppendString("system_error", map[string]interface{}{
|
||||
"error": "hmmm_seed_failed",
|
||||
"task_number": task.Number,
|
||||
"repository": task.Repository,
|
||||
"message": err.Error(),
|
||||
})
|
||||
} else {
|
||||
fmt.Printf("🐜 Seeded HMMM room for task %d\n", task.Number)
|
||||
@@ -259,14 +273,14 @@ func (tc *TaskCoordinator) shouldRequestCollaboration(task *repository.Task) boo
|
||||
// requestTaskCollaboration requests collaboration for a task
|
||||
func (tc *TaskCoordinator) requestTaskCollaboration(task *repository.Task) {
|
||||
data := map[string]interface{}{
|
||||
"task_number": task.Number,
|
||||
"repository": task.Repository,
|
||||
"title": task.Title,
|
||||
"required_role": task.RequiredRole,
|
||||
"task_number": task.Number,
|
||||
"repository": task.Repository,
|
||||
"title": task.Title,
|
||||
"required_role": task.RequiredRole,
|
||||
"required_expertise": task.RequiredExpertise,
|
||||
"priority": task.Priority,
|
||||
"requester_role": tc.agentInfo.Role,
|
||||
"reason": "expertise_gap",
|
||||
"priority": task.Priority,
|
||||
"requester_role": tc.agentInfo.Role,
|
||||
"reason": "expertise_gap",
|
||||
}
|
||||
|
||||
opts := pubsub.MessageOptions{
|
||||
@@ -302,10 +316,10 @@ func (tc *TaskCoordinator) executeTask(activeTask *ActiveTask) {
|
||||
|
||||
// Complete the task
|
||||
results := map[string]interface{}{
|
||||
"status": "completed",
|
||||
"status": "completed",
|
||||
"completion_time": time.Now().Format(time.RFC3339),
|
||||
"agent_id": tc.agentInfo.ID,
|
||||
"agent_role": tc.agentInfo.Role,
|
||||
"agent_id": tc.agentInfo.ID,
|
||||
"agent_role": tc.agentInfo.Role,
|
||||
}
|
||||
|
||||
taskResult := &repository.TaskResult{
|
||||
@@ -334,6 +348,10 @@ func (tc *TaskCoordinator) executeTask(activeTask *ActiveTask) {
|
||||
tc.agentInfo.CurrentTasks = len(tc.activeTasks)
|
||||
tc.taskLock.Unlock()
|
||||
|
||||
if tc.taskTracker != nil {
|
||||
tc.taskTracker.RemoveTask(taskKey)
|
||||
}
|
||||
|
||||
// Log completion
|
||||
tc.hlog.Append(logging.TaskCompleted, map[string]interface{}{
|
||||
"task_number": activeTask.Task.Number,
|
||||
@@ -378,19 +396,19 @@ func (tc *TaskCoordinator) announceAgentRole() {
|
||||
// announceTaskClaim announces that this agent has claimed a task
|
||||
func (tc *TaskCoordinator) announceTaskClaim(task *repository.Task) {
|
||||
data := map[string]interface{}{
|
||||
"task_number": task.Number,
|
||||
"repository": task.Repository,
|
||||
"title": task.Title,
|
||||
"agent_id": tc.agentInfo.ID,
|
||||
"agent_role": tc.agentInfo.Role,
|
||||
"claim_time": time.Now().Format(time.RFC3339),
|
||||
"task_number": task.Number,
|
||||
"repository": task.Repository,
|
||||
"title": task.Title,
|
||||
"agent_id": tc.agentInfo.ID,
|
||||
"agent_role": tc.agentInfo.Role,
|
||||
"claim_time": time.Now().Format(time.RFC3339),
|
||||
"estimated_completion": time.Now().Add(time.Hour).Format(time.RFC3339),
|
||||
}
|
||||
|
||||
opts := pubsub.MessageOptions{
|
||||
FromRole: tc.agentInfo.Role,
|
||||
Priority: "medium",
|
||||
ThreadID: fmt.Sprintf("task-%s-%d", task.Repository, task.Number),
|
||||
FromRole: tc.agentInfo.Role,
|
||||
Priority: "medium",
|
||||
ThreadID: fmt.Sprintf("task-%s-%d", task.Repository, task.Number),
|
||||
}
|
||||
|
||||
err := tc.pubsub.PublishRoleBasedMessage(pubsub.TaskProgress, data, opts)
|
||||
@@ -463,15 +481,15 @@ func (tc *TaskCoordinator) handleTaskHelpRequest(msg pubsub.Message, from peer.I
|
||||
}
|
||||
}
|
||||
|
||||
if canHelp && tc.agentInfo.CurrentTasks < tc.agentInfo.MaxTasks {
|
||||
if canHelp && tc.agentInfo.CurrentTasks < tc.agentInfo.MaxTasks {
|
||||
// Offer help
|
||||
responseData := map[string]interface{}{
|
||||
"agent_id": tc.agentInfo.ID,
|
||||
"agent_role": tc.agentInfo.Role,
|
||||
"expertise": tc.agentInfo.Expertise,
|
||||
"availability": tc.agentInfo.MaxTasks - tc.agentInfo.CurrentTasks,
|
||||
"offer_type": "collaboration",
|
||||
"response_to": msg.Data,
|
||||
"agent_id": tc.agentInfo.ID,
|
||||
"agent_role": tc.agentInfo.Role,
|
||||
"expertise": tc.agentInfo.Expertise,
|
||||
"availability": tc.agentInfo.MaxTasks - tc.agentInfo.CurrentTasks,
|
||||
"offer_type": "collaboration",
|
||||
"response_to": msg.Data,
|
||||
}
|
||||
|
||||
opts := pubsub.MessageOptions{
|
||||
@@ -480,34 +498,34 @@ func (tc *TaskCoordinator) handleTaskHelpRequest(msg pubsub.Message, from peer.I
|
||||
ThreadID: msg.ThreadID,
|
||||
}
|
||||
|
||||
err := tc.pubsub.PublishRoleBasedMessage(pubsub.TaskHelpResponse, responseData, opts)
|
||||
if err != nil {
|
||||
fmt.Printf("⚠️ Failed to offer help: %v\n", err)
|
||||
} else {
|
||||
fmt.Printf("🤝 Offered help for task collaboration\n")
|
||||
}
|
||||
err := tc.pubsub.PublishRoleBasedMessage(pubsub.TaskHelpResponse, responseData, opts)
|
||||
if err != nil {
|
||||
fmt.Printf("⚠️ Failed to offer help: %v\n", err)
|
||||
} else {
|
||||
fmt.Printf("🤝 Offered help for task collaboration\n")
|
||||
}
|
||||
|
||||
// Also reflect the help offer into the HMMM per-issue room (best-effort)
|
||||
if tc.hmmmRouter != nil {
|
||||
if tn, ok := msg.Data["task_number"].(float64); ok {
|
||||
issueID := int64(tn)
|
||||
hmsg := hmmm.Message{
|
||||
Version: 1,
|
||||
Type: "meta_msg",
|
||||
IssueID: issueID,
|
||||
ThreadID: fmt.Sprintf("issue-%d", issueID),
|
||||
MsgID: uuid.New().String(),
|
||||
NodeID: tc.nodeID,
|
||||
HopCount: 0,
|
||||
Timestamp: time.Now().UTC(),
|
||||
Message: fmt.Sprintf("Help offer from %s (availability %d)", tc.agentInfo.Role, tc.agentInfo.MaxTasks-tc.agentInfo.CurrentTasks),
|
||||
}
|
||||
if err := tc.hmmmRouter.Publish(tc.ctx, hmsg); err != nil {
|
||||
fmt.Printf("⚠️ Failed to reflect help into HMMM: %v\n", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Also reflect the help offer into the HMMM per-issue room (best-effort)
|
||||
if tc.hmmmRouter != nil {
|
||||
if tn, ok := msg.Data["task_number"].(float64); ok {
|
||||
issueID := int64(tn)
|
||||
hmsg := hmmm.Message{
|
||||
Version: 1,
|
||||
Type: "meta_msg",
|
||||
IssueID: issueID,
|
||||
ThreadID: fmt.Sprintf("issue-%d", issueID),
|
||||
MsgID: uuid.New().String(),
|
||||
NodeID: tc.nodeID,
|
||||
HopCount: 0,
|
||||
Timestamp: time.Now().UTC(),
|
||||
Message: fmt.Sprintf("Help offer from %s (availability %d)", tc.agentInfo.Role, tc.agentInfo.MaxTasks-tc.agentInfo.CurrentTasks),
|
||||
}
|
||||
if err := tc.hmmmRouter.Publish(tc.ctx, hmsg); err != nil {
|
||||
fmt.Printf("⚠️ Failed to reflect help into HMMM: %v\n", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// handleExpertiseRequest handles requests for specific expertise
|
||||
|
||||
30
docs/decisions/2025-02-16-shhh-sentinel-foundation.md
Normal file
30
docs/decisions/2025-02-16-shhh-sentinel-foundation.md
Normal file
@@ -0,0 +1,30 @@
|
||||
# Decision Record: Establish SHHH Sentinel Foundations
|
||||
|
||||
- **Date:** 2025-02-16
|
||||
- **Status:** Accepted
|
||||
- **Context:** CHORUS roadmap Phase 1 requires a secrets sentinel (`pkg/shhh`) before we wire COOEE/WHOOSH telemetry and audit plumbing. The runtime previously emitted placeholder TODOs and logged sensitive payloads without guard rails.
|
||||
|
||||
## Problem
|
||||
- We lacked a reusable component to detect and redact secrets prior to log/telemetry fan-out.
|
||||
- Without a dedicated sentinel we could not attach audit sinks or surface metrics for redaction events, blocking roadmap item `SEC-SHHH`.
|
||||
|
||||
## Decision
|
||||
- Introduce `pkg/shhh` as the SHHH sentinel with:
|
||||
- Curated default rules (API keys, bearer/OAuth tokens, private key PEM blocks, OpenAI secrets).
|
||||
- Extensible configuration for custom regex rules and per-rule severity/tags.
|
||||
- Optional audit sink and statistics collection for integration with COOEE/WHOOSH pipelines.
|
||||
- Helpers to redact free-form text and `map[string]any` payloads used by our logging pipeline.
|
||||
|
||||
## Rationale
|
||||
- Starting with a focused set of high-signal rules gives immediate coverage for the most damaging leak classes without delaying larger SLURP/SHHH workstreams.
|
||||
- The API mirrors other CHORUS subsystems (options, config structs, stats snapshots) so existing operators can plug metrics/audits without bespoke glue.
|
||||
- Providing deterministic findings/locations simplifies future enforcement (e.g., WHOOSH UI badges, COOEE replay) while keeping implementation lean.
|
||||
|
||||
## Impact
|
||||
- Runtime components can now instantiate SHHH and guarantee `[REDACTED]` placeholders for sensitive fields.
|
||||
- Audit/event plumbing can be wired incrementally—hashes are emitted for replay without storing raw secrets.
|
||||
- Future roadmap tasks (policy driven rules, replay, UCXL evidence) can extend `pkg/shhh` rather than implementing ad-hoc redaction in each subsystem.
|
||||
|
||||
## Related Work
|
||||
- Roadmap: `docs/progress/CHORUS-WHOOSH-roadmap.md` (Phase 1.2 `SEC-SHHH`).
|
||||
- README coverage gap noted in `README.md` table (SHHH not implemented).
|
||||
2
go.mod
2
go.mod
@@ -159,4 +159,4 @@ require (
|
||||
lukechampine.com/blake3 v1.2.1 // indirect
|
||||
)
|
||||
|
||||
replace github.com/chorus-services/backbeat => /home/tony/chorus/project-queues/active/BACKBEAT/backbeat/prototype
|
||||
replace github.com/chorus-services/backbeat => ../BACKBEAT/backbeat/prototype
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package logging
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
@@ -8,6 +9,7 @@ import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"chorus/pkg/shhh"
|
||||
"github.com/libp2p/go-libp2p/core/peer"
|
||||
)
|
||||
|
||||
@@ -29,6 +31,8 @@ type HypercoreLog struct {
|
||||
|
||||
// Replication
|
||||
replicators map[peer.ID]*Replicator
|
||||
|
||||
redactor *shhh.Sentinel
|
||||
}
|
||||
|
||||
// LogEntry represents a single entry in the distributed log
|
||||
@@ -48,11 +52,11 @@ type LogType string
|
||||
|
||||
const (
|
||||
// Bzzz coordination logs
|
||||
TaskAnnounced LogType = "task_announced"
|
||||
TaskClaimed LogType = "task_claimed"
|
||||
TaskProgress LogType = "task_progress"
|
||||
TaskCompleted LogType = "task_completed"
|
||||
TaskFailed LogType = "task_failed"
|
||||
TaskAnnounced LogType = "task_announced"
|
||||
TaskClaimed LogType = "task_claimed"
|
||||
TaskProgress LogType = "task_progress"
|
||||
TaskCompleted LogType = "task_completed"
|
||||
TaskFailed LogType = "task_failed"
|
||||
|
||||
// HMMM meta-discussion logs
|
||||
PlanProposed LogType = "plan_proposed"
|
||||
@@ -65,17 +69,17 @@ const (
|
||||
TaskHelpReceived LogType = "task_help_received"
|
||||
|
||||
// System logs
|
||||
PeerJoined LogType = "peer_joined"
|
||||
PeerLeft LogType = "peer_left"
|
||||
PeerJoined LogType = "peer_joined"
|
||||
PeerLeft LogType = "peer_left"
|
||||
CapabilityBcast LogType = "capability_broadcast"
|
||||
NetworkEvent LogType = "network_event"
|
||||
NetworkEvent LogType = "network_event"
|
||||
)
|
||||
|
||||
// Replicator handles log replication with other peers
|
||||
type Replicator struct {
|
||||
peerID peer.ID
|
||||
peerID peer.ID
|
||||
lastSyncIndex uint64
|
||||
connected bool
|
||||
connected bool
|
||||
}
|
||||
|
||||
// NewHypercoreLog creates a new distributed log for a peer
|
||||
@@ -88,6 +92,13 @@ func NewHypercoreLog(peerID peer.ID) *HypercoreLog {
|
||||
}
|
||||
}
|
||||
|
||||
// SetRedactor wires the SHHH sentinel so log payloads are sanitized before persistence.
|
||||
func (h *HypercoreLog) SetRedactor(redactor *shhh.Sentinel) {
|
||||
h.mutex.Lock()
|
||||
defer h.mutex.Unlock()
|
||||
h.redactor = redactor
|
||||
}
|
||||
|
||||
// AppendString is a convenience method for string log types (to match interface)
|
||||
func (h *HypercoreLog) AppendString(logType string, data map[string]interface{}) error {
|
||||
_, err := h.Append(LogType(logType), data)
|
||||
@@ -101,12 +112,14 @@ func (h *HypercoreLog) Append(logType LogType, data map[string]interface{}) (*Lo
|
||||
|
||||
index := uint64(len(h.entries))
|
||||
|
||||
sanitized := h.redactData(logType, data)
|
||||
|
||||
entry := LogEntry{
|
||||
Index: index,
|
||||
Timestamp: time.Now(),
|
||||
Author: h.peerID.String(),
|
||||
Type: logType,
|
||||
Data: data,
|
||||
Data: sanitized,
|
||||
PrevHash: h.headHash,
|
||||
}
|
||||
|
||||
@@ -276,9 +289,9 @@ func (h *HypercoreLog) AddReplicator(peerID peer.ID) {
|
||||
defer h.mutex.Unlock()
|
||||
|
||||
h.replicators[peerID] = &Replicator{
|
||||
peerID: peerID,
|
||||
peerID: peerID,
|
||||
lastSyncIndex: 0,
|
||||
connected: true,
|
||||
connected: true,
|
||||
}
|
||||
|
||||
fmt.Printf("🔄 Added replicator: %s\n", peerID.ShortString())
|
||||
@@ -332,6 +345,65 @@ func (h *HypercoreLog) calculateEntryHash(entry LogEntry) (string, error) {
|
||||
return hex.EncodeToString(hash[:]), nil
|
||||
}
|
||||
|
||||
func (h *HypercoreLog) redactData(logType LogType, data map[string]interface{}) map[string]interface{} {
|
||||
cloned := cloneLogMap(data)
|
||||
if cloned == nil {
|
||||
return nil
|
||||
}
|
||||
if h.redactor != nil {
|
||||
labels := map[string]string{
|
||||
"source": "hypercore",
|
||||
"log_type": string(logType),
|
||||
}
|
||||
h.redactor.RedactMapWithLabels(context.Background(), cloned, labels)
|
||||
}
|
||||
return cloned
|
||||
}
|
||||
|
||||
func cloneLogMap(in map[string]interface{}) map[string]interface{} {
|
||||
if in == nil {
|
||||
return nil
|
||||
}
|
||||
out := make(map[string]interface{}, len(in))
|
||||
for k, v := range in {
|
||||
out[k] = cloneLogValue(v)
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func cloneLogValue(v interface{}) interface{} {
|
||||
switch tv := v.(type) {
|
||||
case map[string]interface{}:
|
||||
return cloneLogMap(tv)
|
||||
case map[string]any:
|
||||
converted := make(map[string]interface{}, len(tv))
|
||||
for k, val := range tv {
|
||||
converted[k] = cloneLogValue(val)
|
||||
}
|
||||
return converted
|
||||
case []interface{}:
|
||||
return cloneLogSlice(tv)
|
||||
case []any:
|
||||
converted := make([]interface{}, len(tv))
|
||||
for i, val := range tv {
|
||||
converted[i] = cloneLogValue(val)
|
||||
}
|
||||
return converted
|
||||
case []string:
|
||||
return append([]string(nil), tv...)
|
||||
default:
|
||||
return tv
|
||||
}
|
||||
}
|
||||
|
||||
func cloneLogSlice(in []interface{}) []interface{} {
|
||||
out := make([]interface{}, len(in))
|
||||
for i, val := range in {
|
||||
out[i] = cloneLogValue(val)
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// createSignature creates a simplified signature for the entry
|
||||
func (h *HypercoreLog) createSignature(entry LogEntry) string {
|
||||
// In production, this would use proper cryptographic signatures
|
||||
@@ -355,11 +427,11 @@ func (h *HypercoreLog) GetStats() map[string]interface{} {
|
||||
}
|
||||
|
||||
return map[string]interface{}{
|
||||
"total_entries": len(h.entries),
|
||||
"head_hash": h.headHash,
|
||||
"replicators": len(h.replicators),
|
||||
"entries_by_type": typeCount,
|
||||
"total_entries": len(h.entries),
|
||||
"head_hash": h.headHash,
|
||||
"replicators": len(h.replicators),
|
||||
"entries_by_type": typeCount,
|
||||
"entries_by_author": authorCount,
|
||||
"peer_id": h.peerID.String(),
|
||||
"peer_id": h.peerID.String(),
|
||||
}
|
||||
}
|
||||
@@ -2,9 +2,11 @@ package runtime
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"chorus/internal/logging"
|
||||
"chorus/pkg/dht"
|
||||
"chorus/pkg/health"
|
||||
"chorus/pkg/shutdown"
|
||||
"chorus/pubsub"
|
||||
@@ -99,13 +101,13 @@ func (r *SharedRuntime) announceAvailability() {
|
||||
}
|
||||
|
||||
availability := map[string]interface{}{
|
||||
"node_id": r.Node.ID().ShortString(),
|
||||
"node_id": r.Node.ID().ShortString(),
|
||||
"available_for_work": isAvailable,
|
||||
"current_tasks": len(currentTasks),
|
||||
"max_tasks": maxTasks,
|
||||
"last_activity": time.Now().Unix(),
|
||||
"status": status,
|
||||
"timestamp": time.Now().Unix(),
|
||||
"current_tasks": len(currentTasks),
|
||||
"max_tasks": maxTasks,
|
||||
"last_activity": time.Now().Unix(),
|
||||
"status": status,
|
||||
"timestamp": time.Now().Unix(),
|
||||
}
|
||||
if err := r.PubSub.PublishBzzzMessage(pubsub.AvailabilityBcast, availability); err != nil {
|
||||
r.Logger.Error("❌ Failed to announce availability: %v", err)
|
||||
@@ -126,16 +128,79 @@ func (r *SharedRuntime) statusReporter() {
|
||||
|
||||
// announceCapabilitiesOnChange announces capabilities when they change
|
||||
func (r *SharedRuntime) announceCapabilitiesOnChange() {
|
||||
// Implementation from CHORUS would go here
|
||||
// For now, just log that capabilities would be announced
|
||||
r.Logger.Info("📢 Agent capabilities announcement enabled")
|
||||
if r.PubSub == nil {
|
||||
r.Logger.Warn("⚠️ Capability broadcast skipped: PubSub not initialized")
|
||||
return
|
||||
}
|
||||
|
||||
r.Logger.Info("📢 Broadcasting agent capabilities to network")
|
||||
|
||||
activeTaskCount := 0
|
||||
if r.TaskTracker != nil {
|
||||
activeTaskCount = len(r.TaskTracker.GetActiveTasks())
|
||||
}
|
||||
|
||||
announcement := map[string]interface{}{
|
||||
"agent_id": r.Config.Agent.ID,
|
||||
"node_id": r.Node.ID().ShortString(),
|
||||
"version": AppVersion,
|
||||
"capabilities": r.Config.Agent.Capabilities,
|
||||
"expertise": r.Config.Agent.Expertise,
|
||||
"models": r.Config.Agent.Models,
|
||||
"specialization": r.Config.Agent.Specialization,
|
||||
"max_tasks": r.Config.Agent.MaxTasks,
|
||||
"current_tasks": activeTaskCount,
|
||||
"timestamp": time.Now().Unix(),
|
||||
"availability": "ready",
|
||||
}
|
||||
|
||||
if err := r.PubSub.PublishBzzzMessage(pubsub.CapabilityBcast, announcement); err != nil {
|
||||
r.Logger.Error("❌ Failed to broadcast capabilities: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
r.Logger.Info("✅ Capabilities broadcast published")
|
||||
|
||||
// TODO: Watch for live capability changes (role updates, model changes) and re-broadcast
|
||||
}
|
||||
|
||||
// announceRoleOnStartup announces role when the agent starts
|
||||
func (r *SharedRuntime) announceRoleOnStartup() {
|
||||
// Implementation from CHORUS would go here
|
||||
// For now, just log that role would be announced
|
||||
r.Logger.Info("🎭 Agent role announcement enabled")
|
||||
role := r.Config.Agent.Role
|
||||
if role == "" {
|
||||
r.Logger.Info("🎭 No agent role configured; skipping role announcement")
|
||||
return
|
||||
}
|
||||
if r.PubSub == nil {
|
||||
r.Logger.Warn("⚠️ Role announcement skipped: PubSub not initialized")
|
||||
return
|
||||
}
|
||||
|
||||
r.Logger.Info("🎭 Announcing agent role to collaboration mesh")
|
||||
|
||||
announcement := map[string]interface{}{
|
||||
"agent_id": r.Config.Agent.ID,
|
||||
"node_id": r.Node.ID().ShortString(),
|
||||
"role": role,
|
||||
"expertise": r.Config.Agent.Expertise,
|
||||
"capabilities": r.Config.Agent.Capabilities,
|
||||
"reports_to": r.Config.Agent.ReportsTo,
|
||||
"specialization": r.Config.Agent.Specialization,
|
||||
"timestamp": time.Now().Unix(),
|
||||
}
|
||||
|
||||
opts := pubsub.MessageOptions{
|
||||
FromRole: role,
|
||||
Priority: "medium",
|
||||
ThreadID: fmt.Sprintf("role:%s", role),
|
||||
}
|
||||
|
||||
if err := r.PubSub.PublishRoleBasedMessage(pubsub.RoleAnnouncement, announcement, opts); err != nil {
|
||||
r.Logger.Error("❌ Failed to announce role: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
r.Logger.Info("✅ Role announcement published")
|
||||
}
|
||||
|
||||
func (r *SharedRuntime) setupHealthChecks(healthManager *health.Manager) {
|
||||
@@ -170,12 +235,89 @@ func (r *SharedRuntime) setupHealthChecks(healthManager *health.Manager) {
|
||||
healthManager.RegisterCheck(backbeatCheck)
|
||||
}
|
||||
|
||||
// Add other health checks (P2P, DHT, etc.)
|
||||
// Implementation from CHORUS would go here
|
||||
// Register enhanced health instrumentation when core subsystems are available
|
||||
if r.PubSub == nil {
|
||||
r.Logger.Warn("⚠️ Skipping enhanced health checks: PubSub not initialized")
|
||||
return
|
||||
}
|
||||
if r.ElectionManager == nil {
|
||||
r.Logger.Warn("⚠️ Skipping enhanced health checks: election manager not ready")
|
||||
return
|
||||
}
|
||||
|
||||
var replication *dht.ReplicationManager
|
||||
if r.DHTNode != nil {
|
||||
replication = r.DHTNode.ReplicationManager()
|
||||
}
|
||||
|
||||
enhanced := health.NewEnhancedHealthChecks(
|
||||
healthManager,
|
||||
r.ElectionManager,
|
||||
r.DHTNode,
|
||||
r.PubSub,
|
||||
replication,
|
||||
&simpleLogger{logger: r.Logger},
|
||||
)
|
||||
|
||||
r.EnhancedHealth = enhanced
|
||||
r.Logger.Info("🩺 Enhanced health checks registered")
|
||||
}
|
||||
|
||||
func (r *SharedRuntime) setupGracefulShutdown(shutdownManager *shutdown.Manager, healthManager *health.Manager) {
|
||||
// Register components for graceful shutdown
|
||||
// Implementation would register all components that need graceful shutdown
|
||||
if shutdownManager == nil {
|
||||
r.Logger.Warn("⚠️ Shutdown manager not initialized; graceful teardown skipped")
|
||||
return
|
||||
}
|
||||
|
||||
if r.HTTPServer != nil {
|
||||
httpComponent := shutdown.NewGenericComponent("http-api-server", 10, true).
|
||||
SetShutdownFunc(func(ctx context.Context) error {
|
||||
return r.HTTPServer.Stop()
|
||||
})
|
||||
shutdownManager.Register(httpComponent)
|
||||
}
|
||||
|
||||
if healthManager != nil {
|
||||
healthComponent := shutdown.NewGenericComponent("health-manager", 15, true).
|
||||
SetShutdownFunc(func(ctx context.Context) error {
|
||||
return healthManager.Stop()
|
||||
})
|
||||
shutdownManager.Register(healthComponent)
|
||||
}
|
||||
|
||||
if r.UCXIServer != nil {
|
||||
ucxiComponent := shutdown.NewGenericComponent("ucxi-server", 20, true).
|
||||
SetShutdownFunc(func(ctx context.Context) error {
|
||||
return r.UCXIServer.Stop()
|
||||
})
|
||||
shutdownManager.Register(ucxiComponent)
|
||||
}
|
||||
|
||||
if r.PubSub != nil {
|
||||
shutdownManager.Register(shutdown.NewPubSubComponent("pubsub", r.PubSub.Close, 30))
|
||||
}
|
||||
|
||||
if r.DHTNode != nil {
|
||||
dhtComponent := shutdown.NewGenericComponent("dht-node", 35, true).
|
||||
SetCloser(r.DHTNode.Close)
|
||||
shutdownManager.Register(dhtComponent)
|
||||
}
|
||||
|
||||
if r.Node != nil {
|
||||
shutdownManager.Register(shutdown.NewP2PNodeComponent("p2p-node", r.Node.Close, 40))
|
||||
}
|
||||
|
||||
if r.ElectionManager != nil {
|
||||
shutdownManager.Register(shutdown.NewElectionManagerComponent("election-manager", r.ElectionManager.Stop, 45))
|
||||
}
|
||||
|
||||
if r.BackbeatIntegration != nil {
|
||||
backbeatComponent := shutdown.NewGenericComponent("backbeat-integration", 50, true).
|
||||
SetShutdownFunc(func(ctx context.Context) error {
|
||||
return r.BackbeatIntegration.Stop()
|
||||
})
|
||||
shutdownManager.Register(backbeatComponent)
|
||||
}
|
||||
|
||||
r.Logger.Info("🛡️ Graceful shutdown components registered")
|
||||
}
|
||||
@@ -21,8 +21,10 @@ import (
|
||||
"chorus/pkg/dht"
|
||||
"chorus/pkg/election"
|
||||
"chorus/pkg/health"
|
||||
"chorus/pkg/shutdown"
|
||||
"chorus/pkg/metrics"
|
||||
"chorus/pkg/prompt"
|
||||
"chorus/pkg/shhh"
|
||||
"chorus/pkg/shutdown"
|
||||
"chorus/pkg/ucxi"
|
||||
"chorus/pkg/ucxl"
|
||||
"chorus/pubsub"
|
||||
@@ -53,8 +55,8 @@ func (l *SimpleLogger) Error(msg string, args ...interface{}) {
|
||||
|
||||
// SimpleTaskTracker tracks active tasks for availability reporting
|
||||
type SimpleTaskTracker struct {
|
||||
maxTasks int
|
||||
activeTasks map[string]bool
|
||||
maxTasks int
|
||||
activeTasks map[string]bool
|
||||
decisionPublisher *ucxl.DecisionPublisher
|
||||
}
|
||||
|
||||
@@ -102,25 +104,28 @@ func (t *SimpleTaskTracker) publishTaskCompletion(taskID string, success bool, s
|
||||
|
||||
// SharedRuntime contains all the shared P2P infrastructure components
|
||||
type SharedRuntime struct {
|
||||
Config *config.Config
|
||||
Logger *SimpleLogger
|
||||
Context context.Context
|
||||
Cancel context.CancelFunc
|
||||
Node *p2p.Node
|
||||
PubSub *pubsub.PubSub
|
||||
HypercoreLog *logging.HypercoreLog
|
||||
MDNSDiscovery *discovery.MDNSDiscovery
|
||||
BackbeatIntegration *backbeat.Integration
|
||||
DHTNode *dht.LibP2PDHT
|
||||
EncryptedStorage *dht.EncryptedDHTStorage
|
||||
DecisionPublisher *ucxl.DecisionPublisher
|
||||
ElectionManager *election.ElectionManager
|
||||
TaskCoordinator *coordinator.TaskCoordinator
|
||||
HTTPServer *api.HTTPServer
|
||||
UCXIServer *ucxi.Server
|
||||
HealthManager *health.Manager
|
||||
ShutdownManager *shutdown.Manager
|
||||
TaskTracker *SimpleTaskTracker
|
||||
Config *config.Config
|
||||
Logger *SimpleLogger
|
||||
Context context.Context
|
||||
Cancel context.CancelFunc
|
||||
Node *p2p.Node
|
||||
PubSub *pubsub.PubSub
|
||||
HypercoreLog *logging.HypercoreLog
|
||||
MDNSDiscovery *discovery.MDNSDiscovery
|
||||
BackbeatIntegration *backbeat.Integration
|
||||
DHTNode *dht.LibP2PDHT
|
||||
EncryptedStorage *dht.EncryptedDHTStorage
|
||||
DecisionPublisher *ucxl.DecisionPublisher
|
||||
ElectionManager *election.ElectionManager
|
||||
TaskCoordinator *coordinator.TaskCoordinator
|
||||
HTTPServer *api.HTTPServer
|
||||
UCXIServer *ucxi.Server
|
||||
HealthManager *health.Manager
|
||||
EnhancedHealth *health.EnhancedHealthChecks
|
||||
ShutdownManager *shutdown.Manager
|
||||
TaskTracker *SimpleTaskTracker
|
||||
Metrics *metrics.CHORUSMetrics
|
||||
Shhh *shhh.Sentinel
|
||||
}
|
||||
|
||||
// Initialize sets up all shared P2P infrastructure components
|
||||
@@ -166,6 +171,21 @@ func Initialize(appMode string) (*SharedRuntime, error) {
|
||||
}
|
||||
runtime.Logger.Info("✅ AI provider configured successfully")
|
||||
|
||||
// Initialize metrics collector
|
||||
runtime.Metrics = metrics.NewCHORUSMetrics(nil)
|
||||
|
||||
// Initialize SHHH sentinel
|
||||
sentinel, err := shhh.NewSentinel(
|
||||
shhh.Config{},
|
||||
shhh.WithFindingObserver(runtime.handleShhhFindings),
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to initialize SHHH sentinel: %v", err)
|
||||
}
|
||||
sentinel.SetAuditSink(&shhhAuditSink{logger: runtime.Logger})
|
||||
runtime.Shhh = sentinel
|
||||
runtime.Logger.Info("🛡️ SHHH sentinel initialized")
|
||||
|
||||
// Initialize BACKBEAT integration
|
||||
var backbeatIntegration *backbeat.Integration
|
||||
backbeatIntegration, err = backbeat.NewIntegration(cfg, cfg.Agent.ID, runtime.Logger)
|
||||
@@ -198,6 +218,9 @@ func Initialize(appMode string) (*SharedRuntime, error) {
|
||||
|
||||
// Initialize Hypercore-style logger for P2P coordination
|
||||
hlog := logging.NewHypercoreLog(node.ID())
|
||||
if runtime.Shhh != nil {
|
||||
hlog.SetRedactor(runtime.Shhh)
|
||||
}
|
||||
hlog.Append(logging.PeerJoined, map[string]interface{}{"status": "started"})
|
||||
runtime.HypercoreLog = hlog
|
||||
runtime.Logger.Info("📝 Hypercore logger initialized")
|
||||
@@ -214,6 +237,9 @@ func Initialize(appMode string) (*SharedRuntime, error) {
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create PubSub: %v", err)
|
||||
}
|
||||
if runtime.Shhh != nil {
|
||||
ps.SetRedactor(runtime.Shhh)
|
||||
}
|
||||
runtime.PubSub = ps
|
||||
|
||||
runtime.Logger.Info("📡 PubSub system initialized")
|
||||
@@ -329,7 +355,7 @@ func (r *SharedRuntime) initializeElectionSystem() error {
|
||||
if r.BackbeatIntegration != nil {
|
||||
operationID := fmt.Sprintf("election-completed-%d", time.Now().Unix())
|
||||
if err := r.BackbeatIntegration.StartP2POperation(operationID, "election", 1, map[string]interface{}{
|
||||
"winner": winner,
|
||||
"winner": winner,
|
||||
"node_id": r.Node.ID().ShortString(),
|
||||
}); err == nil {
|
||||
r.BackbeatIntegration.CompleteP2POperation(operationID, 1)
|
||||
@@ -456,6 +482,19 @@ func (r *SharedRuntime) initializeDHTStorage() error {
|
||||
}
|
||||
|
||||
func (r *SharedRuntime) initializeServices() error {
|
||||
// Create simple task tracker ahead of coordinator so broadcasts stay accurate
|
||||
taskTracker := &SimpleTaskTracker{
|
||||
maxTasks: r.Config.Agent.MaxTasks,
|
||||
activeTasks: make(map[string]bool),
|
||||
}
|
||||
|
||||
// Connect decision publisher to task tracker if available
|
||||
if r.DecisionPublisher != nil {
|
||||
taskTracker.decisionPublisher = r.DecisionPublisher
|
||||
r.Logger.Info("📤 Task completion decisions will be published to DHT")
|
||||
}
|
||||
r.TaskTracker = taskTracker
|
||||
|
||||
// === Task Coordination Integration ===
|
||||
taskCoordinator := coordinator.NewTaskCoordinator(
|
||||
r.Context,
|
||||
@@ -464,6 +503,7 @@ func (r *SharedRuntime) initializeServices() error {
|
||||
r.Config,
|
||||
r.Node.ID().ShortString(),
|
||||
nil, // HMMM router placeholder
|
||||
taskTracker,
|
||||
)
|
||||
|
||||
taskCoordinator.Start()
|
||||
@@ -515,23 +555,29 @@ func (r *SharedRuntime) initializeServices() error {
|
||||
r.Logger.Info("⚪ UCXI server disabled")
|
||||
}
|
||||
r.UCXIServer = ucxiServer
|
||||
|
||||
// Create simple task tracker
|
||||
taskTracker := &SimpleTaskTracker{
|
||||
maxTasks: r.Config.Agent.MaxTasks,
|
||||
activeTasks: make(map[string]bool),
|
||||
}
|
||||
|
||||
// Connect decision publisher to task tracker if available
|
||||
if r.DecisionPublisher != nil {
|
||||
taskTracker.decisionPublisher = r.DecisionPublisher
|
||||
r.Logger.Info("📤 Task completion decisions will be published to DHT")
|
||||
}
|
||||
r.TaskTracker = taskTracker
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (r *SharedRuntime) handleShhhFindings(ctx context.Context, findings []shhh.Finding) {
|
||||
if r == nil || r.Metrics == nil {
|
||||
return
|
||||
}
|
||||
for _, finding := range findings {
|
||||
r.Metrics.IncrementSHHHFindings(finding.Rule, string(finding.Severity), finding.Count)
|
||||
}
|
||||
}
|
||||
|
||||
type shhhAuditSink struct {
|
||||
logger *SimpleLogger
|
||||
}
|
||||
|
||||
func (s *shhhAuditSink) RecordRedaction(_ context.Context, event shhh.AuditEvent) {
|
||||
if s == nil || s.logger == nil {
|
||||
return
|
||||
}
|
||||
s.logger.Warn("🔒 SHHH redaction applied (rule=%s severity=%s path=%s)", event.Rule, event.Severity, event.Path)
|
||||
}
|
||||
|
||||
// initializeAIProvider configures the reasoning engine with the appropriate AI provider
|
||||
func initializeAIProvider(cfg *config.Config, logger *SimpleLogger) error {
|
||||
// Set the AI provider
|
||||
|
||||
@@ -28,17 +28,18 @@ type Config struct {
|
||||
|
||||
// AgentConfig defines agent-specific settings
|
||||
type AgentConfig struct {
|
||||
ID string `yaml:"id"`
|
||||
Specialization string `yaml:"specialization"`
|
||||
MaxTasks int `yaml:"max_tasks"`
|
||||
Capabilities []string `yaml:"capabilities"`
|
||||
Models []string `yaml:"models"`
|
||||
Role string `yaml:"role"`
|
||||
Expertise []string `yaml:"expertise"`
|
||||
ReportsTo string `yaml:"reports_to"`
|
||||
Deliverables []string `yaml:"deliverables"`
|
||||
ModelSelectionWebhook string `yaml:"model_selection_webhook"`
|
||||
DefaultReasoningModel string `yaml:"default_reasoning_model"`
|
||||
ID string `yaml:"id"`
|
||||
Specialization string `yaml:"specialization"`
|
||||
MaxTasks int `yaml:"max_tasks"`
|
||||
Capabilities []string `yaml:"capabilities"`
|
||||
Models []string `yaml:"models"`
|
||||
Role string `yaml:"role"`
|
||||
Project string `yaml:"project"`
|
||||
Expertise []string `yaml:"expertise"`
|
||||
ReportsTo string `yaml:"reports_to"`
|
||||
Deliverables []string `yaml:"deliverables"`
|
||||
ModelSelectionWebhook string `yaml:"model_selection_webhook"`
|
||||
DefaultReasoningModel string `yaml:"default_reasoning_model"`
|
||||
}
|
||||
|
||||
// NetworkConfig defines network and API settings
|
||||
@@ -65,9 +66,9 @@ type LicenseConfig struct {
|
||||
|
||||
// AIConfig defines AI service settings
|
||||
type AIConfig struct {
|
||||
Provider string `yaml:"provider"`
|
||||
Ollama OllamaConfig `yaml:"ollama"`
|
||||
ResetData ResetDataConfig `yaml:"resetdata"`
|
||||
Provider string `yaml:"provider"`
|
||||
Ollama OllamaConfig `yaml:"ollama"`
|
||||
ResetData ResetDataConfig `yaml:"resetdata"`
|
||||
}
|
||||
|
||||
// OllamaConfig defines Ollama-specific settings
|
||||
@@ -78,10 +79,10 @@ type OllamaConfig struct {
|
||||
|
||||
// ResetDataConfig defines ResetData LLM service settings
|
||||
type ResetDataConfig struct {
|
||||
BaseURL string `yaml:"base_url"`
|
||||
APIKey string `yaml:"api_key"`
|
||||
Model string `yaml:"model"`
|
||||
Timeout time.Duration `yaml:"timeout"`
|
||||
BaseURL string `yaml:"base_url"`
|
||||
APIKey string `yaml:"api_key"`
|
||||
Model string `yaml:"model"`
|
||||
Timeout time.Duration `yaml:"timeout"`
|
||||
}
|
||||
|
||||
// LoggingConfig defines logging settings
|
||||
@@ -103,9 +104,9 @@ type DHTConfig struct {
|
||||
|
||||
// UCXLConfig defines UCXL protocol settings
|
||||
type UCXLConfig struct {
|
||||
Enabled bool `yaml:"enabled"`
|
||||
Server ServerConfig `yaml:"server"`
|
||||
Storage StorageConfig `yaml:"storage"`
|
||||
Enabled bool `yaml:"enabled"`
|
||||
Server ServerConfig `yaml:"server"`
|
||||
Storage StorageConfig `yaml:"storage"`
|
||||
Resolution ResolutionConfig `yaml:"resolution"`
|
||||
}
|
||||
|
||||
@@ -133,25 +134,26 @@ type SlurpConfig struct {
|
||||
|
||||
// WHOOSHAPIConfig defines WHOOSH API integration settings
|
||||
type WHOOSHAPIConfig struct {
|
||||
URL string `yaml:"url"`
|
||||
BaseURL string `yaml:"base_url"`
|
||||
Token string `yaml:"token"`
|
||||
Enabled bool `yaml:"enabled"`
|
||||
URL string `yaml:"url"`
|
||||
BaseURL string `yaml:"base_url"`
|
||||
Token string `yaml:"token"`
|
||||
Enabled bool `yaml:"enabled"`
|
||||
}
|
||||
|
||||
// LoadFromEnvironment loads configuration from environment variables
|
||||
func LoadFromEnvironment() (*Config, error) {
|
||||
cfg := &Config{
|
||||
Agent: AgentConfig{
|
||||
ID: getEnvOrDefault("CHORUS_AGENT_ID", ""),
|
||||
Specialization: getEnvOrDefault("CHORUS_SPECIALIZATION", "general_developer"),
|
||||
MaxTasks: getEnvIntOrDefault("CHORUS_MAX_TASKS", 3),
|
||||
Capabilities: getEnvArrayOrDefault("CHORUS_CAPABILITIES", []string{"general_development", "task_coordination"}),
|
||||
Models: getEnvArrayOrDefault("CHORUS_MODELS", []string{"meta/llama-3.1-8b-instruct"}),
|
||||
Role: getEnvOrDefault("CHORUS_ROLE", ""),
|
||||
Expertise: getEnvArrayOrDefault("CHORUS_EXPERTISE", []string{}),
|
||||
ReportsTo: getEnvOrDefault("CHORUS_REPORTS_TO", ""),
|
||||
Deliverables: getEnvArrayOrDefault("CHORUS_DELIVERABLES", []string{}),
|
||||
ID: getEnvOrDefault("CHORUS_AGENT_ID", ""),
|
||||
Specialization: getEnvOrDefault("CHORUS_SPECIALIZATION", "general_developer"),
|
||||
MaxTasks: getEnvIntOrDefault("CHORUS_MAX_TASKS", 3),
|
||||
Capabilities: getEnvArrayOrDefault("CHORUS_CAPABILITIES", []string{"general_development", "task_coordination"}),
|
||||
Models: getEnvArrayOrDefault("CHORUS_MODELS", []string{"meta/llama-3.1-8b-instruct"}),
|
||||
Role: getEnvOrDefault("CHORUS_ROLE", ""),
|
||||
Project: getEnvOrDefault("CHORUS_PROJECT", "chorus"),
|
||||
Expertise: getEnvArrayOrDefault("CHORUS_EXPERTISE", []string{}),
|
||||
ReportsTo: getEnvOrDefault("CHORUS_REPORTS_TO", ""),
|
||||
Deliverables: getEnvArrayOrDefault("CHORUS_DELIVERABLES", []string{}),
|
||||
ModelSelectionWebhook: getEnvOrDefault("CHORUS_MODEL_SELECTION_WEBHOOK", ""),
|
||||
DefaultReasoningModel: getEnvOrDefault("CHORUS_DEFAULT_REASONING_MODEL", "meta/llama-3.1-8b-instruct"),
|
||||
},
|
||||
@@ -214,10 +216,10 @@ func LoadFromEnvironment() (*Config, error) {
|
||||
AuditLogging: getEnvBoolOrDefault("CHORUS_AUDIT_LOGGING", true),
|
||||
AuditPath: getEnvOrDefault("CHORUS_AUDIT_PATH", "/tmp/chorus-audit.log"),
|
||||
ElectionConfig: ElectionConfig{
|
||||
DiscoveryTimeout: getEnvDurationOrDefault("CHORUS_DISCOVERY_TIMEOUT", 10*time.Second),
|
||||
HeartbeatTimeout: getEnvDurationOrDefault("CHORUS_HEARTBEAT_TIMEOUT", 30*time.Second),
|
||||
ElectionTimeout: getEnvDurationOrDefault("CHORUS_ELECTION_TIMEOUT", 60*time.Second),
|
||||
DiscoveryBackoff: getEnvDurationOrDefault("CHORUS_DISCOVERY_BACKOFF", 5*time.Second),
|
||||
DiscoveryTimeout: getEnvDurationOrDefault("CHORUS_DISCOVERY_TIMEOUT", 10*time.Second),
|
||||
HeartbeatTimeout: getEnvDurationOrDefault("CHORUS_HEARTBEAT_TIMEOUT", 30*time.Second),
|
||||
ElectionTimeout: getEnvDurationOrDefault("CHORUS_ELECTION_TIMEOUT", 60*time.Second),
|
||||
DiscoveryBackoff: getEnvDurationOrDefault("CHORUS_DISCOVERY_BACKOFF", 5*time.Second),
|
||||
LeadershipScoring: &LeadershipScoring{
|
||||
UptimeWeight: 0.4,
|
||||
CapabilityWeight: 0.3,
|
||||
|
||||
@@ -12,27 +12,27 @@ const (
|
||||
|
||||
// SecurityConfig defines security-related configuration
|
||||
type SecurityConfig struct {
|
||||
KeyRotationDays int `yaml:"key_rotation_days"`
|
||||
AuditLogging bool `yaml:"audit_logging"`
|
||||
AuditPath string `yaml:"audit_path"`
|
||||
ElectionConfig ElectionConfig `yaml:"election"`
|
||||
KeyRotationDays int `yaml:"key_rotation_days"`
|
||||
AuditLogging bool `yaml:"audit_logging"`
|
||||
AuditPath string `yaml:"audit_path"`
|
||||
ElectionConfig ElectionConfig `yaml:"election"`
|
||||
}
|
||||
|
||||
// ElectionConfig defines election timing and behavior settings
|
||||
type ElectionConfig struct {
|
||||
DiscoveryTimeout time.Duration `yaml:"discovery_timeout"`
|
||||
HeartbeatTimeout time.Duration `yaml:"heartbeat_timeout"`
|
||||
ElectionTimeout time.Duration `yaml:"election_timeout"`
|
||||
DiscoveryBackoff time.Duration `yaml:"discovery_backoff"`
|
||||
LeadershipScoring *LeadershipScoring `yaml:"leadership_scoring,omitempty"`
|
||||
DiscoveryTimeout time.Duration `yaml:"discovery_timeout"`
|
||||
HeartbeatTimeout time.Duration `yaml:"heartbeat_timeout"`
|
||||
ElectionTimeout time.Duration `yaml:"election_timeout"`
|
||||
DiscoveryBackoff time.Duration `yaml:"discovery_backoff"`
|
||||
LeadershipScoring *LeadershipScoring `yaml:"leadership_scoring,omitempty"`
|
||||
}
|
||||
|
||||
// LeadershipScoring defines weights for election scoring
|
||||
type LeadershipScoring struct {
|
||||
UptimeWeight float64 `yaml:"uptime_weight"`
|
||||
CapabilityWeight float64 `yaml:"capability_weight"`
|
||||
ExperienceWeight float64 `yaml:"experience_weight"`
|
||||
LoadWeight float64 `yaml:"load_weight"`
|
||||
UptimeWeight float64 `yaml:"uptime_weight"`
|
||||
CapabilityWeight float64 `yaml:"capability_weight"`
|
||||
ExperienceWeight float64 `yaml:"experience_weight"`
|
||||
LoadWeight float64 `yaml:"load_weight"`
|
||||
}
|
||||
|
||||
// AgeKeyPair represents an Age encryption key pair
|
||||
@@ -43,14 +43,14 @@ type AgeKeyPair struct {
|
||||
|
||||
// RoleDefinition represents a role configuration
|
||||
type RoleDefinition struct {
|
||||
Name string `yaml:"name"`
|
||||
Description string `yaml:"description"`
|
||||
Capabilities []string `yaml:"capabilities"`
|
||||
AccessLevel string `yaml:"access_level"`
|
||||
AuthorityLevel string `yaml:"authority_level"`
|
||||
Keys *AgeKeyPair `yaml:"keys,omitempty"`
|
||||
AgeKeys *AgeKeyPair `yaml:"age_keys,omitempty"` // Legacy field name
|
||||
CanDecrypt []string `yaml:"can_decrypt,omitempty"` // Roles this role can decrypt
|
||||
Name string `yaml:"name"`
|
||||
Description string `yaml:"description"`
|
||||
Capabilities []string `yaml:"capabilities"`
|
||||
AccessLevel string `yaml:"access_level"`
|
||||
AuthorityLevel string `yaml:"authority_level"`
|
||||
Keys *AgeKeyPair `yaml:"keys,omitempty"`
|
||||
AgeKeys *AgeKeyPair `yaml:"age_keys,omitempty"` // Legacy field name
|
||||
CanDecrypt []string `yaml:"can_decrypt,omitempty"` // Roles this role can decrypt
|
||||
}
|
||||
|
||||
// GetPredefinedRoles returns the predefined roles for the system
|
||||
@@ -96,6 +96,46 @@ func GetPredefinedRoles() map[string]*RoleDefinition {
|
||||
AuthorityLevel: AuthorityAdmin,
|
||||
CanDecrypt: []string{"security_engineer", "project_manager", "backend_developer", "frontend_developer", "devops_engineer"},
|
||||
},
|
||||
"security_expert": {
|
||||
Name: "security_expert",
|
||||
Description: "Advanced security analysis and policy work",
|
||||
Capabilities: []string{"security", "policy", "response"},
|
||||
AccessLevel: "high",
|
||||
AuthorityLevel: AuthorityAdmin,
|
||||
CanDecrypt: []string{"security_expert", "security_engineer", "project_manager"},
|
||||
},
|
||||
"senior_software_architect": {
|
||||
Name: "senior_software_architect",
|
||||
Description: "Architecture governance and system design",
|
||||
Capabilities: []string{"architecture", "design", "coordination"},
|
||||
AccessLevel: "high",
|
||||
AuthorityLevel: AuthorityAdmin,
|
||||
CanDecrypt: []string{"senior_software_architect", "project_manager", "backend_developer", "frontend_developer"},
|
||||
},
|
||||
"qa_engineer": {
|
||||
Name: "qa_engineer",
|
||||
Description: "Quality assurance and testing",
|
||||
Capabilities: []string{"testing", "validation"},
|
||||
AccessLevel: "medium",
|
||||
AuthorityLevel: AuthorityFull,
|
||||
CanDecrypt: []string{"qa_engineer", "backend_developer", "frontend_developer"},
|
||||
},
|
||||
"readonly_user": {
|
||||
Name: "readonly_user",
|
||||
Description: "Read-only observer with audit access",
|
||||
Capabilities: []string{"observation"},
|
||||
AccessLevel: "low",
|
||||
AuthorityLevel: AuthorityReadOnly,
|
||||
CanDecrypt: []string{"readonly_user"},
|
||||
},
|
||||
"suggestion_only_role": {
|
||||
Name: "suggestion_only_role",
|
||||
Description: "Can propose suggestions but not execute",
|
||||
Capabilities: []string{"recommendation"},
|
||||
AccessLevel: "low",
|
||||
AuthorityLevel: AuthoritySuggestion,
|
||||
CanDecrypt: []string{"suggestion_only_role"},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -6,24 +6,25 @@ import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"crypto/sha256"
|
||||
"github.com/ipfs/go-cid"
|
||||
dht "github.com/libp2p/go-libp2p-kad-dht"
|
||||
"github.com/libp2p/go-libp2p/core/host"
|
||||
"github.com/libp2p/go-libp2p/core/peer"
|
||||
"github.com/libp2p/go-libp2p/core/protocol"
|
||||
"github.com/libp2p/go-libp2p/core/routing"
|
||||
dht "github.com/libp2p/go-libp2p-kad-dht"
|
||||
"github.com/multiformats/go-multiaddr"
|
||||
"github.com/multiformats/go-multihash"
|
||||
"github.com/ipfs/go-cid"
|
||||
"crypto/sha256"
|
||||
)
|
||||
|
||||
// LibP2PDHT provides distributed hash table functionality for CHORUS peer discovery
|
||||
type LibP2PDHT struct {
|
||||
host host.Host
|
||||
kdht *dht.IpfsDHT
|
||||
ctx context.Context
|
||||
cancel context.CancelFunc
|
||||
config *Config
|
||||
host host.Host
|
||||
kdht *dht.IpfsDHT
|
||||
ctx context.Context
|
||||
cancel context.CancelFunc
|
||||
config *Config
|
||||
startTime time.Time
|
||||
|
||||
// Bootstrap state
|
||||
bootstrapped bool
|
||||
@@ -59,12 +60,14 @@ type Config struct {
|
||||
}
|
||||
|
||||
// PeerInfo holds information about discovered peers
|
||||
const defaultProviderResultLimit = 20
|
||||
|
||||
type PeerInfo struct {
|
||||
ID peer.ID
|
||||
Addresses []multiaddr.Multiaddr
|
||||
Agent string
|
||||
Role string
|
||||
LastSeen time.Time
|
||||
ID peer.ID
|
||||
Addresses []multiaddr.Multiaddr
|
||||
Agent string
|
||||
Role string
|
||||
LastSeen time.Time
|
||||
Capabilities []string
|
||||
}
|
||||
|
||||
@@ -74,11 +77,16 @@ func DefaultConfig() *Config {
|
||||
ProtocolPrefix: "/CHORUS",
|
||||
BootstrapTimeout: 30 * time.Second,
|
||||
DiscoveryInterval: 60 * time.Second,
|
||||
Mode: dht.ModeAuto,
|
||||
AutoBootstrap: true,
|
||||
Mode: dht.ModeAuto,
|
||||
AutoBootstrap: true,
|
||||
}
|
||||
}
|
||||
|
||||
// NewDHT is a backward compatible helper that delegates to NewLibP2PDHT.
|
||||
func NewDHT(ctx context.Context, host host.Host, opts ...Option) (*LibP2PDHT, error) {
|
||||
return NewLibP2PDHT(ctx, host, opts...)
|
||||
}
|
||||
|
||||
// NewLibP2PDHT creates a new LibP2PDHT instance
|
||||
func NewLibP2PDHT(ctx context.Context, host host.Host, opts ...Option) (*LibP2PDHT, error) {
|
||||
config := DefaultConfig()
|
||||
@@ -105,6 +113,7 @@ func NewLibP2PDHT(ctx context.Context, host host.Host, opts ...Option) (*LibP2PD
|
||||
ctx: dhtCtx,
|
||||
cancel: cancel,
|
||||
config: config,
|
||||
startTime: time.Now(),
|
||||
knownPeers: make(map[peer.ID]*PeerInfo),
|
||||
}
|
||||
|
||||
@@ -271,23 +280,24 @@ func (d *LibP2PDHT) FindProviders(ctx context.Context, key string, limit int) ([
|
||||
return nil, fmt.Errorf("failed to create CID from key: %w", err)
|
||||
}
|
||||
|
||||
// Find providers (FindProviders returns a channel and an error)
|
||||
providersChan, err := d.kdht.FindProviders(ctx, keyCID)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to find providers: %w", err)
|
||||
maxProviders := limit
|
||||
if maxProviders <= 0 {
|
||||
maxProviders = defaultProviderResultLimit
|
||||
}
|
||||
|
||||
// Collect providers from channel
|
||||
providers := make([]peer.AddrInfo, 0, limit)
|
||||
// TODO: Fix libp2p FindProviders channel type mismatch
|
||||
// The channel appears to return int instead of peer.AddrInfo in this version
|
||||
_ = providersChan // Avoid unused variable error
|
||||
// for providerInfo := range providersChan {
|
||||
// providers = append(providers, providerInfo)
|
||||
// if len(providers) >= limit {
|
||||
// break
|
||||
// }
|
||||
// }
|
||||
providerCtx, cancel := context.WithCancel(ctx)
|
||||
defer cancel()
|
||||
|
||||
providersChan := d.kdht.FindProvidersAsync(providerCtx, keyCID, maxProviders)
|
||||
providers := make([]peer.AddrInfo, 0, maxProviders)
|
||||
|
||||
for providerInfo := range providersChan {
|
||||
providers = append(providers, providerInfo)
|
||||
if limit > 0 && len(providers) >= limit {
|
||||
cancel()
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
return providers, nil
|
||||
}
|
||||
@@ -329,6 +339,22 @@ func (d *LibP2PDHT) GetConnectedPeers() []peer.ID {
|
||||
return d.kdht.Host().Network().Peers()
|
||||
}
|
||||
|
||||
// GetStats reports basic runtime statistics for the DHT
|
||||
func (d *LibP2PDHT) GetStats() DHTStats {
|
||||
stats := DHTStats{
|
||||
TotalPeers: len(d.GetConnectedPeers()),
|
||||
Uptime: time.Since(d.startTime),
|
||||
}
|
||||
|
||||
if d.replicationManager != nil {
|
||||
if metrics := d.replicationManager.GetMetrics(); metrics != nil {
|
||||
stats.TotalKeys = int(metrics.TotalKeys)
|
||||
}
|
||||
}
|
||||
|
||||
return stats
|
||||
}
|
||||
|
||||
// RegisterPeer registers a peer with capability information
|
||||
func (d *LibP2PDHT) RegisterPeer(peerID peer.ID, agent, role string, capabilities []string) {
|
||||
d.peersMutex.Lock()
|
||||
@@ -617,6 +643,11 @@ func (d *LibP2PDHT) IsReplicationEnabled() bool {
|
||||
return d.replicationManager != nil
|
||||
}
|
||||
|
||||
// ReplicationManager returns the underlying replication manager if enabled.
|
||||
func (d *LibP2PDHT) ReplicationManager() *ReplicationManager {
|
||||
return d.replicationManager
|
||||
}
|
||||
|
||||
// Close shuts down the DHT
|
||||
func (d *LibP2PDHT) Close() error {
|
||||
// Stop replication manager first
|
||||
|
||||
@@ -2,546 +2,155 @@ package dht
|
||||
|
||||
import (
|
||||
"context"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/libp2p/go-libp2p"
|
||||
"github.com/libp2p/go-libp2p/core/host"
|
||||
libp2p "github.com/libp2p/go-libp2p"
|
||||
dhtmode "github.com/libp2p/go-libp2p-kad-dht"
|
||||
"github.com/libp2p/go-libp2p/core/test"
|
||||
dht "github.com/libp2p/go-libp2p-kad-dht"
|
||||
"github.com/multiformats/go-multiaddr"
|
||||
)
|
||||
|
||||
type harness struct {
|
||||
ctx context.Context
|
||||
host libp2pHost
|
||||
dht *LibP2PDHT
|
||||
}
|
||||
|
||||
type libp2pHost interface {
|
||||
Close() error
|
||||
}
|
||||
|
||||
func newHarness(t *testing.T, opts ...Option) *harness {
|
||||
t.Helper()
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
|
||||
host, err := libp2p.New(libp2p.ListenAddrStrings("/ip4/127.0.0.1/tcp/0"))
|
||||
if err != nil {
|
||||
cancel()
|
||||
t.Fatalf("failed to create libp2p host: %v", err)
|
||||
}
|
||||
|
||||
options := append([]Option{WithAutoBootstrap(false)}, opts...)
|
||||
d, err := NewLibP2PDHT(ctx, host, options...)
|
||||
if err != nil {
|
||||
host.Close()
|
||||
cancel()
|
||||
t.Fatalf("failed to create DHT: %v", err)
|
||||
}
|
||||
|
||||
t.Cleanup(func() {
|
||||
d.Close()
|
||||
host.Close()
|
||||
cancel()
|
||||
})
|
||||
|
||||
return &harness{ctx: ctx, host: host, dht: d}
|
||||
}
|
||||
|
||||
func TestDefaultConfig(t *testing.T) {
|
||||
config := DefaultConfig()
|
||||
cfg := DefaultConfig()
|
||||
|
||||
if config.ProtocolPrefix != "/CHORUS" {
|
||||
t.Errorf("expected protocol prefix '/CHORUS', got %s", config.ProtocolPrefix)
|
||||
if cfg.ProtocolPrefix != "/CHORUS" {
|
||||
t.Fatalf("expected protocol prefix '/CHORUS', got %s", cfg.ProtocolPrefix)
|
||||
}
|
||||
|
||||
if config.BootstrapTimeout != 30*time.Second {
|
||||
t.Errorf("expected bootstrap timeout 30s, got %v", config.BootstrapTimeout)
|
||||
if cfg.BootstrapTimeout != 30*time.Second {
|
||||
t.Fatalf("expected bootstrap timeout 30s, got %v", cfg.BootstrapTimeout)
|
||||
}
|
||||
|
||||
if config.Mode != dht.ModeAuto {
|
||||
t.Errorf("expected mode auto, got %v", config.Mode)
|
||||
if cfg.Mode != dhtmode.ModeAuto {
|
||||
t.Fatalf("expected mode auto, got %v", cfg.Mode)
|
||||
}
|
||||
|
||||
if !config.AutoBootstrap {
|
||||
t.Error("expected auto bootstrap to be enabled")
|
||||
if !cfg.AutoBootstrap {
|
||||
t.Fatal("expected auto bootstrap to be enabled")
|
||||
}
|
||||
}
|
||||
|
||||
func TestNewDHT(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
// Create a test host
|
||||
host, err := libp2p.New()
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create test host: %v", err)
|
||||
}
|
||||
defer host.Close()
|
||||
|
||||
// Test with default options
|
||||
d, err := NewDHT(ctx, host)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create DHT: %v", err)
|
||||
}
|
||||
defer d.Close()
|
||||
|
||||
if d.host != host {
|
||||
t.Error("host not set correctly")
|
||||
}
|
||||
|
||||
if d.config.ProtocolPrefix != "/CHORUS" {
|
||||
t.Errorf("expected protocol prefix '/CHORUS', got %s", d.config.ProtocolPrefix)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDHTWithOptions(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
host, err := libp2p.New()
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create test host: %v", err)
|
||||
}
|
||||
defer host.Close()
|
||||
|
||||
// Test with custom options
|
||||
d, err := NewDHT(ctx, host,
|
||||
func TestWithOptionsOverridesDefaults(t *testing.T) {
|
||||
h := newHarness(t,
|
||||
WithProtocolPrefix("/custom"),
|
||||
WithMode(dht.ModeClient),
|
||||
WithBootstrapTimeout(60*time.Second),
|
||||
WithDiscoveryInterval(120*time.Second),
|
||||
WithAutoBootstrap(false),
|
||||
WithDiscoveryInterval(2*time.Minute),
|
||||
WithBootstrapTimeout(45*time.Second),
|
||||
WithMode(dhtmode.ModeClient),
|
||||
WithAutoBootstrap(true),
|
||||
)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create DHT: %v", err)
|
||||
}
|
||||
defer d.Close()
|
||||
|
||||
if d.config.ProtocolPrefix != "/custom" {
|
||||
t.Errorf("expected protocol prefix '/custom', got %s", d.config.ProtocolPrefix)
|
||||
cfg := h.dht.config
|
||||
|
||||
if cfg.ProtocolPrefix != "/custom" {
|
||||
t.Fatalf("expected protocol prefix '/custom', got %s", cfg.ProtocolPrefix)
|
||||
}
|
||||
|
||||
if d.config.Mode != dht.ModeClient {
|
||||
t.Errorf("expected mode client, got %v", d.config.Mode)
|
||||
if cfg.DiscoveryInterval != 2*time.Minute {
|
||||
t.Fatalf("expected discovery interval 2m, got %v", cfg.DiscoveryInterval)
|
||||
}
|
||||
|
||||
if d.config.BootstrapTimeout != 60*time.Second {
|
||||
t.Errorf("expected bootstrap timeout 60s, got %v", d.config.BootstrapTimeout)
|
||||
if cfg.BootstrapTimeout != 45*time.Second {
|
||||
t.Fatalf("expected bootstrap timeout 45s, got %v", cfg.BootstrapTimeout)
|
||||
}
|
||||
|
||||
if d.config.DiscoveryInterval != 120*time.Second {
|
||||
t.Errorf("expected discovery interval 120s, got %v", d.config.DiscoveryInterval)
|
||||
if cfg.Mode != dhtmode.ModeClient {
|
||||
t.Fatalf("expected mode client, got %v", cfg.Mode)
|
||||
}
|
||||
|
||||
if d.config.AutoBootstrap {
|
||||
t.Error("expected auto bootstrap to be disabled")
|
||||
if !cfg.AutoBootstrap {
|
||||
t.Fatal("expected auto bootstrap to remain enabled")
|
||||
}
|
||||
}
|
||||
|
||||
func TestWithBootstrapPeersFromStrings(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
func TestProvideRequiresBootstrap(t *testing.T) {
|
||||
h := newHarness(t)
|
||||
|
||||
host, err := libp2p.New()
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create test host: %v", err)
|
||||
}
|
||||
defer host.Close()
|
||||
|
||||
bootstrapAddrs := []string{
|
||||
"/ip4/127.0.0.1/tcp/4001/p2p/QmTest1",
|
||||
"/ip4/127.0.0.1/tcp/4002/p2p/QmTest2",
|
||||
err := h.dht.Provide(h.ctx, "key")
|
||||
if err == nil {
|
||||
t.Fatal("expected Provide to fail when not bootstrapped")
|
||||
}
|
||||
|
||||
d, err := NewDHT(ctx, host, WithBootstrapPeersFromStrings(bootstrapAddrs))
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create DHT: %v", err)
|
||||
}
|
||||
defer d.Close()
|
||||
|
||||
if len(d.config.BootstrapPeers) != 2 {
|
||||
t.Errorf("expected 2 bootstrap peers, got %d", len(d.config.BootstrapPeers))
|
||||
}
|
||||
}
|
||||
|
||||
func TestWithBootstrapPeersFromStringsInvalid(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
host, err := libp2p.New()
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create test host: %v", err)
|
||||
}
|
||||
defer host.Close()
|
||||
|
||||
// Include invalid addresses - they should be filtered out
|
||||
bootstrapAddrs := []string{
|
||||
"/ip4/127.0.0.1/tcp/4001/p2p/QmTest1", // valid
|
||||
"invalid-address", // invalid
|
||||
"/ip4/127.0.0.1/tcp/4002/p2p/QmTest2", // valid
|
||||
}
|
||||
|
||||
d, err := NewDHT(ctx, host, WithBootstrapPeersFromStrings(bootstrapAddrs))
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create DHT: %v", err)
|
||||
}
|
||||
defer d.Close()
|
||||
|
||||
// Should have filtered out the invalid address
|
||||
if len(d.config.BootstrapPeers) != 2 {
|
||||
t.Errorf("expected 2 valid bootstrap peers, got %d", len(d.config.BootstrapPeers))
|
||||
}
|
||||
}
|
||||
|
||||
func TestBootstrapWithoutPeers(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
host, err := libp2p.New()
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create test host: %v", err)
|
||||
}
|
||||
defer host.Close()
|
||||
|
||||
d, err := NewDHT(ctx, host, WithAutoBootstrap(false))
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create DHT: %v", err)
|
||||
}
|
||||
defer d.Close()
|
||||
|
||||
// Bootstrap should use default IPFS peers when none configured
|
||||
err = d.Bootstrap()
|
||||
// This might fail in test environment without network access, but should not panic
|
||||
if err != nil {
|
||||
// Expected in test environment
|
||||
t.Logf("Bootstrap failed as expected in test environment: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsBootstrapped(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
host, err := libp2p.New()
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create test host: %v", err)
|
||||
}
|
||||
defer host.Close()
|
||||
|
||||
d, err := NewDHT(ctx, host, WithAutoBootstrap(false))
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create DHT: %v", err)
|
||||
}
|
||||
defer d.Close()
|
||||
|
||||
// Should not be bootstrapped initially
|
||||
if d.IsBootstrapped() {
|
||||
t.Error("DHT should not be bootstrapped initially")
|
||||
if !strings.Contains(err.Error(), "not bootstrapped") {
|
||||
t.Fatalf("expected error to indicate bootstrap requirement, got %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRegisterPeer(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
host, err := libp2p.New()
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create test host: %v", err)
|
||||
}
|
||||
defer host.Close()
|
||||
|
||||
d, err := NewDHT(ctx, host)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create DHT: %v", err)
|
||||
}
|
||||
defer d.Close()
|
||||
h := newHarness(t)
|
||||
|
||||
peerID := test.RandPeerIDFatal(t)
|
||||
agent := "claude"
|
||||
role := "frontend"
|
||||
capabilities := []string{"react", "javascript"}
|
||||
|
||||
d.RegisterPeer(peerID, agent, role, capabilities)
|
||||
h.dht.RegisterPeer(peerID, "apollo", "platform", []string{"go"})
|
||||
|
||||
knownPeers := d.GetKnownPeers()
|
||||
if len(knownPeers) != 1 {
|
||||
t.Errorf("expected 1 known peer, got %d", len(knownPeers))
|
||||
peers := h.dht.GetKnownPeers()
|
||||
|
||||
info, ok := peers[peerID]
|
||||
if !ok {
|
||||
t.Fatalf("expected peer to be tracked")
|
||||
}
|
||||
|
||||
peerInfo, exists := knownPeers[peerID]
|
||||
if !exists {
|
||||
t.Error("peer not found in known peers")
|
||||
if info.Agent != "apollo" {
|
||||
t.Fatalf("expected agent apollo, got %s", info.Agent)
|
||||
}
|
||||
|
||||
if peerInfo.Agent != agent {
|
||||
t.Errorf("expected agent %s, got %s", agent, peerInfo.Agent)
|
||||
if info.Role != "platform" {
|
||||
t.Fatalf("expected role platform, got %s", info.Role)
|
||||
}
|
||||
|
||||
if peerInfo.Role != role {
|
||||
t.Errorf("expected role %s, got %s", role, peerInfo.Role)
|
||||
}
|
||||
|
||||
if len(peerInfo.Capabilities) != len(capabilities) {
|
||||
t.Errorf("expected %d capabilities, got %d", len(capabilities), len(peerInfo.Capabilities))
|
||||
if len(info.Capabilities) != 1 || info.Capabilities[0] != "go" {
|
||||
t.Fatalf("expected capability go, got %v", info.Capabilities)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetConnectedPeers(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
func TestGetStatsProvidesUptime(t *testing.T) {
|
||||
h := newHarness(t)
|
||||
|
||||
host, err := libp2p.New()
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create test host: %v", err)
|
||||
stats := h.dht.GetStats()
|
||||
|
||||
if stats.TotalPeers != 0 {
|
||||
t.Fatalf("expected zero peers, got %d", stats.TotalPeers)
|
||||
}
|
||||
defer host.Close()
|
||||
|
||||
d, err := NewDHT(ctx, host)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create DHT: %v", err)
|
||||
}
|
||||
defer d.Close()
|
||||
|
||||
// Initially should have no connected peers
|
||||
peers := d.GetConnectedPeers()
|
||||
if len(peers) != 0 {
|
||||
t.Errorf("expected 0 connected peers, got %d", len(peers))
|
||||
}
|
||||
}
|
||||
|
||||
func TestPutAndGetValue(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
host, err := libp2p.New()
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create test host: %v", err)
|
||||
}
|
||||
defer host.Close()
|
||||
|
||||
d, err := NewDHT(ctx, host, WithAutoBootstrap(false))
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create DHT: %v", err)
|
||||
}
|
||||
defer d.Close()
|
||||
|
||||
// Test without bootstrap (should fail)
|
||||
key := "test-key"
|
||||
value := []byte("test-value")
|
||||
|
||||
err = d.PutValue(ctx, key, value)
|
||||
if err == nil {
|
||||
t.Error("PutValue should fail when DHT not bootstrapped")
|
||||
}
|
||||
|
||||
_, err = d.GetValue(ctx, key)
|
||||
if err == nil {
|
||||
t.Error("GetValue should fail when DHT not bootstrapped")
|
||||
}
|
||||
}
|
||||
|
||||
func TestProvideAndFindProviders(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
host, err := libp2p.New()
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create test host: %v", err)
|
||||
}
|
||||
defer host.Close()
|
||||
|
||||
d, err := NewDHT(ctx, host, WithAutoBootstrap(false))
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create DHT: %v", err)
|
||||
}
|
||||
defer d.Close()
|
||||
|
||||
// Test without bootstrap (should fail)
|
||||
key := "test-service"
|
||||
|
||||
err = d.Provide(ctx, key)
|
||||
if err == nil {
|
||||
t.Error("Provide should fail when DHT not bootstrapped")
|
||||
}
|
||||
|
||||
_, err = d.FindProviders(ctx, key, 10)
|
||||
if err == nil {
|
||||
t.Error("FindProviders should fail when DHT not bootstrapped")
|
||||
}
|
||||
}
|
||||
|
||||
func TestFindPeer(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
host, err := libp2p.New()
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create test host: %v", err)
|
||||
}
|
||||
defer host.Close()
|
||||
|
||||
d, err := NewDHT(ctx, host, WithAutoBootstrap(false))
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create DHT: %v", err)
|
||||
}
|
||||
defer d.Close()
|
||||
|
||||
// Test without bootstrap (should fail)
|
||||
peerID := test.RandPeerIDFatal(t)
|
||||
|
||||
_, err = d.FindPeer(ctx, peerID)
|
||||
if err == nil {
|
||||
t.Error("FindPeer should fail when DHT not bootstrapped")
|
||||
}
|
||||
}
|
||||
|
||||
func TestFindPeersByRole(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
host, err := libp2p.New()
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create test host: %v", err)
|
||||
}
|
||||
defer host.Close()
|
||||
|
||||
d, err := NewDHT(ctx, host, WithAutoBootstrap(false))
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create DHT: %v", err)
|
||||
}
|
||||
defer d.Close()
|
||||
|
||||
// Register some local peers
|
||||
peerID1 := test.RandPeerIDFatal(t)
|
||||
peerID2 := test.RandPeerIDFatal(t)
|
||||
|
||||
d.RegisterPeer(peerID1, "claude", "frontend", []string{"react"})
|
||||
d.RegisterPeer(peerID2, "claude", "backend", []string{"go"})
|
||||
|
||||
// Find frontend peers
|
||||
frontendPeers, err := d.FindPeersByRole(ctx, "frontend")
|
||||
if err != nil {
|
||||
t.Fatalf("failed to find peers by role: %v", err)
|
||||
}
|
||||
|
||||
if len(frontendPeers) != 1 {
|
||||
t.Errorf("expected 1 frontend peer, got %d", len(frontendPeers))
|
||||
}
|
||||
|
||||
if frontendPeers[0].ID != peerID1 {
|
||||
t.Error("wrong peer returned for frontend role")
|
||||
}
|
||||
|
||||
// Find all peers with wildcard
|
||||
allPeers, err := d.FindPeersByRole(ctx, "*")
|
||||
if err != nil {
|
||||
t.Fatalf("failed to find all peers: %v", err)
|
||||
}
|
||||
|
||||
if len(allPeers) != 2 {
|
||||
t.Errorf("expected 2 peers with wildcard, got %d", len(allPeers))
|
||||
}
|
||||
}
|
||||
|
||||
func TestAnnounceRole(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
host, err := libp2p.New()
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create test host: %v", err)
|
||||
}
|
||||
defer host.Close()
|
||||
|
||||
d, err := NewDHT(ctx, host, WithAutoBootstrap(false))
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create DHT: %v", err)
|
||||
}
|
||||
defer d.Close()
|
||||
|
||||
// Should fail when not bootstrapped
|
||||
err = d.AnnounceRole(ctx, "frontend")
|
||||
if err == nil {
|
||||
t.Error("AnnounceRole should fail when DHT not bootstrapped")
|
||||
}
|
||||
}
|
||||
|
||||
func TestAnnounceCapability(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
host, err := libp2p.New()
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create test host: %v", err)
|
||||
}
|
||||
defer host.Close()
|
||||
|
||||
d, err := NewDHT(ctx, host, WithAutoBootstrap(false))
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create DHT: %v", err)
|
||||
}
|
||||
defer d.Close()
|
||||
|
||||
// Should fail when not bootstrapped
|
||||
err = d.AnnounceCapability(ctx, "react")
|
||||
if err == nil {
|
||||
t.Error("AnnounceCapability should fail when DHT not bootstrapped")
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetRoutingTable(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
host, err := libp2p.New()
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create test host: %v", err)
|
||||
}
|
||||
defer host.Close()
|
||||
|
||||
d, err := NewDHT(ctx, host)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create DHT: %v", err)
|
||||
}
|
||||
defer d.Close()
|
||||
|
||||
rt := d.GetRoutingTable()
|
||||
if rt == nil {
|
||||
t.Error("routing table should not be nil")
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetDHTSize(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
host, err := libp2p.New()
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create test host: %v", err)
|
||||
}
|
||||
defer host.Close()
|
||||
|
||||
d, err := NewDHT(ctx, host)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create DHT: %v", err)
|
||||
}
|
||||
defer d.Close()
|
||||
|
||||
size := d.GetDHTSize()
|
||||
// Should be 0 or small initially
|
||||
if size < 0 {
|
||||
t.Errorf("DHT size should be non-negative, got %d", size)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRefreshRoutingTable(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
host, err := libp2p.New()
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create test host: %v", err)
|
||||
}
|
||||
defer host.Close()
|
||||
|
||||
d, err := NewDHT(ctx, host, WithAutoBootstrap(false))
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create DHT: %v", err)
|
||||
}
|
||||
defer d.Close()
|
||||
|
||||
// Should fail when not bootstrapped
|
||||
err = d.RefreshRoutingTable()
|
||||
if err == nil {
|
||||
t.Error("RefreshRoutingTable should fail when DHT not bootstrapped")
|
||||
}
|
||||
}
|
||||
|
||||
func TestHost(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
host, err := libp2p.New()
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create test host: %v", err)
|
||||
}
|
||||
defer host.Close()
|
||||
|
||||
d, err := NewDHT(ctx, host)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create DHT: %v", err)
|
||||
}
|
||||
defer d.Close()
|
||||
|
||||
if d.Host() != host {
|
||||
t.Error("Host() should return the same host instance")
|
||||
}
|
||||
}
|
||||
|
||||
func TestClose(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
host, err := libp2p.New()
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create test host: %v", err)
|
||||
}
|
||||
defer host.Close()
|
||||
|
||||
d, err := NewDHT(ctx, host)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create DHT: %v", err)
|
||||
}
|
||||
|
||||
// Should close without error
|
||||
err = d.Close()
|
||||
if err != nil {
|
||||
t.Errorf("Close() failed: %v", err)
|
||||
if stats.Uptime < 0 {
|
||||
t.Fatalf("expected non-negative uptime, got %v", stats.Uptime)
|
||||
}
|
||||
}
|
||||
@@ -2,559 +2,155 @@ package dht
|
||||
|
||||
import (
|
||||
"context"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"chorus/pkg/config"
|
||||
)
|
||||
|
||||
// TestDHTSecurityPolicyEnforcement tests security policy enforcement in DHT operations
|
||||
func TestDHTSecurityPolicyEnforcement(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
type securityTestCase struct {
|
||||
name string
|
||||
role string
|
||||
address string
|
||||
contentType string
|
||||
expectSuccess bool
|
||||
expectErrHint string
|
||||
}
|
||||
|
||||
testCases := []struct {
|
||||
name string
|
||||
currentRole string
|
||||
operation string
|
||||
ucxlAddress string
|
||||
contentType string
|
||||
expectSuccess bool
|
||||
expectedError string
|
||||
}{
|
||||
// Store operation tests
|
||||
func newTestEncryptedStorage(cfg *config.Config) *EncryptedDHTStorage {
|
||||
return &EncryptedDHTStorage{
|
||||
ctx: context.Background(),
|
||||
config: cfg,
|
||||
nodeID: "test-node",
|
||||
cache: make(map[string]*CachedEntry),
|
||||
metrics: &StorageMetrics{LastUpdate: time.Now()},
|
||||
}
|
||||
}
|
||||
|
||||
func TestCheckStoreAccessPolicy(t *testing.T) {
|
||||
cases := []securityTestCase{
|
||||
{
|
||||
name: "admin_can_store_all_content",
|
||||
currentRole: "admin",
|
||||
operation: "store",
|
||||
ucxlAddress: "agent1:admin:system:security_audit",
|
||||
name: "backend developer can store",
|
||||
role: "backend_developer",
|
||||
address: "agent1:backend_developer:api:endpoint",
|
||||
contentType: "decision",
|
||||
expectSuccess: true,
|
||||
},
|
||||
{
|
||||
name: "backend_developer_can_store_backend_content",
|
||||
currentRole: "backend_developer",
|
||||
operation: "store",
|
||||
ucxlAddress: "agent1:backend_developer:api:endpoint_design",
|
||||
contentType: "suggestion",
|
||||
name: "project manager can store",
|
||||
role: "project_manager",
|
||||
address: "agent1:project_manager:plan:milestone",
|
||||
contentType: "decision",
|
||||
expectSuccess: true,
|
||||
},
|
||||
{
|
||||
name: "readonly_role_cannot_store",
|
||||
currentRole: "readonly_user",
|
||||
operation: "store",
|
||||
ucxlAddress: "agent1:readonly_user:project:observation",
|
||||
contentType: "suggestion",
|
||||
expectSuccess: false,
|
||||
expectedError: "read-only authority",
|
||||
name: "read only user cannot store",
|
||||
role: "readonly_user",
|
||||
address: "agent1:readonly_user:note:observation",
|
||||
contentType: "note",
|
||||
expectSuccess: false,
|
||||
expectErrHint: "read-only authority",
|
||||
},
|
||||
{
|
||||
name: "unknown_role_cannot_store",
|
||||
currentRole: "invalid_role",
|
||||
operation: "store",
|
||||
ucxlAddress: "agent1:invalid_role:project:task",
|
||||
contentType: "decision",
|
||||
expectSuccess: false,
|
||||
expectedError: "unknown creator role",
|
||||
},
|
||||
|
||||
// Retrieve operation tests
|
||||
{
|
||||
name: "any_valid_role_can_retrieve",
|
||||
currentRole: "qa_engineer",
|
||||
operation: "retrieve",
|
||||
ucxlAddress: "agent1:backend_developer:api:test_data",
|
||||
expectSuccess: true,
|
||||
},
|
||||
{
|
||||
name: "unknown_role_cannot_retrieve",
|
||||
currentRole: "nonexistent_role",
|
||||
operation: "retrieve",
|
||||
ucxlAddress: "agent1:backend_developer:api:test_data",
|
||||
expectSuccess: false,
|
||||
expectedError: "unknown current role",
|
||||
},
|
||||
|
||||
// Announce operation tests
|
||||
{
|
||||
name: "coordination_role_can_announce",
|
||||
currentRole: "senior_software_architect",
|
||||
operation: "announce",
|
||||
ucxlAddress: "agent1:senior_software_architect:architecture:blueprint",
|
||||
expectSuccess: true,
|
||||
},
|
||||
{
|
||||
name: "decision_role_can_announce",
|
||||
currentRole: "security_expert",
|
||||
operation: "announce",
|
||||
ucxlAddress: "agent1:security_expert:security:policy",
|
||||
expectSuccess: true,
|
||||
},
|
||||
{
|
||||
name: "suggestion_role_cannot_announce",
|
||||
currentRole: "suggestion_only_role",
|
||||
operation: "announce",
|
||||
ucxlAddress: "agent1:suggestion_only_role:project:idea",
|
||||
expectSuccess: false,
|
||||
expectedError: "lacks authority",
|
||||
},
|
||||
{
|
||||
name: "readonly_role_cannot_announce",
|
||||
currentRole: "readonly_user",
|
||||
operation: "announce",
|
||||
ucxlAddress: "agent1:readonly_user:project:observation",
|
||||
expectSuccess: false,
|
||||
expectedError: "lacks authority",
|
||||
name: "unknown role rejected",
|
||||
role: "ghost_role",
|
||||
address: "agent1:ghost_role:context",
|
||||
contentType: "decision",
|
||||
expectSuccess: false,
|
||||
expectErrHint: "unknown creator role",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
cfg := &config.Config{Agent: config.AgentConfig{}}
|
||||
eds := newTestEncryptedStorage(cfg)
|
||||
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
// Create test configuration
|
||||
cfg := &config.Config{
|
||||
Agent: config.AgentConfig{
|
||||
ID: "test-agent",
|
||||
Role: tc.currentRole,
|
||||
},
|
||||
Security: config.SecurityConfig{
|
||||
KeyRotationDays: 90,
|
||||
AuditLogging: true,
|
||||
AuditPath: "/tmp/test-security-audit.log",
|
||||
},
|
||||
}
|
||||
|
||||
// Create mock encrypted storage
|
||||
eds := createMockEncryptedStorage(ctx, cfg)
|
||||
|
||||
var err error
|
||||
switch tc.operation {
|
||||
case "store":
|
||||
err = eds.checkStoreAccessPolicy(tc.currentRole, tc.ucxlAddress, tc.contentType)
|
||||
case "retrieve":
|
||||
err = eds.checkRetrieveAccessPolicy(tc.currentRole, tc.ucxlAddress)
|
||||
case "announce":
|
||||
err = eds.checkAnnounceAccessPolicy(tc.currentRole, tc.ucxlAddress)
|
||||
}
|
||||
|
||||
if tc.expectSuccess {
|
||||
if err != nil {
|
||||
t.Errorf("Expected %s operation to succeed for role %s, but got error: %v",
|
||||
tc.operation, tc.currentRole, err)
|
||||
}
|
||||
} else {
|
||||
if err == nil {
|
||||
t.Errorf("Expected %s operation to fail for role %s, but it succeeded",
|
||||
tc.operation, tc.currentRole)
|
||||
}
|
||||
if tc.expectedError != "" && !containsSubstring(err.Error(), tc.expectedError) {
|
||||
t.Errorf("Expected error to contain '%s', got '%s'", tc.expectedError, err.Error())
|
||||
}
|
||||
}
|
||||
err := eds.checkStoreAccessPolicy(tc.role, tc.address, tc.contentType)
|
||||
verifySecurityExpectation(t, tc.expectSuccess, tc.expectErrHint, err)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestDHTAuditLogging tests comprehensive audit logging for DHT operations
|
||||
func TestDHTAuditLogging(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
testCases := []struct {
|
||||
name string
|
||||
operation string
|
||||
role string
|
||||
ucxlAddress string
|
||||
success bool
|
||||
errorMsg string
|
||||
expectAudit bool
|
||||
}{
|
||||
func TestCheckRetrieveAccessPolicy(t *testing.T) {
|
||||
cases := []securityTestCase{
|
||||
{
|
||||
name: "successful_store_operation",
|
||||
operation: "store",
|
||||
role: "backend_developer",
|
||||
ucxlAddress: "agent1:backend_developer:api:user_service",
|
||||
success: true,
|
||||
expectAudit: true,
|
||||
name: "qa engineer allowed",
|
||||
role: "qa_engineer",
|
||||
address: "agent1:backend_developer:api:tests",
|
||||
expectSuccess: true,
|
||||
},
|
||||
{
|
||||
name: "failed_store_operation",
|
||||
operation: "store",
|
||||
role: "readonly_user",
|
||||
ucxlAddress: "agent1:readonly_user:project:readonly_attempt",
|
||||
success: false,
|
||||
errorMsg: "read-only authority",
|
||||
expectAudit: true,
|
||||
},
|
||||
{
|
||||
name: "successful_retrieve_operation",
|
||||
operation: "retrieve",
|
||||
role: "frontend_developer",
|
||||
ucxlAddress: "agent1:backend_developer:api:user_data",
|
||||
success: true,
|
||||
expectAudit: true,
|
||||
},
|
||||
{
|
||||
name: "successful_announce_operation",
|
||||
operation: "announce",
|
||||
role: "senior_software_architect",
|
||||
ucxlAddress: "agent1:senior_software_architect:architecture:system_design",
|
||||
success: true,
|
||||
expectAudit: true,
|
||||
},
|
||||
{
|
||||
name: "audit_disabled_no_logging",
|
||||
operation: "store",
|
||||
role: "backend_developer",
|
||||
ucxlAddress: "agent1:backend_developer:api:no_audit",
|
||||
success: true,
|
||||
expectAudit: false,
|
||||
name: "unknown role rejected",
|
||||
role: "unknown",
|
||||
address: "agent1:backend_developer:api:tests",
|
||||
expectSuccess: false,
|
||||
expectErrHint: "unknown current role",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
cfg := &config.Config{Agent: config.AgentConfig{}}
|
||||
eds := newTestEncryptedStorage(cfg)
|
||||
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
// Create configuration with audit logging
|
||||
cfg := &config.Config{
|
||||
Agent: config.AgentConfig{
|
||||
ID: "test-agent",
|
||||
Role: tc.role,
|
||||
},
|
||||
Security: config.SecurityConfig{
|
||||
KeyRotationDays: 90,
|
||||
AuditLogging: tc.expectAudit,
|
||||
AuditPath: "/tmp/test-dht-audit.log",
|
||||
},
|
||||
}
|
||||
|
||||
// Create mock encrypted storage
|
||||
eds := createMockEncryptedStorage(ctx, cfg)
|
||||
|
||||
// Capture audit output
|
||||
auditCaptured := false
|
||||
|
||||
// Simulate audit operation
|
||||
switch tc.operation {
|
||||
case "store":
|
||||
// Mock the audit function call
|
||||
if tc.expectAudit && cfg.Security.AuditLogging {
|
||||
eds.auditStoreOperation(tc.ucxlAddress, tc.role, "test-content", 1024, tc.success, tc.errorMsg)
|
||||
auditCaptured = true
|
||||
}
|
||||
case "retrieve":
|
||||
if tc.expectAudit && cfg.Security.AuditLogging {
|
||||
eds.auditRetrieveOperation(tc.ucxlAddress, tc.role, tc.success, tc.errorMsg)
|
||||
auditCaptured = true
|
||||
}
|
||||
case "announce":
|
||||
if tc.expectAudit && cfg.Security.AuditLogging {
|
||||
eds.auditAnnounceOperation(tc.ucxlAddress, tc.role, tc.success, tc.errorMsg)
|
||||
auditCaptured = true
|
||||
}
|
||||
}
|
||||
|
||||
// Verify audit logging behavior
|
||||
if tc.expectAudit && !auditCaptured {
|
||||
t.Errorf("Expected audit logging for %s operation but none was captured", tc.operation)
|
||||
}
|
||||
if !tc.expectAudit && auditCaptured {
|
||||
t.Errorf("Expected no audit logging for %s operation but audit was captured", tc.operation)
|
||||
}
|
||||
err := eds.checkRetrieveAccessPolicy(tc.role, tc.address)
|
||||
verifySecurityExpectation(t, tc.expectSuccess, tc.expectErrHint, err)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestSecurityConfigIntegration tests integration with SecurityConfig
|
||||
func TestSecurityConfigIntegration(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
testConfigs := []struct {
|
||||
name string
|
||||
auditLogging bool
|
||||
auditPath string
|
||||
expectAuditWork bool
|
||||
}{
|
||||
func TestCheckAnnounceAccessPolicy(t *testing.T) {
|
||||
cases := []securityTestCase{
|
||||
{
|
||||
name: "audit_enabled_with_path",
|
||||
auditLogging: true,
|
||||
auditPath: "/tmp/test-audit-enabled.log",
|
||||
expectAuditWork: true,
|
||||
name: "architect can announce",
|
||||
role: "senior_software_architect",
|
||||
address: "agent1:senior_software_architect:architecture:proposal",
|
||||
expectSuccess: true,
|
||||
},
|
||||
{
|
||||
name: "audit_disabled",
|
||||
auditLogging: false,
|
||||
auditPath: "/tmp/test-audit-disabled.log",
|
||||
expectAuditWork: false,
|
||||
name: "suggestion role cannot announce",
|
||||
role: "suggestion_only_role",
|
||||
address: "agent1:suggestion_only_role:idea",
|
||||
expectSuccess: false,
|
||||
expectErrHint: "lacks authority",
|
||||
},
|
||||
{
|
||||
name: "audit_enabled_no_path",
|
||||
auditLogging: true,
|
||||
auditPath: "",
|
||||
expectAuditWork: false,
|
||||
name: "unknown role rejected",
|
||||
role: "mystery",
|
||||
address: "agent1:mystery:topic",
|
||||
expectSuccess: false,
|
||||
expectErrHint: "unknown current role",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testConfigs {
|
||||
cfg := &config.Config{Agent: config.AgentConfig{}}
|
||||
eds := newTestEncryptedStorage(cfg)
|
||||
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
cfg := &config.Config{
|
||||
Agent: config.AgentConfig{
|
||||
ID: "test-agent",
|
||||
Role: "backend_developer",
|
||||
},
|
||||
Security: config.SecurityConfig{
|
||||
KeyRotationDays: 90,
|
||||
AuditLogging: tc.auditLogging,
|
||||
AuditPath: tc.auditPath,
|
||||
},
|
||||
}
|
||||
|
||||
eds := createMockEncryptedStorage(ctx, cfg)
|
||||
|
||||
// Test audit function behavior with different configurations
|
||||
auditWorked := func() bool {
|
||||
if !cfg.Security.AuditLogging || cfg.Security.AuditPath == "" {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}()
|
||||
|
||||
if auditWorked != tc.expectAuditWork {
|
||||
t.Errorf("Expected audit to work: %v, but got: %v", tc.expectAuditWork, auditWorked)
|
||||
}
|
||||
err := eds.checkAnnounceAccessPolicy(tc.role, tc.address)
|
||||
verifySecurityExpectation(t, tc.expectSuccess, tc.expectErrHint, err)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestRoleAuthorityHierarchy tests role authority hierarchy enforcement
|
||||
func TestRoleAuthorityHierarchy(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
func verifySecurityExpectation(t *testing.T, expectSuccess bool, hint string, err error) {
|
||||
t.Helper()
|
||||
|
||||
// Test role authority levels for different operations
|
||||
authorityTests := []struct {
|
||||
role string
|
||||
authorityLevel config.AuthorityLevel
|
||||
canStore bool
|
||||
canRetrieve bool
|
||||
canAnnounce bool
|
||||
}{
|
||||
{
|
||||
role: "admin",
|
||||
authorityLevel: config.AuthorityMaster,
|
||||
canStore: true,
|
||||
canRetrieve: true,
|
||||
canAnnounce: true,
|
||||
},
|
||||
{
|
||||
role: "senior_software_architect",
|
||||
authorityLevel: config.AuthorityDecision,
|
||||
canStore: true,
|
||||
canRetrieve: true,
|
||||
canAnnounce: true,
|
||||
},
|
||||
{
|
||||
role: "security_expert",
|
||||
authorityLevel: config.AuthorityCoordination,
|
||||
canStore: true,
|
||||
canRetrieve: true,
|
||||
canAnnounce: true,
|
||||
},
|
||||
{
|
||||
role: "backend_developer",
|
||||
authorityLevel: config.AuthoritySuggestion,
|
||||
canStore: true,
|
||||
canRetrieve: true,
|
||||
canAnnounce: false,
|
||||
},
|
||||
if expectSuccess {
|
||||
if err != nil {
|
||||
t.Fatalf("expected success, got error: %v", err)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
for _, tt := range authorityTests {
|
||||
t.Run(tt.role+"_authority_test", func(t *testing.T) {
|
||||
cfg := &config.Config{
|
||||
Agent: config.AgentConfig{
|
||||
ID: "test-agent",
|
||||
Role: tt.role,
|
||||
},
|
||||
Security: config.SecurityConfig{
|
||||
KeyRotationDays: 90,
|
||||
AuditLogging: true,
|
||||
AuditPath: "/tmp/test-authority.log",
|
||||
},
|
||||
}
|
||||
if err == nil {
|
||||
t.Fatal("expected error but got success")
|
||||
}
|
||||
|
||||
eds := createMockEncryptedStorage(ctx, cfg)
|
||||
|
||||
// Test store permission
|
||||
storeErr := eds.checkStoreAccessPolicy(tt.role, "test:address", "content")
|
||||
if tt.canStore && storeErr != nil {
|
||||
t.Errorf("Role %s should be able to store but got error: %v", tt.role, storeErr)
|
||||
}
|
||||
if !tt.canStore && storeErr == nil {
|
||||
t.Errorf("Role %s should not be able to store but operation succeeded", tt.role)
|
||||
}
|
||||
|
||||
// Test retrieve permission
|
||||
retrieveErr := eds.checkRetrieveAccessPolicy(tt.role, "test:address")
|
||||
if tt.canRetrieve && retrieveErr != nil {
|
||||
t.Errorf("Role %s should be able to retrieve but got error: %v", tt.role, retrieveErr)
|
||||
}
|
||||
if !tt.canRetrieve && retrieveErr == nil {
|
||||
t.Errorf("Role %s should not be able to retrieve but operation succeeded", tt.role)
|
||||
}
|
||||
|
||||
// Test announce permission
|
||||
announceErr := eds.checkAnnounceAccessPolicy(tt.role, "test:address")
|
||||
if tt.canAnnounce && announceErr != nil {
|
||||
t.Errorf("Role %s should be able to announce but got error: %v", tt.role, announceErr)
|
||||
}
|
||||
if !tt.canAnnounce && announceErr == nil {
|
||||
t.Errorf("Role %s should not be able to announce but operation succeeded", tt.role)
|
||||
}
|
||||
})
|
||||
if hint != "" && !strings.Contains(err.Error(), hint) {
|
||||
t.Fatalf("expected error to contain %q, got %q", hint, err.Error())
|
||||
}
|
||||
}
|
||||
|
||||
// TestSecurityMetrics tests security-related metrics
|
||||
func TestSecurityMetrics(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
cfg := &config.Config{
|
||||
Agent: config.AgentConfig{
|
||||
ID: "test-agent",
|
||||
Role: "backend_developer",
|
||||
},
|
||||
Security: config.SecurityConfig{
|
||||
KeyRotationDays: 90,
|
||||
AuditLogging: true,
|
||||
AuditPath: "/tmp/test-metrics.log",
|
||||
},
|
||||
}
|
||||
|
||||
eds := createMockEncryptedStorage(ctx, cfg)
|
||||
|
||||
// Simulate some operations to generate metrics
|
||||
for i := 0; i < 5; i++ {
|
||||
eds.metrics.StoredItems++
|
||||
eds.metrics.RetrievedItems++
|
||||
eds.metrics.EncryptionOps++
|
||||
eds.metrics.DecryptionOps++
|
||||
}
|
||||
|
||||
metrics := eds.GetMetrics()
|
||||
|
||||
expectedMetrics := map[string]int64{
|
||||
"stored_items": 5,
|
||||
"retrieved_items": 5,
|
||||
"encryption_ops": 5,
|
||||
"decryption_ops": 5,
|
||||
}
|
||||
|
||||
for metricName, expectedValue := range expectedMetrics {
|
||||
if actualValue, ok := metrics[metricName]; !ok {
|
||||
t.Errorf("Expected metric %s to be present in metrics", metricName)
|
||||
} else if actualValue != expectedValue {
|
||||
t.Errorf("Expected %s to be %d, got %v", metricName, expectedValue, actualValue)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Helper functions
|
||||
|
||||
func createMockEncryptedStorage(ctx context.Context, cfg *config.Config) *EncryptedDHTStorage {
|
||||
return &EncryptedDHTStorage{
|
||||
ctx: ctx,
|
||||
config: cfg,
|
||||
nodeID: "test-node-id",
|
||||
cache: make(map[string]*CachedEntry),
|
||||
metrics: &StorageMetrics{
|
||||
LastUpdate: time.Now(),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func containsSubstring(str, substr string) bool {
|
||||
if len(substr) == 0 {
|
||||
return true
|
||||
}
|
||||
if len(str) < len(substr) {
|
||||
return false
|
||||
}
|
||||
for i := 0; i <= len(str)-len(substr); i++ {
|
||||
if str[i:i+len(substr)] == substr {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// Benchmarks for security performance
|
||||
|
||||
func BenchmarkSecurityPolicyChecks(b *testing.B) {
|
||||
ctx := context.Background()
|
||||
cfg := &config.Config{
|
||||
Agent: config.AgentConfig{
|
||||
ID: "bench-agent",
|
||||
Role: "backend_developer",
|
||||
},
|
||||
Security: config.SecurityConfig{
|
||||
KeyRotationDays: 90,
|
||||
AuditLogging: true,
|
||||
AuditPath: "/tmp/bench-security.log",
|
||||
},
|
||||
}
|
||||
|
||||
eds := createMockEncryptedStorage(ctx, cfg)
|
||||
|
||||
b.ResetTimer()
|
||||
|
||||
b.Run("store_policy_check", func(b *testing.B) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
eds.checkStoreAccessPolicy("backend_developer", "test:address", "content")
|
||||
}
|
||||
})
|
||||
|
||||
b.Run("retrieve_policy_check", func(b *testing.B) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
eds.checkRetrieveAccessPolicy("backend_developer", "test:address")
|
||||
}
|
||||
})
|
||||
|
||||
b.Run("announce_policy_check", func(b *testing.B) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
eds.checkAnnounceAccessPolicy("senior_software_architect", "test:address")
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func BenchmarkAuditOperations(b *testing.B) {
|
||||
ctx := context.Background()
|
||||
cfg := &config.Config{
|
||||
Agent: config.AgentConfig{
|
||||
ID: "bench-agent",
|
||||
Role: "backend_developer",
|
||||
},
|
||||
Security: config.SecurityConfig{
|
||||
KeyRotationDays: 90,
|
||||
AuditLogging: true,
|
||||
AuditPath: "/tmp/bench-audit.log",
|
||||
},
|
||||
}
|
||||
|
||||
eds := createMockEncryptedStorage(ctx, cfg)
|
||||
|
||||
b.ResetTimer()
|
||||
|
||||
b.Run("store_audit", func(b *testing.B) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
eds.auditStoreOperation("test:address", "backend_developer", "content", 1024, true, "")
|
||||
}
|
||||
})
|
||||
|
||||
b.Run("retrieve_audit", func(b *testing.B) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
eds.auditRetrieveOperation("test:address", "backend_developer", true, "")
|
||||
}
|
||||
})
|
||||
|
||||
b.Run("announce_audit", func(b *testing.B) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
eds.auditAnnounceOperation("test:address", "backend_developer", true, "")
|
||||
}
|
||||
})
|
||||
}
|
||||
@@ -1,14 +1,117 @@
|
||||
package dht
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
|
||||
"chorus/pkg/config"
|
||||
libp2p "github.com/libp2p/go-libp2p"
|
||||
"github.com/libp2p/go-libp2p/core/host"
|
||||
"github.com/libp2p/go-libp2p/core/peer"
|
||||
"github.com/libp2p/go-libp2p/p2p/security/noise"
|
||||
"github.com/libp2p/go-libp2p/p2p/transport/tcp"
|
||||
"github.com/multiformats/go-multiaddr"
|
||||
)
|
||||
|
||||
// NewRealDHT creates a new real DHT implementation
|
||||
func NewRealDHT(config *config.HybridConfig) (DHT, error) {
|
||||
// TODO: Implement real DHT initialization
|
||||
// For now, return an error to indicate it's not yet implemented
|
||||
return nil, fmt.Errorf("real DHT implementation not yet available")
|
||||
// RealDHT wraps a libp2p-based DHT to satisfy the generic DHT interface.
|
||||
type RealDHT struct {
|
||||
cancel context.CancelFunc
|
||||
host host.Host
|
||||
dht *LibP2PDHT
|
||||
}
|
||||
|
||||
// NewRealDHT creates a new real DHT implementation backed by libp2p.
|
||||
func NewRealDHT(cfg *config.HybridConfig) (DHT, error) {
|
||||
if cfg == nil {
|
||||
cfg = &config.HybridConfig{}
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
|
||||
listenAddr, err := multiaddr.NewMultiaddr("/ip4/0.0.0.0/tcp/0")
|
||||
if err != nil {
|
||||
cancel()
|
||||
return nil, fmt.Errorf("failed to create listen address: %w", err)
|
||||
}
|
||||
|
||||
host, err := libp2p.New(
|
||||
libp2p.ListenAddrs(listenAddr),
|
||||
libp2p.Security(noise.ID, noise.New),
|
||||
libp2p.Transport(tcp.NewTCPTransport),
|
||||
libp2p.DefaultMuxers,
|
||||
libp2p.EnableRelay(),
|
||||
)
|
||||
if err != nil {
|
||||
cancel()
|
||||
return nil, fmt.Errorf("failed to create libp2p host: %w", err)
|
||||
}
|
||||
|
||||
opts := []Option{
|
||||
WithProtocolPrefix("/CHORUS"),
|
||||
}
|
||||
|
||||
if nodes := cfg.GetDHTBootstrapNodes(); len(nodes) > 0 {
|
||||
opts = append(opts, WithBootstrapPeersFromStrings(nodes))
|
||||
}
|
||||
|
||||
libp2pDHT, err := NewLibP2PDHT(ctx, host, opts...)
|
||||
if err != nil {
|
||||
host.Close()
|
||||
cancel()
|
||||
return nil, fmt.Errorf("failed to initialize libp2p DHT: %w", err)
|
||||
}
|
||||
|
||||
if err := libp2pDHT.Bootstrap(); err != nil {
|
||||
libp2pDHT.Close()
|
||||
host.Close()
|
||||
cancel()
|
||||
return nil, fmt.Errorf("failed to bootstrap DHT: %w", err)
|
||||
}
|
||||
|
||||
return &RealDHT{
|
||||
cancel: cancel,
|
||||
host: host,
|
||||
dht: libp2pDHT,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// PutValue stores a value in the DHT.
|
||||
func (r *RealDHT) PutValue(ctx context.Context, key string, value []byte) error {
|
||||
return r.dht.PutValue(ctx, key, value)
|
||||
}
|
||||
|
||||
// GetValue retrieves a value from the DHT.
|
||||
func (r *RealDHT) GetValue(ctx context.Context, key string) ([]byte, error) {
|
||||
return r.dht.GetValue(ctx, key)
|
||||
}
|
||||
|
||||
// Provide announces that this node can provide the given key.
|
||||
func (r *RealDHT) Provide(ctx context.Context, key string) error {
|
||||
return r.dht.Provide(ctx, key)
|
||||
}
|
||||
|
||||
// FindProviders locates peers that can provide the specified key.
|
||||
func (r *RealDHT) FindProviders(ctx context.Context, key string, limit int) ([]peer.AddrInfo, error) {
|
||||
return r.dht.FindProviders(ctx, key, limit)
|
||||
}
|
||||
|
||||
// GetStats exposes runtime metrics for the real DHT.
|
||||
func (r *RealDHT) GetStats() DHTStats {
|
||||
return r.dht.GetStats()
|
||||
}
|
||||
|
||||
// Close releases resources associated with the DHT.
|
||||
func (r *RealDHT) Close() error {
|
||||
r.cancel()
|
||||
|
||||
var errs []error
|
||||
if err := r.dht.Close(); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
if err := r.host.Close(); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
|
||||
return errors.Join(errs...)
|
||||
}
|
||||
@@ -2,159 +2,106 @@ package dht
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
// TestReplicationManager tests basic replication manager functionality
|
||||
func TestReplicationManager(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
func newReplicationManagerForTest(t *testing.T) *ReplicationManager {
|
||||
t.Helper()
|
||||
|
||||
// Create a mock DHT for testing
|
||||
mockDHT := NewMockDHTInterface()
|
||||
|
||||
// Create replication manager
|
||||
config := DefaultReplicationConfig()
|
||||
config.ReprovideInterval = 1 * time.Second // Short interval for testing
|
||||
config.CleanupInterval = 1 * time.Second
|
||||
|
||||
rm := NewReplicationManager(ctx, mockDHT.Mock(), config)
|
||||
defer rm.Stop()
|
||||
|
||||
// Test adding content
|
||||
testKey := "test-content-key"
|
||||
testSize := int64(1024)
|
||||
testPriority := 5
|
||||
|
||||
err := rm.AddContent(testKey, testSize, testPriority)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to add content: %v", err)
|
||||
cfg := &ReplicationConfig{
|
||||
ReplicationFactor: 3,
|
||||
ReprovideInterval: time.Hour,
|
||||
CleanupInterval: time.Hour,
|
||||
ProviderTTL: 30 * time.Minute,
|
||||
MaxProvidersPerKey: 5,
|
||||
EnableAutoReplication: false,
|
||||
EnableReprovide: false,
|
||||
MaxConcurrentReplications: 1,
|
||||
}
|
||||
|
||||
// Test getting replication status
|
||||
status, err := rm.GetReplicationStatus(testKey)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to get replication status: %v", err)
|
||||
rm := NewReplicationManager(context.Background(), nil, cfg)
|
||||
t.Cleanup(func() {
|
||||
if rm.reprovideTimer != nil {
|
||||
rm.reprovideTimer.Stop()
|
||||
}
|
||||
if rm.cleanupTimer != nil {
|
||||
rm.cleanupTimer.Stop()
|
||||
}
|
||||
rm.cancel()
|
||||
})
|
||||
return rm
|
||||
}
|
||||
|
||||
func TestAddContentRegistersKey(t *testing.T) {
|
||||
rm := newReplicationManagerForTest(t)
|
||||
|
||||
if err := rm.AddContent("ucxl://example/path", 512, 1); err != nil {
|
||||
t.Fatalf("expected AddContent to succeed, got error: %v", err)
|
||||
}
|
||||
|
||||
if status.Key != testKey {
|
||||
t.Errorf("Expected key %s, got %s", testKey, status.Key)
|
||||
rm.keysMutex.RLock()
|
||||
record, ok := rm.contentKeys["ucxl://example/path"]
|
||||
rm.keysMutex.RUnlock()
|
||||
|
||||
if !ok {
|
||||
t.Fatal("expected content key to be registered")
|
||||
}
|
||||
|
||||
if status.Size != testSize {
|
||||
t.Errorf("Expected size %d, got %d", testSize, status.Size)
|
||||
}
|
||||
|
||||
if status.Priority != testPriority {
|
||||
t.Errorf("Expected priority %d, got %d", testPriority, status.Priority)
|
||||
}
|
||||
|
||||
// Test providing content
|
||||
err = rm.ProvideContent(testKey)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to provide content: %v", err)
|
||||
}
|
||||
|
||||
// Test metrics
|
||||
metrics := rm.GetMetrics()
|
||||
if metrics.TotalKeys != 1 {
|
||||
t.Errorf("Expected 1 total key, got %d", metrics.TotalKeys)
|
||||
}
|
||||
|
||||
// Test finding providers
|
||||
providers, err := rm.FindProviders(ctx, testKey, 10)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to find providers: %v", err)
|
||||
}
|
||||
|
||||
t.Logf("Found %d providers for key %s", len(providers), testKey)
|
||||
|
||||
// Test removing content
|
||||
err = rm.RemoveContent(testKey)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to remove content: %v", err)
|
||||
}
|
||||
|
||||
// Verify content was removed
|
||||
metrics = rm.GetMetrics()
|
||||
if metrics.TotalKeys != 0 {
|
||||
t.Errorf("Expected 0 total keys after removal, got %d", metrics.TotalKeys)
|
||||
if record.Size != 512 {
|
||||
t.Fatalf("expected size 512, got %d", record.Size)
|
||||
}
|
||||
}
|
||||
|
||||
// TestLibP2PDHTReplication tests DHT replication functionality
|
||||
func TestLibP2PDHTReplication(t *testing.T) {
|
||||
// This would normally require a real libp2p setup
|
||||
// For now, just test the interface methods exist
|
||||
func TestRemoveContentClearsTracking(t *testing.T) {
|
||||
rm := newReplicationManagerForTest(t)
|
||||
|
||||
// Mock test - in a real implementation, you'd set up actual libp2p hosts
|
||||
t.Log("DHT replication interface methods are implemented")
|
||||
|
||||
// Example of how the replication would be used:
|
||||
// 1. Add content for replication
|
||||
// 2. Content gets automatically provided to the DHT
|
||||
// 3. Other nodes can discover this node as a provider
|
||||
// 4. Periodic reproviding ensures content availability
|
||||
// 5. Replication metrics track system health
|
||||
}
|
||||
|
||||
// TestReplicationConfig tests replication configuration
|
||||
func TestReplicationConfig(t *testing.T) {
|
||||
config := DefaultReplicationConfig()
|
||||
|
||||
// Test default values
|
||||
if config.ReplicationFactor != 3 {
|
||||
t.Errorf("Expected default replication factor 3, got %d", config.ReplicationFactor)
|
||||
if err := rm.AddContent("ucxl://example/path", 512, 1); err != nil {
|
||||
t.Fatalf("AddContent returned error: %v", err)
|
||||
}
|
||||
|
||||
if config.ReprovideInterval != 12*time.Hour {
|
||||
t.Errorf("Expected default reprovide interval 12h, got %v", config.ReprovideInterval)
|
||||
if err := rm.RemoveContent("ucxl://example/path"); err != nil {
|
||||
t.Fatalf("RemoveContent returned error: %v", err)
|
||||
}
|
||||
|
||||
if !config.EnableAutoReplication {
|
||||
t.Error("Expected auto replication to be enabled by default")
|
||||
}
|
||||
rm.keysMutex.RLock()
|
||||
_, exists := rm.contentKeys["ucxl://example/path"]
|
||||
rm.keysMutex.RUnlock()
|
||||
|
||||
if !config.EnableReprovide {
|
||||
t.Error("Expected reprovide to be enabled by default")
|
||||
if exists {
|
||||
t.Fatal("expected content key to be removed")
|
||||
}
|
||||
}
|
||||
|
||||
// TestProviderInfo tests provider information tracking
|
||||
func TestProviderInfo(t *testing.T) {
|
||||
// Test distance calculation
|
||||
key := []byte("test-key")
|
||||
peerID := "test-peer-id"
|
||||
func TestGetReplicationStatusReturnsCopy(t *testing.T) {
|
||||
rm := newReplicationManagerForTest(t)
|
||||
|
||||
distance := calculateDistance(key, []byte(peerID))
|
||||
|
||||
// Distance should be non-zero for different inputs
|
||||
if distance == 0 {
|
||||
t.Error("Expected non-zero distance for different inputs")
|
||||
if err := rm.AddContent("ucxl://example/path", 512, 1); err != nil {
|
||||
t.Fatalf("AddContent returned error: %v", err)
|
||||
}
|
||||
|
||||
t.Logf("Distance between key and peer: %d", distance)
|
||||
status, err := rm.GetReplicationStatus("ucxl://example/path")
|
||||
if err != nil {
|
||||
t.Fatalf("GetReplicationStatus returned error: %v", err)
|
||||
}
|
||||
|
||||
if status.Key != "ucxl://example/path" {
|
||||
t.Fatalf("expected status key to match, got %s", status.Key)
|
||||
}
|
||||
|
||||
// Mutating status should not affect internal state
|
||||
status.HealthyProviders = 99
|
||||
internal, _ := rm.GetReplicationStatus("ucxl://example/path")
|
||||
if internal.HealthyProviders == 99 {
|
||||
t.Fatal("expected GetReplicationStatus to return a copy")
|
||||
}
|
||||
}
|
||||
|
||||
// TestReplicationMetrics tests metrics collection
|
||||
func TestReplicationMetrics(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
mockDHT := NewMockDHTInterface()
|
||||
rm := NewReplicationManager(ctx, mockDHT.Mock(), DefaultReplicationConfig())
|
||||
defer rm.Stop()
|
||||
|
||||
// Add some content
|
||||
for i := 0; i < 3; i++ {
|
||||
key := fmt.Sprintf("test-key-%d", i)
|
||||
rm.AddContent(key, int64(1000+i*100), i+1)
|
||||
}
|
||||
func TestGetMetricsReturnsSnapshot(t *testing.T) {
|
||||
rm := newReplicationManagerForTest(t)
|
||||
|
||||
metrics := rm.GetMetrics()
|
||||
|
||||
if metrics.TotalKeys != 3 {
|
||||
t.Errorf("Expected 3 total keys, got %d", metrics.TotalKeys)
|
||||
if metrics == rm.metrics {
|
||||
t.Fatal("expected GetMetrics to return a copy of metrics")
|
||||
}
|
||||
|
||||
t.Logf("Replication metrics: %+v", metrics)
|
||||
}
|
||||
@@ -19,8 +19,8 @@ import (
|
||||
type ElectionTrigger string
|
||||
|
||||
const (
|
||||
TriggerHeartbeatTimeout ElectionTrigger = "admin_heartbeat_timeout"
|
||||
TriggerDiscoveryFailure ElectionTrigger = "no_admin_discovered"
|
||||
TriggerHeartbeatTimeout ElectionTrigger = "admin_heartbeat_timeout"
|
||||
TriggerDiscoveryFailure ElectionTrigger = "no_admin_discovered"
|
||||
TriggerSplitBrain ElectionTrigger = "split_brain_detected"
|
||||
TriggerQuorumRestored ElectionTrigger = "quorum_restored"
|
||||
TriggerManual ElectionTrigger = "manual_trigger"
|
||||
@@ -30,30 +30,35 @@ const (
|
||||
type ElectionState string
|
||||
|
||||
const (
|
||||
StateIdle ElectionState = "idle"
|
||||
StateDiscovering ElectionState = "discovering"
|
||||
StateElecting ElectionState = "electing"
|
||||
electionTopic = "CHORUS/election/v1"
|
||||
adminHeartbeatTopic = "CHORUS/admin/heartbeat/v1"
|
||||
)
|
||||
|
||||
const (
|
||||
StateIdle ElectionState = "idle"
|
||||
StateDiscovering ElectionState = "discovering"
|
||||
StateElecting ElectionState = "electing"
|
||||
StateReconstructing ElectionState = "reconstructing_keys"
|
||||
StateComplete ElectionState = "complete"
|
||||
StateComplete ElectionState = "complete"
|
||||
)
|
||||
|
||||
// AdminCandidate represents a node candidate for admin role
|
||||
type AdminCandidate struct {
|
||||
NodeID string `json:"node_id"`
|
||||
PeerID peer.ID `json:"peer_id"`
|
||||
Capabilities []string `json:"capabilities"`
|
||||
Uptime time.Duration `json:"uptime"`
|
||||
Resources ResourceMetrics `json:"resources"`
|
||||
Experience time.Duration `json:"experience"`
|
||||
Score float64 `json:"score"`
|
||||
Metadata map[string]interface{} `json:"metadata,omitempty"`
|
||||
NodeID string `json:"node_id"`
|
||||
PeerID peer.ID `json:"peer_id"`
|
||||
Capabilities []string `json:"capabilities"`
|
||||
Uptime time.Duration `json:"uptime"`
|
||||
Resources ResourceMetrics `json:"resources"`
|
||||
Experience time.Duration `json:"experience"`
|
||||
Score float64 `json:"score"`
|
||||
Metadata map[string]interface{} `json:"metadata,omitempty"`
|
||||
}
|
||||
|
||||
// ResourceMetrics holds node resource information for election scoring
|
||||
type ResourceMetrics struct {
|
||||
CPUUsage float64 `json:"cpu_usage"`
|
||||
MemoryUsage float64 `json:"memory_usage"`
|
||||
DiskUsage float64 `json:"disk_usage"`
|
||||
CPUUsage float64 `json:"cpu_usage"`
|
||||
MemoryUsage float64 `json:"memory_usage"`
|
||||
DiskUsage float64 `json:"disk_usage"`
|
||||
NetworkQuality float64 `json:"network_quality"`
|
||||
}
|
||||
|
||||
@@ -68,33 +73,33 @@ type ElectionMessage struct {
|
||||
|
||||
// ElectionManager handles admin election coordination
|
||||
type ElectionManager struct {
|
||||
ctx context.Context
|
||||
cancel context.CancelFunc
|
||||
config *config.Config
|
||||
host libp2p.Host
|
||||
pubsub *pubsub.PubSub
|
||||
nodeID string
|
||||
ctx context.Context
|
||||
cancel context.CancelFunc
|
||||
config *config.Config
|
||||
host libp2p.Host
|
||||
pubsub *pubsub.PubSub
|
||||
nodeID string
|
||||
|
||||
// Election state
|
||||
mu sync.RWMutex
|
||||
state ElectionState
|
||||
currentTerm int
|
||||
lastHeartbeat time.Time
|
||||
currentAdmin string
|
||||
candidates map[string]*AdminCandidate
|
||||
votes map[string]string // voter -> candidate
|
||||
mu sync.RWMutex
|
||||
state ElectionState
|
||||
currentTerm int
|
||||
lastHeartbeat time.Time
|
||||
currentAdmin string
|
||||
candidates map[string]*AdminCandidate
|
||||
votes map[string]string // voter -> candidate
|
||||
|
||||
// Timers and channels
|
||||
heartbeatTimer *time.Timer
|
||||
discoveryTimer *time.Timer
|
||||
electionTimer *time.Timer
|
||||
electionTrigger chan ElectionTrigger
|
||||
heartbeatTimer *time.Timer
|
||||
discoveryTimer *time.Timer
|
||||
electionTimer *time.Timer
|
||||
electionTrigger chan ElectionTrigger
|
||||
|
||||
// Heartbeat management
|
||||
heartbeatManager *HeartbeatManager
|
||||
heartbeatManager *HeartbeatManager
|
||||
|
||||
// Callbacks
|
||||
onAdminChanged func(oldAdmin, newAdmin string)
|
||||
onAdminChanged func(oldAdmin, newAdmin string)
|
||||
onElectionComplete func(winner string)
|
||||
|
||||
startTime time.Time
|
||||
@@ -102,12 +107,12 @@ type ElectionManager struct {
|
||||
|
||||
// HeartbeatManager manages admin heartbeat lifecycle
|
||||
type HeartbeatManager struct {
|
||||
mu sync.Mutex
|
||||
isRunning bool
|
||||
stopCh chan struct{}
|
||||
ticker *time.Ticker
|
||||
electionMgr *ElectionManager
|
||||
logger func(msg string, args ...interface{})
|
||||
mu sync.Mutex
|
||||
isRunning bool
|
||||
stopCh chan struct{}
|
||||
ticker *time.Ticker
|
||||
electionMgr *ElectionManager
|
||||
logger func(msg string, args ...interface{})
|
||||
}
|
||||
|
||||
// NewElectionManager creates a new election manager
|
||||
@@ -149,14 +154,17 @@ func NewElectionManager(
|
||||
func (em *ElectionManager) Start() error {
|
||||
log.Printf("🗳️ Starting election manager for node %s", em.nodeID)
|
||||
|
||||
// TODO: Subscribe to election-related messages - pubsub interface needs update
|
||||
// if err := em.pubsub.Subscribe("CHORUS/election/v1", em.handleElectionMessage); err != nil {
|
||||
// return fmt.Errorf("failed to subscribe to election messages: %w", err)
|
||||
// }
|
||||
//
|
||||
// if err := em.pubsub.Subscribe("CHORUS/admin/heartbeat/v1", em.handleAdminHeartbeat); err != nil {
|
||||
// return fmt.Errorf("failed to subscribe to admin heartbeat: %w", err)
|
||||
// }
|
||||
if err := em.pubsub.SubscribeRawTopic(electionTopic, func(data []byte, _ peer.ID) {
|
||||
em.handleElectionMessage(data)
|
||||
}); err != nil {
|
||||
return fmt.Errorf("failed to subscribe to election messages: %w", err)
|
||||
}
|
||||
|
||||
if err := em.pubsub.SubscribeRawTopic(adminHeartbeatTopic, func(data []byte, _ peer.ID) {
|
||||
em.handleAdminHeartbeat(data)
|
||||
}); err != nil {
|
||||
return fmt.Errorf("failed to subscribe to admin heartbeat: %w", err)
|
||||
}
|
||||
|
||||
// Start discovery process
|
||||
go em.startDiscoveryLoop()
|
||||
@@ -396,7 +404,7 @@ func (em *ElectionManager) announceCandidacy(term int) {
|
||||
Experience: uptime, // For now, use uptime as experience
|
||||
Metadata: map[string]interface{}{
|
||||
"specialization": em.config.Agent.Specialization,
|
||||
"models": em.config.Agent.Models,
|
||||
"models": em.config.Agent.Models,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -423,9 +431,9 @@ func (em *ElectionManager) getResourceMetrics() ResourceMetrics {
|
||||
// TODO: Implement actual resource collection
|
||||
// For now, return simulated values
|
||||
return ResourceMetrics{
|
||||
CPUUsage: rand.Float64() * 0.5, // 0-50% CPU
|
||||
MemoryUsage: rand.Float64() * 0.7, // 0-70% Memory
|
||||
DiskUsage: rand.Float64() * 0.6, // 0-60% Disk
|
||||
CPUUsage: rand.Float64() * 0.5, // 0-50% CPU
|
||||
MemoryUsage: rand.Float64() * 0.7, // 0-70% Memory
|
||||
DiskUsage: rand.Float64() * 0.6, // 0-60% Disk
|
||||
NetworkQuality: 0.8 + rand.Float64()*0.2, // 80-100% Network Quality
|
||||
}
|
||||
}
|
||||
@@ -457,10 +465,10 @@ func (em *ElectionManager) calculateCandidateScore(candidate *AdminCandidate) fl
|
||||
capabilityScore = min(1.0, capabilityScore)
|
||||
|
||||
// Resource score - lower usage is better
|
||||
resourceScore := (1.0 - candidate.Resources.CPUUsage) * 0.3 +
|
||||
(1.0 - candidate.Resources.MemoryUsage) * 0.3 +
|
||||
(1.0 - candidate.Resources.DiskUsage) * 0.2 +
|
||||
candidate.Resources.NetworkQuality * 0.2
|
||||
resourceScore := (1.0-candidate.Resources.CPUUsage)*0.3 +
|
||||
(1.0-candidate.Resources.MemoryUsage)*0.3 +
|
||||
(1.0-candidate.Resources.DiskUsage)*0.2 +
|
||||
candidate.Resources.NetworkQuality*0.2
|
||||
|
||||
experienceScore := min(1.0, candidate.Experience.Hours()/168.0) // Up to 1 week gets full score
|
||||
|
||||
@@ -839,10 +847,7 @@ func (em *ElectionManager) publishElectionMessage(msg ElectionMessage) error {
|
||||
return fmt.Errorf("failed to marshal election message: %w", err)
|
||||
}
|
||||
|
||||
// TODO: Fix pubsub interface
|
||||
// return em.pubsub.Publish("CHORUS/election/v1", data)
|
||||
_ = data // Avoid unused variable
|
||||
return nil
|
||||
return em.pubsub.PublishRaw(electionTopic, data)
|
||||
}
|
||||
|
||||
// SendAdminHeartbeat sends admin heartbeat (only if this node is admin)
|
||||
@@ -864,10 +869,7 @@ func (em *ElectionManager) SendAdminHeartbeat() error {
|
||||
return fmt.Errorf("failed to marshal heartbeat: %w", err)
|
||||
}
|
||||
|
||||
// TODO: Fix pubsub interface
|
||||
// return em.pubsub.Publish("CHORUS/admin/heartbeat/v1", data)
|
||||
_ = data // Avoid unused variable
|
||||
return nil
|
||||
return em.pubsub.PublishRaw(adminHeartbeatTopic, data)
|
||||
}
|
||||
|
||||
// min returns the minimum of two float64 values
|
||||
@@ -989,9 +991,9 @@ func (hm *HeartbeatManager) GetHeartbeatStatus() map[string]interface{} {
|
||||
defer hm.mu.Unlock()
|
||||
|
||||
status := map[string]interface{}{
|
||||
"running": hm.isRunning,
|
||||
"is_admin": hm.electionMgr.IsCurrentAdmin(),
|
||||
"last_sent": time.Now(), // TODO: Track actual last sent time
|
||||
"running": hm.isRunning,
|
||||
"is_admin": hm.electionMgr.IsCurrentAdmin(),
|
||||
"last_sent": time.Now(), // TODO: Track actual last sent time
|
||||
}
|
||||
|
||||
if hm.isRunning && hm.ticker != nil {
|
||||
|
||||
@@ -2,451 +2,185 @@ package election
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"chorus/pkg/config"
|
||||
pubsubpkg "chorus/pubsub"
|
||||
libp2p "github.com/libp2p/go-libp2p"
|
||||
)
|
||||
|
||||
func TestElectionManager_NewElectionManager(t *testing.T) {
|
||||
// newTestElectionManager wires a real libp2p host and PubSub instance so the
|
||||
// election manager exercises the same code paths used in production.
|
||||
func newTestElectionManager(t *testing.T) *ElectionManager {
|
||||
t.Helper()
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
|
||||
host, err := libp2p.New(libp2p.ListenAddrStrings("/ip4/127.0.0.1/tcp/0"))
|
||||
if err != nil {
|
||||
cancel()
|
||||
t.Fatalf("failed to create libp2p host: %v", err)
|
||||
}
|
||||
|
||||
ps, err := pubsubpkg.NewPubSub(ctx, host, "", "")
|
||||
if err != nil {
|
||||
host.Close()
|
||||
cancel()
|
||||
t.Fatalf("failed to create pubsub: %v", err)
|
||||
}
|
||||
|
||||
cfg := &config.Config{
|
||||
Agent: config.AgentConfig{
|
||||
ID: "test-node",
|
||||
ID: host.ID().String(),
|
||||
Role: "context_admin",
|
||||
Capabilities: []string{"admin_election", "context_curation"},
|
||||
Models: []string{"meta/llama-3.1-8b-instruct"},
|
||||
Specialization: "coordination",
|
||||
},
|
||||
Security: config.SecurityConfig{},
|
||||
}
|
||||
|
||||
em := NewElectionManager(cfg)
|
||||
if em == nil {
|
||||
t.Fatal("Expected NewElectionManager to return non-nil manager")
|
||||
}
|
||||
em := NewElectionManager(ctx, cfg, host, ps, host.ID().String())
|
||||
|
||||
if em.nodeID != "test-node" {
|
||||
t.Errorf("Expected nodeID to be 'test-node', got %s", em.nodeID)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
em.Stop()
|
||||
ps.Close()
|
||||
host.Close()
|
||||
cancel()
|
||||
})
|
||||
|
||||
return em
|
||||
}
|
||||
|
||||
func TestNewElectionManagerInitialState(t *testing.T) {
|
||||
em := newTestElectionManager(t)
|
||||
|
||||
if em.state != StateIdle {
|
||||
t.Errorf("Expected initial state to be StateIdle, got %v", em.state)
|
||||
t.Fatalf("expected initial state %q, got %q", StateIdle, em.state)
|
||||
}
|
||||
|
||||
if em.currentTerm != 0 {
|
||||
t.Fatalf("expected initial term 0, got %d", em.currentTerm)
|
||||
}
|
||||
|
||||
if em.nodeID == "" {
|
||||
t.Fatal("expected nodeID to be populated")
|
||||
}
|
||||
}
|
||||
|
||||
func TestElectionManager_StartElection(t *testing.T) {
|
||||
cfg := &config.Config{
|
||||
Agent: config.AgentConfig{
|
||||
ID: "test-node",
|
||||
},
|
||||
func TestElectionManagerCanBeAdmin(t *testing.T) {
|
||||
em := newTestElectionManager(t)
|
||||
|
||||
if !em.canBeAdmin() {
|
||||
t.Fatal("expected node to qualify for admin election")
|
||||
}
|
||||
|
||||
em := NewElectionManager(cfg)
|
||||
|
||||
// Start election
|
||||
err := em.StartElection()
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to start election: %v", err)
|
||||
}
|
||||
|
||||
// Verify state changed
|
||||
if em.state != StateCandidate {
|
||||
t.Errorf("Expected state to be StateCandidate after starting election, got %v", em.state)
|
||||
}
|
||||
|
||||
// Verify we added ourselves as a candidate
|
||||
em.mu.RLock()
|
||||
candidate, exists := em.candidates[em.nodeID]
|
||||
em.mu.RUnlock()
|
||||
|
||||
if !exists {
|
||||
t.Error("Expected to find ourselves as a candidate after starting election")
|
||||
}
|
||||
|
||||
if candidate.NodeID != em.nodeID {
|
||||
t.Errorf("Expected candidate NodeID to be %s, got %s", em.nodeID, candidate.NodeID)
|
||||
em.config.Agent.Capabilities = []string{"runtime_support"}
|
||||
if em.canBeAdmin() {
|
||||
t.Fatal("expected node without admin capabilities to be ineligible")
|
||||
}
|
||||
}
|
||||
|
||||
func TestElectionManager_Vote(t *testing.T) {
|
||||
cfg := &config.Config{
|
||||
Agent: config.AgentConfig{
|
||||
ID: "test-node",
|
||||
},
|
||||
}
|
||||
|
||||
em := NewElectionManager(cfg)
|
||||
|
||||
// Add a candidate first
|
||||
candidate := &AdminCandidate{
|
||||
NodeID: "candidate-1",
|
||||
Term: 1,
|
||||
Score: 0.8,
|
||||
Capabilities: []string{"admin"},
|
||||
LastSeen: time.Now(),
|
||||
}
|
||||
func TestFindElectionWinnerPrefersVotesThenScore(t *testing.T) {
|
||||
em := newTestElectionManager(t)
|
||||
|
||||
em.mu.Lock()
|
||||
em.candidates["candidate-1"] = candidate
|
||||
em.candidates = map[string]*AdminCandidate{
|
||||
"candidate-1": {
|
||||
NodeID: "candidate-1",
|
||||
PeerID: em.host.ID(),
|
||||
Score: 0.65,
|
||||
},
|
||||
"candidate-2": {
|
||||
NodeID: "candidate-2",
|
||||
PeerID: em.host.ID(),
|
||||
Score: 0.80,
|
||||
},
|
||||
}
|
||||
em.votes = map[string]string{
|
||||
"voter-a": "candidate-1",
|
||||
"voter-b": "candidate-2",
|
||||
"voter-c": "candidate-2",
|
||||
}
|
||||
em.mu.Unlock()
|
||||
|
||||
// Vote for the candidate
|
||||
err := em.Vote("candidate-1")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to vote: %v", err)
|
||||
}
|
||||
|
||||
// Verify vote was recorded
|
||||
em.mu.RLock()
|
||||
vote, exists := em.votes[em.nodeID]
|
||||
em.mu.RUnlock()
|
||||
|
||||
if !exists {
|
||||
t.Error("Expected to find our vote after voting")
|
||||
}
|
||||
|
||||
if vote != "candidate-1" {
|
||||
t.Errorf("Expected vote to be for 'candidate-1', got %s", vote)
|
||||
}
|
||||
}
|
||||
|
||||
func TestElectionManager_VoteInvalidCandidate(t *testing.T) {
|
||||
cfg := &config.Config{
|
||||
Agent: config.AgentConfig{
|
||||
ID: "test-node",
|
||||
},
|
||||
}
|
||||
|
||||
em := NewElectionManager(cfg)
|
||||
|
||||
// Try to vote for non-existent candidate
|
||||
err := em.Vote("non-existent")
|
||||
if err == nil {
|
||||
t.Error("Expected error when voting for non-existent candidate")
|
||||
}
|
||||
}
|
||||
|
||||
func TestElectionManager_AddCandidate(t *testing.T) {
|
||||
cfg := &config.Config{
|
||||
Agent: config.AgentConfig{
|
||||
ID: "test-node",
|
||||
},
|
||||
}
|
||||
|
||||
em := NewElectionManager(cfg)
|
||||
|
||||
candidate := &AdminCandidate{
|
||||
NodeID: "new-candidate",
|
||||
Term: 1,
|
||||
Score: 0.7,
|
||||
Capabilities: []string{"admin", "leader"},
|
||||
LastSeen: time.Now(),
|
||||
}
|
||||
|
||||
err := em.AddCandidate(candidate)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to add candidate: %v", err)
|
||||
}
|
||||
|
||||
// Verify candidate was added
|
||||
em.mu.RLock()
|
||||
stored, exists := em.candidates["new-candidate"]
|
||||
em.mu.RUnlock()
|
||||
|
||||
if !exists {
|
||||
t.Error("Expected to find added candidate")
|
||||
}
|
||||
|
||||
if stored.NodeID != "new-candidate" {
|
||||
t.Errorf("Expected stored candidate NodeID to be 'new-candidate', got %s", stored.NodeID)
|
||||
}
|
||||
|
||||
if stored.Score != 0.7 {
|
||||
t.Errorf("Expected stored candidate score to be 0.7, got %f", stored.Score)
|
||||
}
|
||||
}
|
||||
|
||||
func TestElectionManager_FindElectionWinner(t *testing.T) {
|
||||
cfg := &config.Config{
|
||||
Agent: config.AgentConfig{
|
||||
ID: "test-node",
|
||||
},
|
||||
}
|
||||
|
||||
em := NewElectionManager(cfg)
|
||||
|
||||
// Add candidates with different scores
|
||||
candidates := []*AdminCandidate{
|
||||
{
|
||||
NodeID: "candidate-1",
|
||||
Term: 1,
|
||||
Score: 0.6,
|
||||
Capabilities: []string{"admin"},
|
||||
LastSeen: time.Now(),
|
||||
},
|
||||
{
|
||||
NodeID: "candidate-2",
|
||||
Term: 1,
|
||||
Score: 0.8,
|
||||
Capabilities: []string{"admin", "leader"},
|
||||
LastSeen: time.Now(),
|
||||
},
|
||||
{
|
||||
NodeID: "candidate-3",
|
||||
Term: 1,
|
||||
Score: 0.7,
|
||||
Capabilities: []string{"admin"},
|
||||
LastSeen: time.Now(),
|
||||
},
|
||||
}
|
||||
|
||||
em.mu.Lock()
|
||||
for _, candidate := range candidates {
|
||||
em.candidates[candidate.NodeID] = candidate
|
||||
}
|
||||
|
||||
// Add some votes
|
||||
em.votes["voter-1"] = "candidate-2"
|
||||
em.votes["voter-2"] = "candidate-2"
|
||||
em.votes["voter-3"] = "candidate-1"
|
||||
em.mu.Unlock()
|
||||
|
||||
// Find winner
|
||||
winner := em.findElectionWinner()
|
||||
|
||||
if winner == nil {
|
||||
t.Fatal("Expected findElectionWinner to return a winner")
|
||||
t.Fatal("expected a winner to be selected")
|
||||
}
|
||||
|
||||
// candidate-2 should win with most votes (2 votes)
|
||||
if winner.NodeID != "candidate-2" {
|
||||
t.Errorf("Expected winner to be 'candidate-2', got %s", winner.NodeID)
|
||||
t.Fatalf("expected candidate-2 to win, got %s", winner.NodeID)
|
||||
}
|
||||
}
|
||||
|
||||
func TestElectionManager_FindElectionWinnerNoVotes(t *testing.T) {
|
||||
cfg := &config.Config{
|
||||
Agent: config.AgentConfig{
|
||||
ID: "test-node",
|
||||
},
|
||||
}
|
||||
|
||||
em := NewElectionManager(cfg)
|
||||
|
||||
// Add candidates but no votes - should fall back to highest score
|
||||
candidates := []*AdminCandidate{
|
||||
{
|
||||
NodeID: "candidate-1",
|
||||
Term: 1,
|
||||
Score: 0.6,
|
||||
Capabilities: []string{"admin"},
|
||||
LastSeen: time.Now(),
|
||||
},
|
||||
{
|
||||
NodeID: "candidate-2",
|
||||
Term: 1,
|
||||
Score: 0.9, // Highest score
|
||||
Capabilities: []string{"admin", "leader"},
|
||||
LastSeen: time.Now(),
|
||||
},
|
||||
}
|
||||
func TestHandleElectionMessageAddsCandidate(t *testing.T) {
|
||||
em := newTestElectionManager(t)
|
||||
|
||||
em.mu.Lock()
|
||||
for _, candidate := range candidates {
|
||||
em.candidates[candidate.NodeID] = candidate
|
||||
}
|
||||
em.currentTerm = 3
|
||||
em.state = StateElecting
|
||||
em.mu.Unlock()
|
||||
|
||||
// Find winner without any votes
|
||||
winner := em.findElectionWinner()
|
||||
|
||||
if winner == nil {
|
||||
t.Fatal("Expected findElectionWinner to return a winner")
|
||||
}
|
||||
|
||||
// candidate-2 should win with highest score
|
||||
if winner.NodeID != "candidate-2" {
|
||||
t.Errorf("Expected winner to be 'candidate-2' (highest score), got %s", winner.NodeID)
|
||||
}
|
||||
}
|
||||
|
||||
func TestElectionManager_HandleElectionVote(t *testing.T) {
|
||||
cfg := &config.Config{
|
||||
Agent: config.AgentConfig{
|
||||
ID: "test-node",
|
||||
},
|
||||
}
|
||||
|
||||
em := NewElectionManager(cfg)
|
||||
|
||||
// Add a candidate first
|
||||
candidate := &AdminCandidate{
|
||||
NodeID: "candidate-1",
|
||||
Term: 1,
|
||||
Score: 0.8,
|
||||
Capabilities: []string{"admin"},
|
||||
LastSeen: time.Now(),
|
||||
NodeID: "peer-2",
|
||||
PeerID: em.host.ID(),
|
||||
Capabilities: []string{"admin_election"},
|
||||
Uptime: time.Second,
|
||||
Score: 0.75,
|
||||
}
|
||||
|
||||
em.mu.Lock()
|
||||
em.candidates["candidate-1"] = candidate
|
||||
em.mu.Unlock()
|
||||
payload, err := json.Marshal(candidate)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to marshal candidate: %v", err)
|
||||
}
|
||||
|
||||
var data map[string]interface{}
|
||||
if err := json.Unmarshal(payload, &data); err != nil {
|
||||
t.Fatalf("failed to unmarshal candidate payload: %v", err)
|
||||
}
|
||||
|
||||
// Create vote message
|
||||
msg := ElectionMessage{
|
||||
Type: MessageTypeVote,
|
||||
NodeID: "voter-1",
|
||||
Data: map[string]interface{}{
|
||||
"candidate": "candidate-1",
|
||||
},
|
||||
Type: "candidacy_announcement",
|
||||
NodeID: "peer-2",
|
||||
Timestamp: time.Now(),
|
||||
Term: 3,
|
||||
Data: data,
|
||||
}
|
||||
|
||||
// Handle the vote
|
||||
em.handleElectionVote(msg)
|
||||
serialized, err := json.Marshal(msg)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to marshal election message: %v", err)
|
||||
}
|
||||
|
||||
em.handleElectionMessage(serialized)
|
||||
|
||||
// Verify vote was recorded
|
||||
em.mu.RLock()
|
||||
vote, exists := em.votes["voter-1"]
|
||||
_, exists := em.candidates["peer-2"]
|
||||
em.mu.RUnlock()
|
||||
|
||||
if !exists {
|
||||
t.Error("Expected vote to be recorded after handling vote message")
|
||||
}
|
||||
|
||||
if vote != "candidate-1" {
|
||||
t.Errorf("Expected recorded vote to be for 'candidate-1', got %s", vote)
|
||||
t.Fatal("expected candidacy announcement to register candidate")
|
||||
}
|
||||
}
|
||||
|
||||
func TestElectionManager_HandleElectionVoteInvalidData(t *testing.T) {
|
||||
cfg := &config.Config{
|
||||
Agent: config.AgentConfig{
|
||||
ID: "test-node",
|
||||
},
|
||||
func TestSendAdminHeartbeatRequiresLeadership(t *testing.T) {
|
||||
em := newTestElectionManager(t)
|
||||
|
||||
if err := em.SendAdminHeartbeat(); err == nil {
|
||||
t.Fatal("expected error when non-admin sends heartbeat")
|
||||
}
|
||||
|
||||
em := NewElectionManager(cfg)
|
||||
|
||||
// Create vote message with invalid data
|
||||
msg := ElectionMessage{
|
||||
Type: MessageTypeVote,
|
||||
NodeID: "voter-1",
|
||||
Data: "invalid-data", // Should be map[string]interface{}
|
||||
}
|
||||
|
||||
// Handle the vote - should not crash
|
||||
em.handleElectionVote(msg)
|
||||
|
||||
// Verify no vote was recorded
|
||||
em.mu.RLock()
|
||||
_, exists := em.votes["voter-1"]
|
||||
em.mu.RUnlock()
|
||||
|
||||
if exists {
|
||||
t.Error("Expected no vote to be recorded with invalid data")
|
||||
}
|
||||
}
|
||||
|
||||
func TestElectionManager_CompleteElection(t *testing.T) {
|
||||
cfg := &config.Config{
|
||||
Agent: config.AgentConfig{
|
||||
ID: "test-node",
|
||||
},
|
||||
}
|
||||
|
||||
em := NewElectionManager(cfg)
|
||||
|
||||
// Set up election state
|
||||
em.mu.Lock()
|
||||
em.state = StateCandidate
|
||||
em.currentTerm = 1
|
||||
em.mu.Unlock()
|
||||
|
||||
// Add a candidate
|
||||
candidate := &AdminCandidate{
|
||||
NodeID: "winner",
|
||||
Term: 1,
|
||||
Score: 0.9,
|
||||
Capabilities: []string{"admin", "leader"},
|
||||
LastSeen: time.Now(),
|
||||
if err := em.Start(); err != nil {
|
||||
t.Fatalf("failed to start election manager: %v", err)
|
||||
}
|
||||
|
||||
em.mu.Lock()
|
||||
em.candidates["winner"] = candidate
|
||||
em.currentAdmin = em.nodeID
|
||||
em.mu.Unlock()
|
||||
|
||||
// Complete election
|
||||
em.CompleteElection()
|
||||
|
||||
// Verify state reset
|
||||
em.mu.RLock()
|
||||
state := em.state
|
||||
em.mu.RUnlock()
|
||||
|
||||
if state != StateIdle {
|
||||
t.Errorf("Expected state to be StateIdle after completing election, got %v", state)
|
||||
}
|
||||
}
|
||||
|
||||
func TestElectionManager_Concurrency(t *testing.T) {
|
||||
cfg := &config.Config{
|
||||
Agent: config.AgentConfig{
|
||||
ID: "test-node",
|
||||
},
|
||||
}
|
||||
|
||||
em := NewElectionManager(cfg)
|
||||
|
||||
// Test concurrent access to vote and candidate operations
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
|
||||
defer cancel()
|
||||
|
||||
// Add a candidate
|
||||
candidate := &AdminCandidate{
|
||||
NodeID: "candidate-1",
|
||||
Term: 1,
|
||||
Score: 0.8,
|
||||
Capabilities: []string{"admin"},
|
||||
LastSeen: time.Now(),
|
||||
}
|
||||
|
||||
err := em.AddCandidate(candidate)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to add candidate: %v", err)
|
||||
}
|
||||
|
||||
// Run concurrent operations
|
||||
done := make(chan bool, 2)
|
||||
|
||||
// Concurrent voting
|
||||
go func() {
|
||||
defer func() { done <- true }()
|
||||
for i := 0; i < 10; i++ {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
default:
|
||||
em.Vote("candidate-1") // Ignore errors in concurrent test
|
||||
time.Sleep(10 * time.Millisecond)
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
// Concurrent state checking
|
||||
go func() {
|
||||
defer func() { done <- true }()
|
||||
for i := 0; i < 10; i++ {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
default:
|
||||
em.findElectionWinner() // Just check for races
|
||||
time.Sleep(10 * time.Millisecond)
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
// Wait for completion
|
||||
for i := 0; i < 2; i++ {
|
||||
select {
|
||||
case <-done:
|
||||
case <-ctx.Done():
|
||||
t.Fatal("Concurrent test timed out")
|
||||
}
|
||||
if err := em.SendAdminHeartbeat(); err != nil {
|
||||
t.Fatalf("expected heartbeat to succeed for current admin, got error: %v", err)
|
||||
}
|
||||
}
|
||||
@@ -2,26 +2,25 @@ package metrics
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log"
|
||||
"net/http"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||
"github.com/prometheus/client_golang/prometheus/promauto"
|
||||
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||
)
|
||||
|
||||
// CHORUSMetrics provides comprehensive Prometheus metrics for the CHORUS system
|
||||
type CHORUSMetrics struct {
|
||||
registry *prometheus.Registry
|
||||
httpServer *http.Server
|
||||
registry *prometheus.Registry
|
||||
httpServer *http.Server
|
||||
|
||||
// System metrics
|
||||
systemInfo *prometheus.GaugeVec
|
||||
uptime prometheus.Gauge
|
||||
buildInfo *prometheus.GaugeVec
|
||||
systemInfo *prometheus.GaugeVec
|
||||
uptime prometheus.Gauge
|
||||
buildInfo *prometheus.GaugeVec
|
||||
|
||||
// P2P metrics
|
||||
p2pConnectedPeers prometheus.Gauge
|
||||
@@ -32,44 +31,44 @@ type CHORUSMetrics struct {
|
||||
p2pPeerScore *prometheus.GaugeVec
|
||||
|
||||
// DHT metrics
|
||||
dhtPutOperations *prometheus.CounterVec
|
||||
dhtGetOperations *prometheus.CounterVec
|
||||
dhtOperationLatency *prometheus.HistogramVec
|
||||
dhtProviderRecords prometheus.Gauge
|
||||
dhtReplicationFactor *prometheus.GaugeVec
|
||||
dhtContentKeys prometheus.Gauge
|
||||
dhtCacheHits *prometheus.CounterVec
|
||||
dhtCacheMisses *prometheus.CounterVec
|
||||
dhtPutOperations *prometheus.CounterVec
|
||||
dhtGetOperations *prometheus.CounterVec
|
||||
dhtOperationLatency *prometheus.HistogramVec
|
||||
dhtProviderRecords prometheus.Gauge
|
||||
dhtReplicationFactor *prometheus.GaugeVec
|
||||
dhtContentKeys prometheus.Gauge
|
||||
dhtCacheHits *prometheus.CounterVec
|
||||
dhtCacheMisses *prometheus.CounterVec
|
||||
|
||||
// PubSub metrics
|
||||
pubsubTopics prometheus.Gauge
|
||||
pubsubSubscribers *prometheus.GaugeVec
|
||||
pubsubMessages *prometheus.CounterVec
|
||||
pubsubMessageLatency *prometheus.HistogramVec
|
||||
pubsubMessageSize *prometheus.HistogramVec
|
||||
pubsubTopics prometheus.Gauge
|
||||
pubsubSubscribers *prometheus.GaugeVec
|
||||
pubsubMessages *prometheus.CounterVec
|
||||
pubsubMessageLatency *prometheus.HistogramVec
|
||||
pubsubMessageSize *prometheus.HistogramVec
|
||||
|
||||
// Election metrics
|
||||
electionTerm prometheus.Gauge
|
||||
electionState *prometheus.GaugeVec
|
||||
heartbeatsSent prometheus.Counter
|
||||
heartbeatsReceived prometheus.Counter
|
||||
leadershipChanges prometheus.Counter
|
||||
leaderUptime prometheus.Gauge
|
||||
electionLatency prometheus.Histogram
|
||||
electionTerm prometheus.Gauge
|
||||
electionState *prometheus.GaugeVec
|
||||
heartbeatsSent prometheus.Counter
|
||||
heartbeatsReceived prometheus.Counter
|
||||
leadershipChanges prometheus.Counter
|
||||
leaderUptime prometheus.Gauge
|
||||
electionLatency prometheus.Histogram
|
||||
|
||||
// Health metrics
|
||||
healthChecksPassed *prometheus.CounterVec
|
||||
healthChecksFailed *prometheus.CounterVec
|
||||
healthCheckDuration *prometheus.HistogramVec
|
||||
systemHealthScore prometheus.Gauge
|
||||
componentHealthScore *prometheus.GaugeVec
|
||||
healthChecksPassed *prometheus.CounterVec
|
||||
healthChecksFailed *prometheus.CounterVec
|
||||
healthCheckDuration *prometheus.HistogramVec
|
||||
systemHealthScore prometheus.Gauge
|
||||
componentHealthScore *prometheus.GaugeVec
|
||||
|
||||
// Task metrics
|
||||
tasksActive prometheus.Gauge
|
||||
tasksQueued prometheus.Gauge
|
||||
tasksCompleted *prometheus.CounterVec
|
||||
taskDuration *prometheus.HistogramVec
|
||||
taskQueueWaitTime prometheus.Histogram
|
||||
tasksActive prometheus.Gauge
|
||||
tasksQueued prometheus.Gauge
|
||||
tasksCompleted *prometheus.CounterVec
|
||||
taskDuration *prometheus.HistogramVec
|
||||
taskQueueWaitTime prometheus.Histogram
|
||||
|
||||
// SLURP metrics (context generation)
|
||||
slurpGenerated *prometheus.CounterVec
|
||||
@@ -78,6 +77,9 @@ type CHORUSMetrics struct {
|
||||
slurpActiveJobs prometheus.Gauge
|
||||
slurpLeadershipEvents prometheus.Counter
|
||||
|
||||
// SHHH sentinel metrics
|
||||
shhhFindings *prometheus.CounterVec
|
||||
|
||||
// UCXI metrics (protocol resolution)
|
||||
ucxiRequests *prometheus.CounterVec
|
||||
ucxiResolutionLatency prometheus.Histogram
|
||||
@@ -86,39 +88,39 @@ type CHORUSMetrics struct {
|
||||
ucxiContentSize prometheus.Histogram
|
||||
|
||||
// Resource metrics
|
||||
cpuUsage prometheus.Gauge
|
||||
memoryUsage prometheus.Gauge
|
||||
diskUsage *prometheus.GaugeVec
|
||||
networkBytesIn prometheus.Counter
|
||||
networkBytesOut prometheus.Counter
|
||||
goroutines prometheus.Gauge
|
||||
cpuUsage prometheus.Gauge
|
||||
memoryUsage prometheus.Gauge
|
||||
diskUsage *prometheus.GaugeVec
|
||||
networkBytesIn prometheus.Counter
|
||||
networkBytesOut prometheus.Counter
|
||||
goroutines prometheus.Gauge
|
||||
|
||||
// Error metrics
|
||||
errors *prometheus.CounterVec
|
||||
panics prometheus.Counter
|
||||
errors *prometheus.CounterVec
|
||||
panics prometheus.Counter
|
||||
|
||||
startTime time.Time
|
||||
mu sync.RWMutex
|
||||
startTime time.Time
|
||||
mu sync.RWMutex
|
||||
}
|
||||
|
||||
// MetricsConfig configures the metrics system
|
||||
type MetricsConfig struct {
|
||||
// HTTP server config
|
||||
ListenAddr string
|
||||
MetricsPath string
|
||||
ListenAddr string
|
||||
MetricsPath string
|
||||
|
||||
// Histogram buckets
|
||||
LatencyBuckets []float64
|
||||
SizeBuckets []float64
|
||||
|
||||
// Labels
|
||||
NodeID string
|
||||
Version string
|
||||
Environment string
|
||||
Cluster string
|
||||
NodeID string
|
||||
Version string
|
||||
Environment string
|
||||
Cluster string
|
||||
|
||||
// Collection intervals
|
||||
SystemMetricsInterval time.Duration
|
||||
SystemMetricsInterval time.Duration
|
||||
ResourceMetricsInterval time.Duration
|
||||
}
|
||||
|
||||
@@ -409,6 +411,15 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) {
|
||||
},
|
||||
)
|
||||
|
||||
// SHHH metrics
|
||||
m.shhhFindings = promauto.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Name: "chorus_shhh_findings_total",
|
||||
Help: "Total number of SHHH redaction findings",
|
||||
},
|
||||
[]string{"rule", "severity"},
|
||||
)
|
||||
|
||||
// UCXI metrics
|
||||
m.ucxiRequests = promauto.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
@@ -656,6 +667,15 @@ func (m *CHORUSMetrics) SetSLURPQueueLength(length int) {
|
||||
m.slurpQueueLength.Set(float64(length))
|
||||
}
|
||||
|
||||
// SHHH Metrics Methods
|
||||
|
||||
func (m *CHORUSMetrics) IncrementSHHHFindings(rule, severity string, count int) {
|
||||
if m == nil || m.shhhFindings == nil || count <= 0 {
|
||||
return
|
||||
}
|
||||
m.shhhFindings.WithLabelValues(rule, severity).Add(float64(count))
|
||||
}
|
||||
|
||||
// UCXI Metrics Methods
|
||||
|
||||
func (m *CHORUSMetrics) IncrementUCXIRequests(method, status string) {
|
||||
|
||||
11
pkg/shhh/doc.go
Normal file
11
pkg/shhh/doc.go
Normal file
@@ -0,0 +1,11 @@
|
||||
// Package shhh provides the CHORUS secrets sentinel responsible for detecting
|
||||
// and redacting sensitive values before they leave the runtime. The sentinel
|
||||
// focuses on predictable failure modes (log emission, telemetry fan-out,
|
||||
// request forwarding) and offers a composable API for registering additional
|
||||
// redaction rules, emitting audit events, and tracking operational metrics.
|
||||
//
|
||||
// The initial implementation focuses on high-signal secrets (API keys,
|
||||
// bearer/OAuth tokens, private keys) so the runtime can start integrating
|
||||
// SHHH into COOEE and WHOOSH logging immediately while the broader roadmap
|
||||
// items (automated redaction replay, policy driven rules) continue landing.
|
||||
package shhh
|
||||
130
pkg/shhh/rule.go
Normal file
130
pkg/shhh/rule.go
Normal file
@@ -0,0 +1,130 @@
|
||||
package shhh
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"encoding/base64"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strings"
|
||||
)
|
||||
|
||||
type compiledRule struct {
|
||||
name string
|
||||
regex *regexp.Regexp
|
||||
replacement string
|
||||
severity Severity
|
||||
tags []string
|
||||
}
|
||||
|
||||
type matchRecord struct {
|
||||
value string
|
||||
}
|
||||
|
||||
func (r *compiledRule) apply(in string) (string, []matchRecord) {
|
||||
indices := r.regex.FindAllStringSubmatchIndex(in, -1)
|
||||
if len(indices) == 0 {
|
||||
return in, nil
|
||||
}
|
||||
|
||||
var builder strings.Builder
|
||||
builder.Grow(len(in))
|
||||
|
||||
matches := make([]matchRecord, 0, len(indices))
|
||||
last := 0
|
||||
for _, loc := range indices {
|
||||
start, end := loc[0], loc[1]
|
||||
builder.WriteString(in[last:start])
|
||||
replaced := r.regex.ExpandString(nil, r.replacement, in, loc)
|
||||
builder.Write(replaced)
|
||||
matches = append(matches, matchRecord{value: in[start:end]})
|
||||
last = end
|
||||
}
|
||||
builder.WriteString(in[last:])
|
||||
|
||||
return builder.String(), matches
|
||||
}
|
||||
|
||||
func buildDefaultRuleConfigs(placeholder string) []RuleConfig {
|
||||
if placeholder == "" {
|
||||
placeholder = "[REDACTED]"
|
||||
}
|
||||
return []RuleConfig{
|
||||
{
|
||||
Name: "bearer-token",
|
||||
Pattern: `(?i)(authorization\s*:\s*bearer\s+)([A-Za-z0-9\-._~+/]+=*)`,
|
||||
ReplacementTemplate: "$1" + placeholder,
|
||||
Severity: SeverityMedium,
|
||||
Tags: []string{"token", "http"},
|
||||
},
|
||||
{
|
||||
Name: "api-key",
|
||||
Pattern: `(?i)((?:api[_-]?key|token|secret|password)\s*[:=]\s*["']?)([A-Za-z0-9\-._~+/]{8,})(["']?)`,
|
||||
ReplacementTemplate: "$1" + placeholder + "$3",
|
||||
Severity: SeverityHigh,
|
||||
Tags: []string{"credentials"},
|
||||
},
|
||||
{
|
||||
Name: "openai-secret",
|
||||
Pattern: `(sk-[A-Za-z0-9]{20,})`,
|
||||
ReplacementTemplate: placeholder,
|
||||
Severity: SeverityHigh,
|
||||
Tags: []string{"llm", "api"},
|
||||
},
|
||||
{
|
||||
Name: "oauth-refresh-token",
|
||||
Pattern: `(?i)(refresh_token"?\s*[:=]\s*["']?)([A-Za-z0-9\-._~+/]{8,})(["']?)`,
|
||||
ReplacementTemplate: "$1" + placeholder + "$3",
|
||||
Severity: SeverityMedium,
|
||||
Tags: []string{"oauth"},
|
||||
},
|
||||
{
|
||||
Name: "private-key-block",
|
||||
Pattern: `(?s)(-----BEGIN [^-]+ PRIVATE KEY-----)[^-]+(-----END [^-]+ PRIVATE KEY-----)`,
|
||||
ReplacementTemplate: "$1\n" + placeholder + "\n$2",
|
||||
Severity: SeverityHigh,
|
||||
Tags: []string{"pem", "key"},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func compileRules(cfg Config, placeholder string) ([]*compiledRule, error) {
|
||||
configs := make([]RuleConfig, 0)
|
||||
if !cfg.DisableDefaultRules {
|
||||
configs = append(configs, buildDefaultRuleConfigs(placeholder)...)
|
||||
}
|
||||
configs = append(configs, cfg.CustomRules...)
|
||||
|
||||
rules := make([]*compiledRule, 0, len(configs))
|
||||
for _, rc := range configs {
|
||||
if rc.Name == "" || rc.Pattern == "" {
|
||||
continue
|
||||
}
|
||||
replacement := rc.ReplacementTemplate
|
||||
if replacement == "" {
|
||||
replacement = placeholder
|
||||
}
|
||||
re, err := regexp.Compile(rc.Pattern)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
compiled := &compiledRule{
|
||||
name: rc.Name,
|
||||
replacement: replacement,
|
||||
regex: re,
|
||||
severity: rc.Severity,
|
||||
tags: append([]string(nil), rc.Tags...),
|
||||
}
|
||||
rules = append(rules, compiled)
|
||||
}
|
||||
|
||||
sort.SliceStable(rules, func(i, j int) bool {
|
||||
return rules[i].name < rules[j].name
|
||||
})
|
||||
|
||||
return rules, nil
|
||||
}
|
||||
|
||||
func hashSecret(value string) string {
|
||||
sum := sha256.Sum256([]byte(value))
|
||||
return base64.RawStdEncoding.EncodeToString(sum[:])
|
||||
}
|
||||
407
pkg/shhh/sentinel.go
Normal file
407
pkg/shhh/sentinel.go
Normal file
@@ -0,0 +1,407 @@
|
||||
package shhh
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"sort"
|
||||
"sync"
|
||||
)
|
||||
|
||||
// Option configures the sentinel during construction.
|
||||
type Option func(*Sentinel)
|
||||
|
||||
// FindingObserver receives aggregated findings for each redaction operation.
|
||||
type FindingObserver func(context.Context, []Finding)
|
||||
|
||||
// WithAuditSink attaches an audit sink for per-redaction events.
|
||||
func WithAuditSink(sink AuditSink) Option {
|
||||
return func(s *Sentinel) {
|
||||
s.audit = sink
|
||||
}
|
||||
}
|
||||
|
||||
// WithStats allows callers to supply a shared stats collector.
|
||||
func WithStats(stats *Stats) Option {
|
||||
return func(s *Sentinel) {
|
||||
s.stats = stats
|
||||
}
|
||||
}
|
||||
|
||||
// WithFindingObserver registers an observer that is invoked whenever redaction
|
||||
// produces findings.
|
||||
func WithFindingObserver(observer FindingObserver) Option {
|
||||
return func(s *Sentinel) {
|
||||
if observer == nil {
|
||||
return
|
||||
}
|
||||
s.observers = append(s.observers, observer)
|
||||
}
|
||||
}
|
||||
|
||||
// Sentinel performs secret detection/redaction across text payloads.
|
||||
type Sentinel struct {
|
||||
mu sync.RWMutex
|
||||
enabled bool
|
||||
placeholder string
|
||||
rules []*compiledRule
|
||||
audit AuditSink
|
||||
stats *Stats
|
||||
observers []FindingObserver
|
||||
}
|
||||
|
||||
// NewSentinel creates a new secrets sentinel using the provided configuration.
|
||||
func NewSentinel(cfg Config, opts ...Option) (*Sentinel, error) {
|
||||
placeholder := cfg.RedactionPlaceholder
|
||||
if placeholder == "" {
|
||||
placeholder = "[REDACTED]"
|
||||
}
|
||||
|
||||
s := &Sentinel{
|
||||
enabled: !cfg.Disabled,
|
||||
placeholder: placeholder,
|
||||
stats: NewStats(),
|
||||
}
|
||||
for _, opt := range opts {
|
||||
opt(s)
|
||||
}
|
||||
if s.stats == nil {
|
||||
s.stats = NewStats()
|
||||
}
|
||||
|
||||
rules, err := compileRules(cfg, placeholder)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("compile SHHH rules: %w", err)
|
||||
}
|
||||
if len(rules) == 0 {
|
||||
return nil, errors.New("no SHHH rules configured")
|
||||
}
|
||||
s.rules = rules
|
||||
|
||||
return s, nil
|
||||
}
|
||||
|
||||
// Enabled reports whether the sentinel is actively redacting.
|
||||
func (s *Sentinel) Enabled() bool {
|
||||
s.mu.RLock()
|
||||
defer s.mu.RUnlock()
|
||||
return s.enabled
|
||||
}
|
||||
|
||||
// Toggle enables or disables the sentinel at runtime.
|
||||
func (s *Sentinel) Toggle(enabled bool) {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
s.enabled = enabled
|
||||
}
|
||||
|
||||
// SetAuditSink updates the audit sink at runtime.
|
||||
func (s *Sentinel) SetAuditSink(sink AuditSink) {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
s.audit = sink
|
||||
}
|
||||
|
||||
// AddFindingObserver registers an observer after construction.
|
||||
func (s *Sentinel) AddFindingObserver(observer FindingObserver) {
|
||||
if observer == nil {
|
||||
return
|
||||
}
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
s.observers = append(s.observers, observer)
|
||||
}
|
||||
|
||||
// StatsSnapshot returns a snapshot of the current counters.
|
||||
func (s *Sentinel) StatsSnapshot() StatsSnapshot {
|
||||
s.mu.RLock()
|
||||
stats := s.stats
|
||||
s.mu.RUnlock()
|
||||
if stats == nil {
|
||||
return StatsSnapshot{}
|
||||
}
|
||||
return stats.Snapshot()
|
||||
}
|
||||
|
||||
// RedactText scans the provided text and redacts any findings.
|
||||
func (s *Sentinel) RedactText(ctx context.Context, text string, labels map[string]string) (string, []Finding) {
|
||||
s.mu.RLock()
|
||||
enabled := s.enabled
|
||||
rules := s.rules
|
||||
stats := s.stats
|
||||
audit := s.audit
|
||||
s.mu.RUnlock()
|
||||
|
||||
if !enabled || len(rules) == 0 {
|
||||
return text, nil
|
||||
}
|
||||
if stats != nil {
|
||||
stats.IncScan()
|
||||
}
|
||||
|
||||
aggregates := make(map[string]*findingAggregate)
|
||||
current := text
|
||||
path := derivePath(labels)
|
||||
|
||||
for _, rule := range rules {
|
||||
redacted, matches := rule.apply(current)
|
||||
if len(matches) == 0 {
|
||||
continue
|
||||
}
|
||||
current = redacted
|
||||
if stats != nil {
|
||||
stats.AddFindings(rule.name, len(matches))
|
||||
}
|
||||
recordAggregate(aggregates, rule, path, len(matches))
|
||||
|
||||
if audit != nil {
|
||||
metadata := cloneLabels(labels)
|
||||
for _, match := range matches {
|
||||
event := AuditEvent{
|
||||
Rule: rule.name,
|
||||
Severity: rule.severity,
|
||||
Tags: append([]string(nil), rule.tags...),
|
||||
Path: path,
|
||||
Hash: hashSecret(match.value),
|
||||
Metadata: metadata,
|
||||
}
|
||||
audit.RecordRedaction(ctx, event)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
findings := flattenAggregates(aggregates)
|
||||
s.notifyObservers(ctx, findings)
|
||||
return current, findings
|
||||
}
|
||||
|
||||
// RedactMap walks the map and redacts in-place. It returns the collected findings.
|
||||
func (s *Sentinel) RedactMap(ctx context.Context, payload map[string]any) []Finding {
|
||||
return s.RedactMapWithLabels(ctx, payload, nil)
|
||||
}
|
||||
|
||||
// RedactMapWithLabels allows callers to specify base labels that will be merged
|
||||
// into metadata for nested structures.
|
||||
func (s *Sentinel) RedactMapWithLabels(ctx context.Context, payload map[string]any, baseLabels map[string]string) []Finding {
|
||||
if payload == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
aggregates := make(map[string]*findingAggregate)
|
||||
s.redactValue(ctx, payload, "", baseLabels, aggregates)
|
||||
findings := flattenAggregates(aggregates)
|
||||
s.notifyObservers(ctx, findings)
|
||||
return findings
|
||||
}
|
||||
|
||||
func (s *Sentinel) redactValue(ctx context.Context, value any, path string, baseLabels map[string]string, agg map[string]*findingAggregate) {
|
||||
switch v := value.(type) {
|
||||
case map[string]interface{}:
|
||||
for key, val := range v {
|
||||
childPath := joinPath(path, key)
|
||||
switch typed := val.(type) {
|
||||
case string:
|
||||
labels := mergeLabels(baseLabels, childPath)
|
||||
redacted, findings := s.RedactText(ctx, typed, labels)
|
||||
if redacted != typed {
|
||||
v[key] = redacted
|
||||
}
|
||||
mergeAggregates(agg, findings)
|
||||
case fmt.Stringer:
|
||||
labels := mergeLabels(baseLabels, childPath)
|
||||
text := typed.String()
|
||||
redacted, findings := s.RedactText(ctx, text, labels)
|
||||
if redacted != text {
|
||||
v[key] = redacted
|
||||
}
|
||||
mergeAggregates(agg, findings)
|
||||
default:
|
||||
s.redactValue(ctx, typed, childPath, baseLabels, agg)
|
||||
}
|
||||
}
|
||||
case []interface{}:
|
||||
for idx, item := range v {
|
||||
childPath := indexPath(path, idx)
|
||||
switch typed := item.(type) {
|
||||
case string:
|
||||
labels := mergeLabels(baseLabels, childPath)
|
||||
redacted, findings := s.RedactText(ctx, typed, labels)
|
||||
if redacted != typed {
|
||||
v[idx] = redacted
|
||||
}
|
||||
mergeAggregates(agg, findings)
|
||||
case fmt.Stringer:
|
||||
labels := mergeLabels(baseLabels, childPath)
|
||||
text := typed.String()
|
||||
redacted, findings := s.RedactText(ctx, text, labels)
|
||||
if redacted != text {
|
||||
v[idx] = redacted
|
||||
}
|
||||
mergeAggregates(agg, findings)
|
||||
default:
|
||||
s.redactValue(ctx, typed, childPath, baseLabels, agg)
|
||||
}
|
||||
}
|
||||
case []string:
|
||||
for idx, item := range v {
|
||||
childPath := indexPath(path, idx)
|
||||
labels := mergeLabels(baseLabels, childPath)
|
||||
redacted, findings := s.RedactText(ctx, item, labels)
|
||||
if redacted != item {
|
||||
v[idx] = redacted
|
||||
}
|
||||
mergeAggregates(agg, findings)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Sentinel) notifyObservers(ctx context.Context, findings []Finding) {
|
||||
if len(findings) == 0 {
|
||||
return
|
||||
}
|
||||
findingsCopy := append([]Finding(nil), findings...)
|
||||
s.mu.RLock()
|
||||
observers := append([]FindingObserver(nil), s.observers...)
|
||||
s.mu.RUnlock()
|
||||
for _, observer := range observers {
|
||||
observer(ctx, findingsCopy)
|
||||
}
|
||||
}
|
||||
|
||||
func mergeAggregates(dest map[string]*findingAggregate, findings []Finding) {
|
||||
for i := range findings {
|
||||
f := findings[i]
|
||||
agg := dest[f.Rule]
|
||||
if agg == nil {
|
||||
agg = &findingAggregate{
|
||||
rule: f.Rule,
|
||||
severity: f.Severity,
|
||||
tags: append([]string(nil), f.Tags...),
|
||||
locations: make(map[string]int),
|
||||
}
|
||||
dest[f.Rule] = agg
|
||||
}
|
||||
agg.count += f.Count
|
||||
for _, loc := range f.Locations {
|
||||
agg.locations[loc.Path] += loc.Count
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func recordAggregate(dest map[string]*findingAggregate, rule *compiledRule, path string, count int) {
|
||||
agg := dest[rule.name]
|
||||
if agg == nil {
|
||||
agg = &findingAggregate{
|
||||
rule: rule.name,
|
||||
severity: rule.severity,
|
||||
tags: append([]string(nil), rule.tags...),
|
||||
locations: make(map[string]int),
|
||||
}
|
||||
dest[rule.name] = agg
|
||||
}
|
||||
agg.count += count
|
||||
if path != "" {
|
||||
agg.locations[path] += count
|
||||
}
|
||||
}
|
||||
|
||||
func flattenAggregates(agg map[string]*findingAggregate) []Finding {
|
||||
if len(agg) == 0 {
|
||||
return nil
|
||||
}
|
||||
keys := make([]string, 0, len(agg))
|
||||
for key := range agg {
|
||||
keys = append(keys, key)
|
||||
}
|
||||
sort.Strings(keys)
|
||||
|
||||
findings := make([]Finding, 0, len(agg))
|
||||
for _, key := range keys {
|
||||
entry := agg[key]
|
||||
locations := make([]Location, 0, len(entry.locations))
|
||||
if len(entry.locations) > 0 {
|
||||
paths := make([]string, 0, len(entry.locations))
|
||||
for path := range entry.locations {
|
||||
paths = append(paths, path)
|
||||
}
|
||||
sort.Strings(paths)
|
||||
for _, path := range paths {
|
||||
locations = append(locations, Location{Path: path, Count: entry.locations[path]})
|
||||
}
|
||||
}
|
||||
findings = append(findings, Finding{
|
||||
Rule: entry.rule,
|
||||
Severity: entry.severity,
|
||||
Tags: append([]string(nil), entry.tags...),
|
||||
Count: entry.count,
|
||||
Locations: locations,
|
||||
})
|
||||
}
|
||||
return findings
|
||||
}
|
||||
|
||||
func derivePath(labels map[string]string) string {
|
||||
if labels == nil {
|
||||
return ""
|
||||
}
|
||||
if path := labels["path"]; path != "" {
|
||||
return path
|
||||
}
|
||||
if path := labels["source"]; path != "" {
|
||||
return path
|
||||
}
|
||||
if path := labels["field"]; path != "" {
|
||||
return path
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func cloneLabels(labels map[string]string) map[string]string {
|
||||
if len(labels) == 0 {
|
||||
return nil
|
||||
}
|
||||
clone := make(map[string]string, len(labels))
|
||||
for k, v := range labels {
|
||||
clone[k] = v
|
||||
}
|
||||
return clone
|
||||
}
|
||||
|
||||
func joinPath(prefix, key string) string {
|
||||
if prefix == "" {
|
||||
return key
|
||||
}
|
||||
if key == "" {
|
||||
return prefix
|
||||
}
|
||||
return prefix + "." + key
|
||||
}
|
||||
|
||||
func indexPath(prefix string, idx int) string {
|
||||
if prefix == "" {
|
||||
return fmt.Sprintf("[%d]", idx)
|
||||
}
|
||||
return fmt.Sprintf("%s[%d]", prefix, idx)
|
||||
}
|
||||
|
||||
func mergeLabels(base map[string]string, path string) map[string]string {
|
||||
if base == nil && path == "" {
|
||||
return nil
|
||||
}
|
||||
labels := cloneLabels(base)
|
||||
if labels == nil {
|
||||
labels = make(map[string]string, 1)
|
||||
}
|
||||
if path != "" {
|
||||
labels["path"] = path
|
||||
}
|
||||
return labels
|
||||
}
|
||||
|
||||
type findingAggregate struct {
|
||||
rule string
|
||||
severity Severity
|
||||
tags []string
|
||||
count int
|
||||
locations map[string]int
|
||||
}
|
||||
95
pkg/shhh/sentinel_test.go
Normal file
95
pkg/shhh/sentinel_test.go
Normal file
@@ -0,0 +1,95 @@
|
||||
package shhh
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
type recordingSink struct {
|
||||
events []AuditEvent
|
||||
}
|
||||
|
||||
func (r *recordingSink) RecordRedaction(_ context.Context, event AuditEvent) {
|
||||
r.events = append(r.events, event)
|
||||
}
|
||||
|
||||
func TestRedactText_DefaultRules(t *testing.T) {
|
||||
sentinel, err := NewSentinel(Config{})
|
||||
require.NoError(t, err)
|
||||
|
||||
input := "Authorization: Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.secret"
|
||||
redacted, findings := sentinel.RedactText(context.Background(), input, map[string]string{"source": "http.request.headers.authorization"})
|
||||
|
||||
require.Equal(t, "Authorization: Bearer [REDACTED]", redacted)
|
||||
require.Len(t, findings, 1)
|
||||
require.Equal(t, "bearer-token", findings[0].Rule)
|
||||
require.Equal(t, 1, findings[0].Count)
|
||||
require.NotEmpty(t, findings[0].Locations)
|
||||
|
||||
snapshot := sentinel.StatsSnapshot()
|
||||
require.Equal(t, uint64(1), snapshot.TotalScans)
|
||||
require.Equal(t, uint64(1), snapshot.TotalFindings)
|
||||
require.Equal(t, uint64(1), snapshot.PerRuleFindings["bearer-token"])
|
||||
}
|
||||
|
||||
func TestRedactMap_NestedStructures(t *testing.T) {
|
||||
sentinel, err := NewSentinel(Config{})
|
||||
require.NoError(t, err)
|
||||
|
||||
payload := map[string]any{
|
||||
"config": map[string]any{
|
||||
"api_key": "API_KEY=1234567890ABCDEFG",
|
||||
},
|
||||
"tokens": []any{
|
||||
"sk-test1234567890ABCDEF",
|
||||
map[string]any{"refresh": "refresh_token=abcdef12345"},
|
||||
},
|
||||
}
|
||||
|
||||
findings := sentinel.RedactMap(context.Background(), payload)
|
||||
require.NotEmpty(t, findings)
|
||||
|
||||
config := payload["config"].(map[string]any)
|
||||
require.Equal(t, "API_KEY=[REDACTED]", config["api_key"])
|
||||
|
||||
tokens := payload["tokens"].([]any)
|
||||
require.Equal(t, "[REDACTED]", tokens[0])
|
||||
|
||||
inner := tokens[1].(map[string]any)
|
||||
require.Equal(t, "refresh_token=[REDACTED]", inner["refresh"])
|
||||
|
||||
total := 0
|
||||
for _, finding := range findings {
|
||||
total += finding.Count
|
||||
}
|
||||
require.Equal(t, 3, total)
|
||||
}
|
||||
|
||||
func TestAuditSinkReceivesEvents(t *testing.T) {
|
||||
sink := &recordingSink{}
|
||||
cfg := Config{
|
||||
DisableDefaultRules: true,
|
||||
CustomRules: []RuleConfig{
|
||||
{
|
||||
Name: "custom-secret",
|
||||
Pattern: `(secret\s*=\s*)([A-Za-z0-9]{6,})`,
|
||||
ReplacementTemplate: "$1[REDACTED]",
|
||||
Severity: SeverityHigh,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
sentinel, err := NewSentinel(cfg, WithAuditSink(sink))
|
||||
require.NoError(t, err)
|
||||
|
||||
_, findings := sentinel.RedactText(context.Background(), "secret=mysecretvalue", map[string]string{"source": "test"})
|
||||
require.Len(t, findings, 1)
|
||||
require.Equal(t, 1, findings[0].Count)
|
||||
|
||||
require.Len(t, sink.events, 1)
|
||||
require.Equal(t, "custom-secret", sink.events[0].Rule)
|
||||
require.NotEmpty(t, sink.events[0].Hash)
|
||||
require.Equal(t, "test", sink.events[0].Path)
|
||||
}
|
||||
60
pkg/shhh/stats.go
Normal file
60
pkg/shhh/stats.go
Normal file
@@ -0,0 +1,60 @@
|
||||
package shhh
|
||||
|
||||
import (
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
)
|
||||
|
||||
// Stats tracks aggregate counts for the sentinel.
|
||||
type Stats struct {
|
||||
totalScans atomic.Uint64
|
||||
totalFindings atomic.Uint64
|
||||
perRule sync.Map // string -> *atomic.Uint64
|
||||
}
|
||||
|
||||
// NewStats constructs a Stats collector.
|
||||
func NewStats() *Stats {
|
||||
return &Stats{}
|
||||
}
|
||||
|
||||
// IncScan increments the total scan counter.
|
||||
func (s *Stats) IncScan() {
|
||||
if s == nil {
|
||||
return
|
||||
}
|
||||
s.totalScans.Add(1)
|
||||
}
|
||||
|
||||
// AddFindings records findings for a rule.
|
||||
func (s *Stats) AddFindings(rule string, count int) {
|
||||
if s == nil || count <= 0 {
|
||||
return
|
||||
}
|
||||
s.totalFindings.Add(uint64(count))
|
||||
counterAny, _ := s.perRule.LoadOrStore(rule, new(atomic.Uint64))
|
||||
counter := counterAny.(*atomic.Uint64)
|
||||
counter.Add(uint64(count))
|
||||
}
|
||||
|
||||
// Snapshot returns a point-in-time view of the counters.
|
||||
func (s *Stats) Snapshot() StatsSnapshot {
|
||||
if s == nil {
|
||||
return StatsSnapshot{}
|
||||
}
|
||||
snapshot := StatsSnapshot{
|
||||
TotalScans: s.totalScans.Load(),
|
||||
TotalFindings: s.totalFindings.Load(),
|
||||
PerRuleFindings: make(map[string]uint64),
|
||||
}
|
||||
s.perRule.Range(func(key, value any) bool {
|
||||
name, ok := key.(string)
|
||||
if !ok {
|
||||
return true
|
||||
}
|
||||
if counter, ok := value.(*atomic.Uint64); ok {
|
||||
snapshot.PerRuleFindings[name] = counter.Load()
|
||||
}
|
||||
return true
|
||||
})
|
||||
return snapshot
|
||||
}
|
||||
73
pkg/shhh/types.go
Normal file
73
pkg/shhh/types.go
Normal file
@@ -0,0 +1,73 @@
|
||||
package shhh
|
||||
|
||||
import "context"
|
||||
|
||||
// Severity represents the criticality associated with a redaction finding.
|
||||
type Severity string
|
||||
|
||||
const (
|
||||
// SeverityLow indicates low-impact findings (e.g. non-production credentials).
|
||||
SeverityLow Severity = "low"
|
||||
// SeverityMedium indicates medium impact findings (e.g. access tokens).
|
||||
SeverityMedium Severity = "medium"
|
||||
// SeverityHigh indicates high-impact findings (e.g. private keys).
|
||||
SeverityHigh Severity = "high"
|
||||
)
|
||||
|
||||
// RuleConfig defines a redaction rule that SHHH should enforce.
|
||||
type RuleConfig struct {
|
||||
Name string `json:"name"`
|
||||
Pattern string `json:"pattern"`
|
||||
ReplacementTemplate string `json:"replacement_template"`
|
||||
Severity Severity `json:"severity"`
|
||||
Tags []string `json:"tags"`
|
||||
}
|
||||
|
||||
// Config controls sentinel behaviour.
|
||||
type Config struct {
|
||||
// Disabled toggles redaction off entirely.
|
||||
Disabled bool `json:"disabled"`
|
||||
// RedactionPlaceholder overrides the default placeholder value.
|
||||
RedactionPlaceholder string `json:"redaction_placeholder"`
|
||||
// DisableDefaultRules disables the built-in curated rule set.
|
||||
DisableDefaultRules bool `json:"disable_default_rules"`
|
||||
// CustomRules allows callers to append bespoke redaction patterns.
|
||||
CustomRules []RuleConfig `json:"custom_rules"`
|
||||
}
|
||||
|
||||
// Finding represents a single rule firing during redaction.
|
||||
type Finding struct {
|
||||
Rule string `json:"rule"`
|
||||
Severity Severity `json:"severity"`
|
||||
Tags []string `json:"tags,omitempty"`
|
||||
Count int `json:"count"`
|
||||
Locations []Location `json:"locations,omitempty"`
|
||||
}
|
||||
|
||||
// Location describes where a secret was found.
|
||||
type Location struct {
|
||||
Path string `json:"path"`
|
||||
Count int `json:"count"`
|
||||
}
|
||||
|
||||
// StatsSnapshot exposes aggregate counters for observability.
|
||||
type StatsSnapshot struct {
|
||||
TotalScans uint64 `json:"total_scans"`
|
||||
TotalFindings uint64 `json:"total_findings"`
|
||||
PerRuleFindings map[string]uint64 `json:"per_rule_findings"`
|
||||
}
|
||||
|
||||
// AuditEvent captures a single redaction occurrence for downstream sinks.
|
||||
type AuditEvent struct {
|
||||
Rule string `json:"rule"`
|
||||
Severity Severity `json:"severity"`
|
||||
Tags []string `json:"tags,omitempty"`
|
||||
Path string `json:"path,omitempty"`
|
||||
Hash string `json:"hash"`
|
||||
Metadata map[string]string `json:"metadata,omitempty"`
|
||||
}
|
||||
|
||||
// AuditSink receives redaction events for long term storage / replay.
|
||||
type AuditSink interface {
|
||||
RecordRedaction(ctx context.Context, event AuditEvent)
|
||||
}
|
||||
@@ -13,11 +13,11 @@ import (
|
||||
|
||||
// DecisionPublisher handles publishing task completion decisions to encrypted DHT storage
|
||||
type DecisionPublisher struct {
|
||||
ctx context.Context
|
||||
config *config.Config
|
||||
dhtStorage storage.UCXLStorage
|
||||
nodeID string
|
||||
agentName string
|
||||
ctx context.Context
|
||||
config *config.Config
|
||||
dhtStorage storage.UCXLStorage
|
||||
nodeID string
|
||||
agentName string
|
||||
}
|
||||
|
||||
// NewDecisionPublisher creates a new decision publisher
|
||||
@@ -39,28 +39,28 @@ func NewDecisionPublisher(
|
||||
|
||||
// TaskDecision represents a decision made by an agent upon task completion
|
||||
type TaskDecision struct {
|
||||
Agent string `json:"agent"`
|
||||
Role string `json:"role"`
|
||||
Project string `json:"project"`
|
||||
Task string `json:"task"`
|
||||
Decision string `json:"decision"`
|
||||
Context map[string]interface{} `json:"context"`
|
||||
Timestamp time.Time `json:"timestamp"`
|
||||
Success bool `json:"success"`
|
||||
ErrorMessage string `json:"error_message,omitempty"`
|
||||
FilesModified []string `json:"files_modified,omitempty"`
|
||||
LinesChanged int `json:"lines_changed,omitempty"`
|
||||
TestResults *TestResults `json:"test_results,omitempty"`
|
||||
Dependencies []string `json:"dependencies,omitempty"`
|
||||
NextSteps []string `json:"next_steps,omitempty"`
|
||||
Agent string `json:"agent"`
|
||||
Role string `json:"role"`
|
||||
Project string `json:"project"`
|
||||
Task string `json:"task"`
|
||||
Decision string `json:"decision"`
|
||||
Context map[string]interface{} `json:"context"`
|
||||
Timestamp time.Time `json:"timestamp"`
|
||||
Success bool `json:"success"`
|
||||
ErrorMessage string `json:"error_message,omitempty"`
|
||||
FilesModified []string `json:"files_modified,omitempty"`
|
||||
LinesChanged int `json:"lines_changed,omitempty"`
|
||||
TestResults *TestResults `json:"test_results,omitempty"`
|
||||
Dependencies []string `json:"dependencies,omitempty"`
|
||||
NextSteps []string `json:"next_steps,omitempty"`
|
||||
}
|
||||
|
||||
// TestResults captures test execution results
|
||||
type TestResults struct {
|
||||
Passed int `json:"passed"`
|
||||
Failed int `json:"failed"`
|
||||
Skipped int `json:"skipped"`
|
||||
Coverage float64 `json:"coverage,omitempty"`
|
||||
Passed int `json:"passed"`
|
||||
Failed int `json:"failed"`
|
||||
Skipped int `json:"skipped"`
|
||||
Coverage float64 `json:"coverage,omitempty"`
|
||||
FailedTests []string `json:"failed_tests,omitempty"`
|
||||
}
|
||||
|
||||
@@ -74,7 +74,11 @@ func (dp *DecisionPublisher) PublishTaskDecision(decision *TaskDecision) error {
|
||||
decision.Role = dp.config.Agent.Role
|
||||
}
|
||||
if decision.Project == "" {
|
||||
decision.Project = "default-project" // TODO: Add project field to config
|
||||
if project := dp.config.Agent.Project; project != "" {
|
||||
decision.Project = project
|
||||
} else {
|
||||
decision.Project = "chorus"
|
||||
}
|
||||
}
|
||||
if decision.Timestamp.IsZero() {
|
||||
decision.Timestamp = time.Now()
|
||||
@@ -173,16 +177,16 @@ func (dp *DecisionPublisher) PublishArchitecturalDecision(
|
||||
nextSteps []string,
|
||||
) error {
|
||||
taskDecision := &TaskDecision{
|
||||
Task: taskName,
|
||||
Decision: decision,
|
||||
Success: true,
|
||||
Task: taskName,
|
||||
Decision: decision,
|
||||
Success: true,
|
||||
NextSteps: nextSteps,
|
||||
Context: map[string]interface{}{
|
||||
"decision_type": "architecture",
|
||||
"rationale": rationale,
|
||||
"alternatives": alternatives,
|
||||
"implications": implications,
|
||||
"node_id": dp.nodeID,
|
||||
"decision_type": "architecture",
|
||||
"rationale": rationale,
|
||||
"alternatives": alternatives,
|
||||
"implications": implications,
|
||||
"node_id": dp.nodeID,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -341,10 +345,10 @@ func (dp *DecisionPublisher) PublishSystemStatus(
|
||||
Decision: status,
|
||||
Success: dp.allHealthChecksPass(healthChecks),
|
||||
Context: map[string]interface{}{
|
||||
"decision_type": "system",
|
||||
"metrics": metrics,
|
||||
"health_checks": healthChecks,
|
||||
"node_id": dp.nodeID,
|
||||
"decision_type": "system",
|
||||
"metrics": metrics,
|
||||
"health_checks": healthChecks,
|
||||
"node_id": dp.nodeID,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -364,13 +368,17 @@ func (dp *DecisionPublisher) allHealthChecksPass(healthChecks map[string]bool) b
|
||||
// GetPublisherMetrics returns metrics about the decision publisher
|
||||
func (dp *DecisionPublisher) GetPublisherMetrics() map[string]interface{} {
|
||||
dhtMetrics := dp.dhtStorage.GetMetrics()
|
||||
project := dp.config.Agent.Project
|
||||
if project == "" {
|
||||
project = "chorus"
|
||||
}
|
||||
|
||||
return map[string]interface{}{
|
||||
"node_id": dp.nodeID,
|
||||
"agent_name": dp.agentName,
|
||||
"current_role": dp.config.Agent.Role,
|
||||
"project": "default-project", // TODO: Add project field to config
|
||||
"dht_metrics": dhtMetrics,
|
||||
"last_publish": time.Now(), // This would be tracked in a real implementation
|
||||
"node_id": dp.nodeID,
|
||||
"agent_name": dp.agentName,
|
||||
"current_role": dp.config.Agent.Role,
|
||||
"project": project,
|
||||
"dht_metrics": dhtMetrics,
|
||||
"last_publish": time.Now(), // This would be tracked in a real implementation
|
||||
}
|
||||
}
|
||||
381
pubsub/pubsub.go
381
pubsub/pubsub.go
@@ -8,9 +8,10 @@ import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"chorus/pkg/shhh"
|
||||
pubsub "github.com/libp2p/go-libp2p-pubsub"
|
||||
"github.com/libp2p/go-libp2p/core/host"
|
||||
"github.com/libp2p/go-libp2p/core/peer"
|
||||
pubsub "github.com/libp2p/go-libp2p-pubsub"
|
||||
)
|
||||
|
||||
// PubSub handles publish/subscribe messaging for Bzzz coordination and HMMM meta-discussion
|
||||
@@ -21,34 +22,40 @@ type PubSub struct {
|
||||
cancel context.CancelFunc
|
||||
|
||||
// Topic subscriptions
|
||||
chorusTopic *pubsub.Topic
|
||||
hmmmTopic *pubsub.Topic
|
||||
contextTopic *pubsub.Topic
|
||||
chorusTopic *pubsub.Topic
|
||||
hmmmTopic *pubsub.Topic
|
||||
contextTopic *pubsub.Topic
|
||||
|
||||
// Message subscriptions
|
||||
chorusSub *pubsub.Subscription
|
||||
hmmmSub *pubsub.Subscription
|
||||
contextSub *pubsub.Subscription
|
||||
chorusSub *pubsub.Subscription
|
||||
hmmmSub *pubsub.Subscription
|
||||
contextSub *pubsub.Subscription
|
||||
|
||||
// Dynamic topic management
|
||||
dynamicTopics map[string]*pubsub.Topic
|
||||
dynamicTopicsMux sync.RWMutex
|
||||
dynamicSubs map[string]*pubsub.Subscription
|
||||
dynamicSubsMux sync.RWMutex
|
||||
dynamicTopics map[string]*pubsub.Topic
|
||||
dynamicTopicsMux sync.RWMutex
|
||||
dynamicSubs map[string]*pubsub.Subscription
|
||||
dynamicSubsMux sync.RWMutex
|
||||
dynamicHandlers map[string]func([]byte, peer.ID)
|
||||
dynamicHandlersMux sync.RWMutex
|
||||
|
||||
// Configuration
|
||||
chorusTopicName string
|
||||
hmmmTopicName string
|
||||
contextTopicName string
|
||||
chorusTopicName string
|
||||
hmmmTopicName string
|
||||
contextTopicName string
|
||||
|
||||
// External message handler for HMMM messages
|
||||
HmmmMessageHandler func(msg Message, from peer.ID)
|
||||
HmmmMessageHandler func(msg Message, from peer.ID)
|
||||
|
||||
// External message handler for Context Feedback messages
|
||||
ContextFeedbackHandler func(msg Message, from peer.ID)
|
||||
|
||||
// Hypercore-style logging
|
||||
hypercoreLog HypercoreLogger
|
||||
|
||||
// SHHH sentinel
|
||||
redactor *shhh.Sentinel
|
||||
redactorMux sync.RWMutex
|
||||
}
|
||||
|
||||
// HypercoreLogger interface for dependency injection
|
||||
@@ -62,45 +69,45 @@ type MessageType string
|
||||
|
||||
const (
|
||||
// Bzzz coordination messages
|
||||
TaskAnnouncement MessageType = "task_announcement"
|
||||
TaskClaim MessageType = "task_claim"
|
||||
TaskProgress MessageType = "task_progress"
|
||||
TaskComplete MessageType = "task_complete"
|
||||
CapabilityBcast MessageType = "capability_broadcast" // Only broadcast when capabilities change
|
||||
TaskAnnouncement MessageType = "task_announcement"
|
||||
TaskClaim MessageType = "task_claim"
|
||||
TaskProgress MessageType = "task_progress"
|
||||
TaskComplete MessageType = "task_complete"
|
||||
CapabilityBcast MessageType = "capability_broadcast" // Only broadcast when capabilities change
|
||||
AvailabilityBcast MessageType = "availability_broadcast" // Regular availability status
|
||||
|
||||
// HMMM meta-discussion messages
|
||||
MetaDiscussion MessageType = "meta_discussion" // Generic type for all discussion
|
||||
TaskHelpRequest MessageType = "task_help_request" // Request for assistance
|
||||
TaskHelpResponse MessageType = "task_help_response" // Response to a help request
|
||||
CoordinationRequest MessageType = "coordination_request" // Request for coordination
|
||||
CoordinationComplete MessageType = "coordination_complete" // Coordination session completed
|
||||
DependencyAlert MessageType = "dependency_alert" // Dependency detected
|
||||
EscalationTrigger MessageType = "escalation_trigger" // Human escalation needed
|
||||
MetaDiscussion MessageType = "meta_discussion" // Generic type for all discussion
|
||||
TaskHelpRequest MessageType = "task_help_request" // Request for assistance
|
||||
TaskHelpResponse MessageType = "task_help_response" // Response to a help request
|
||||
CoordinationRequest MessageType = "coordination_request" // Request for coordination
|
||||
CoordinationComplete MessageType = "coordination_complete" // Coordination session completed
|
||||
DependencyAlert MessageType = "dependency_alert" // Dependency detected
|
||||
EscalationTrigger MessageType = "escalation_trigger" // Human escalation needed
|
||||
|
||||
// Role-based collaboration messages
|
||||
RoleAnnouncement MessageType = "role_announcement" // Agent announces its role and capabilities
|
||||
ExpertiseRequest MessageType = "expertise_request" // Request for specific expertise
|
||||
ExpertiseResponse MessageType = "expertise_response" // Response offering expertise
|
||||
StatusUpdate MessageType = "status_update" // Regular status updates from agents
|
||||
WorkAllocation MessageType = "work_allocation" // Allocation of work to specific roles
|
||||
RoleCollaboration MessageType = "role_collaboration" // Cross-role collaboration message
|
||||
MentorshipRequest MessageType = "mentorship_request" // Junior role requesting mentorship
|
||||
MentorshipResponse MessageType = "mentorship_response" // Senior role providing mentorship
|
||||
ProjectUpdate MessageType = "project_update" // Project-level status updates
|
||||
DeliverableReady MessageType = "deliverable_ready" // Notification that deliverable is complete
|
||||
RoleAnnouncement MessageType = "role_announcement" // Agent announces its role and capabilities
|
||||
ExpertiseRequest MessageType = "expertise_request" // Request for specific expertise
|
||||
ExpertiseResponse MessageType = "expertise_response" // Response offering expertise
|
||||
StatusUpdate MessageType = "status_update" // Regular status updates from agents
|
||||
WorkAllocation MessageType = "work_allocation" // Allocation of work to specific roles
|
||||
RoleCollaboration MessageType = "role_collaboration" // Cross-role collaboration message
|
||||
MentorshipRequest MessageType = "mentorship_request" // Junior role requesting mentorship
|
||||
MentorshipResponse MessageType = "mentorship_response" // Senior role providing mentorship
|
||||
ProjectUpdate MessageType = "project_update" // Project-level status updates
|
||||
DeliverableReady MessageType = "deliverable_ready" // Notification that deliverable is complete
|
||||
|
||||
// RL Context Curator feedback messages
|
||||
FeedbackEvent MessageType = "feedback_event" // Context feedback for RL learning
|
||||
ContextRequest MessageType = "context_request" // Request context from HCFS
|
||||
ContextResponse MessageType = "context_response" // Response with context data
|
||||
ContextUsage MessageType = "context_usage" // Report context usage patterns
|
||||
ContextRelevance MessageType = "context_relevance" // Report context relevance scoring
|
||||
FeedbackEvent MessageType = "feedback_event" // Context feedback for RL learning
|
||||
ContextRequest MessageType = "context_request" // Request context from HCFS
|
||||
ContextResponse MessageType = "context_response" // Response with context data
|
||||
ContextUsage MessageType = "context_usage" // Report context usage patterns
|
||||
ContextRelevance MessageType = "context_relevance" // Report context relevance scoring
|
||||
|
||||
// SLURP event integration messages
|
||||
SlurpEventGenerated MessageType = "slurp_event_generated" // HMMM consensus generated SLURP event
|
||||
SlurpEventAck MessageType = "slurp_event_ack" // Acknowledgment of SLURP event receipt
|
||||
SlurpContextUpdate MessageType = "slurp_context_update" // Context update from SLURP system
|
||||
SlurpEventGenerated MessageType = "slurp_event_generated" // HMMM consensus generated SLURP event
|
||||
SlurpEventAck MessageType = "slurp_event_ack" // Acknowledgment of SLURP event receipt
|
||||
SlurpContextUpdate MessageType = "slurp_context_update" // Context update from SLURP system
|
||||
)
|
||||
|
||||
// Message represents a Bzzz/Antennae message
|
||||
@@ -112,12 +119,12 @@ type Message struct {
|
||||
HopCount int `json:"hop_count,omitempty"` // For Antennae hop limiting
|
||||
|
||||
// Role-based collaboration fields
|
||||
FromRole string `json:"from_role,omitempty"` // Role of sender
|
||||
ToRoles []string `json:"to_roles,omitempty"` // Target roles
|
||||
FromRole string `json:"from_role,omitempty"` // Role of sender
|
||||
ToRoles []string `json:"to_roles,omitempty"` // Target roles
|
||||
RequiredExpertise []string `json:"required_expertise,omitempty"` // Required expertise areas
|
||||
ProjectID string `json:"project_id,omitempty"` // Associated project
|
||||
Priority string `json:"priority,omitempty"` // Message priority (low, medium, high, urgent)
|
||||
ThreadID string `json:"thread_id,omitempty"` // Conversation thread ID
|
||||
ProjectID string `json:"project_id,omitempty"` // Associated project
|
||||
Priority string `json:"priority,omitempty"` // Message priority (low, medium, high, urgent)
|
||||
ThreadID string `json:"thread_id,omitempty"` // Conversation thread ID
|
||||
}
|
||||
|
||||
// NewPubSub creates a new PubSub instance for Bzzz coordination and HMMM meta-discussion
|
||||
@@ -150,16 +157,17 @@ func NewPubSubWithLogger(ctx context.Context, h host.Host, chorusTopic, hmmmTopi
|
||||
}
|
||||
|
||||
p := &PubSub{
|
||||
ps: ps,
|
||||
host: h,
|
||||
ctx: pubsubCtx,
|
||||
cancel: cancel,
|
||||
chorusTopicName: chorusTopic,
|
||||
ps: ps,
|
||||
host: h,
|
||||
ctx: pubsubCtx,
|
||||
cancel: cancel,
|
||||
chorusTopicName: chorusTopic,
|
||||
hmmmTopicName: hmmmTopic,
|
||||
contextTopicName: contextTopic,
|
||||
dynamicTopics: make(map[string]*pubsub.Topic),
|
||||
dynamicSubs: make(map[string]*pubsub.Subscription),
|
||||
hypercoreLog: logger,
|
||||
dynamicTopics: make(map[string]*pubsub.Topic),
|
||||
dynamicSubs: make(map[string]*pubsub.Subscription),
|
||||
dynamicHandlers: make(map[string]func([]byte, peer.ID)),
|
||||
hypercoreLog: logger,
|
||||
}
|
||||
|
||||
// Join static topics
|
||||
@@ -177,6 +185,13 @@ func NewPubSubWithLogger(ctx context.Context, h host.Host, chorusTopic, hmmmTopi
|
||||
return p, nil
|
||||
}
|
||||
|
||||
// SetRedactor wires the SHHH sentinel so outbound messages are sanitized before publication.
|
||||
func (p *PubSub) SetRedactor(redactor *shhh.Sentinel) {
|
||||
p.redactorMux.Lock()
|
||||
defer p.redactorMux.Unlock()
|
||||
p.redactor = redactor
|
||||
}
|
||||
|
||||
// SetHmmmMessageHandler sets the handler for incoming HMMM messages.
|
||||
func (p *PubSub) SetHmmmMessageHandler(handler func(msg Message, from peer.ID)) {
|
||||
p.HmmmMessageHandler = handler
|
||||
@@ -231,15 +246,21 @@ func (p *PubSub) joinStaticTopics() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// JoinDynamicTopic joins a new topic for a specific task
|
||||
func (p *PubSub) JoinDynamicTopic(topicName string) error {
|
||||
p.dynamicTopicsMux.Lock()
|
||||
defer p.dynamicTopicsMux.Unlock()
|
||||
p.dynamicSubsMux.Lock()
|
||||
defer p.dynamicSubsMux.Unlock()
|
||||
// subscribeDynamicTopic joins a topic and optionally assigns a raw handler.
|
||||
func (p *PubSub) subscribeDynamicTopic(topicName string, handler func([]byte, peer.ID)) error {
|
||||
if topicName == "" {
|
||||
return fmt.Errorf("topic name cannot be empty")
|
||||
}
|
||||
|
||||
if _, exists := p.dynamicTopics[topicName]; exists {
|
||||
return nil // Already joined
|
||||
p.dynamicTopicsMux.RLock()
|
||||
_, exists := p.dynamicTopics[topicName]
|
||||
p.dynamicTopicsMux.RUnlock()
|
||||
|
||||
if exists {
|
||||
p.dynamicHandlersMux.Lock()
|
||||
p.dynamicHandlers[topicName] = handler
|
||||
p.dynamicHandlersMux.Unlock()
|
||||
return nil
|
||||
}
|
||||
|
||||
topic, err := p.ps.Join(topicName)
|
||||
@@ -253,16 +274,46 @@ func (p *PubSub) JoinDynamicTopic(topicName string) error {
|
||||
return fmt.Errorf("failed to subscribe to dynamic topic %s: %w", topicName, err)
|
||||
}
|
||||
|
||||
p.dynamicTopicsMux.Lock()
|
||||
if _, already := p.dynamicTopics[topicName]; already {
|
||||
p.dynamicTopicsMux.Unlock()
|
||||
sub.Cancel()
|
||||
topic.Close()
|
||||
p.dynamicHandlersMux.Lock()
|
||||
p.dynamicHandlers[topicName] = handler
|
||||
p.dynamicHandlersMux.Unlock()
|
||||
return nil
|
||||
}
|
||||
p.dynamicTopics[topicName] = topic
|
||||
p.dynamicSubs[topicName] = sub
|
||||
p.dynamicTopicsMux.Unlock()
|
||||
|
||||
// Start a handler for this new subscription
|
||||
go p.handleDynamicMessages(sub)
|
||||
p.dynamicSubsMux.Lock()
|
||||
p.dynamicSubs[topicName] = sub
|
||||
p.dynamicSubsMux.Unlock()
|
||||
|
||||
p.dynamicHandlersMux.Lock()
|
||||
p.dynamicHandlers[topicName] = handler
|
||||
p.dynamicHandlersMux.Unlock()
|
||||
|
||||
go p.handleDynamicMessages(topicName, sub)
|
||||
|
||||
fmt.Printf("✅ Joined dynamic topic: %s\n", topicName)
|
||||
return nil
|
||||
}
|
||||
|
||||
// JoinDynamicTopic joins a new topic for a specific task
|
||||
func (p *PubSub) JoinDynamicTopic(topicName string) error {
|
||||
return p.subscribeDynamicTopic(topicName, nil)
|
||||
}
|
||||
|
||||
// SubscribeRawTopic joins a topic and delivers raw payloads to the provided handler.
|
||||
func (p *PubSub) SubscribeRawTopic(topicName string, handler func([]byte, peer.ID)) error {
|
||||
if handler == nil {
|
||||
return fmt.Errorf("handler cannot be nil")
|
||||
}
|
||||
return p.subscribeDynamicTopic(topicName, handler)
|
||||
}
|
||||
|
||||
// JoinRoleBasedTopics joins topics based on role and expertise
|
||||
func (p *PubSub) JoinRoleBasedTopics(role string, expertise []string, reportsTo []string) error {
|
||||
var topicsToJoin []string
|
||||
@@ -324,6 +375,10 @@ func (p *PubSub) LeaveDynamicTopic(topicName string) {
|
||||
delete(p.dynamicTopics, topicName)
|
||||
}
|
||||
|
||||
p.dynamicHandlersMux.Lock()
|
||||
delete(p.dynamicHandlers, topicName)
|
||||
p.dynamicHandlersMux.Unlock()
|
||||
|
||||
fmt.Printf("🗑️ Left dynamic topic: %s\n", topicName)
|
||||
}
|
||||
|
||||
@@ -337,11 +392,12 @@ func (p *PubSub) PublishToDynamicTopic(topicName string, msgType MessageType, da
|
||||
return fmt.Errorf("not subscribed to dynamic topic: %s", topicName)
|
||||
}
|
||||
|
||||
payload := p.sanitizePayload(topicName, msgType, data)
|
||||
msg := Message{
|
||||
Type: msgType,
|
||||
From: p.host.ID().String(),
|
||||
Timestamp: time.Now(),
|
||||
Data: data,
|
||||
Data: payload,
|
||||
}
|
||||
|
||||
msgBytes, err := json.Marshal(msg)
|
||||
@@ -356,34 +412,35 @@ func (p *PubSub) PublishToDynamicTopic(topicName string, msgType MessageType, da
|
||||
// wrapping it in the CHORUS Message envelope. Intended for HMMM per-issue rooms
|
||||
// or other modules that maintain their own schemas.
|
||||
func (p *PubSub) PublishRaw(topicName string, payload []byte) error {
|
||||
// Dynamic topic
|
||||
p.dynamicTopicsMux.RLock()
|
||||
if topic, exists := p.dynamicTopics[topicName]; exists {
|
||||
p.dynamicTopicsMux.RUnlock()
|
||||
return topic.Publish(p.ctx, payload)
|
||||
}
|
||||
p.dynamicTopicsMux.RUnlock()
|
||||
// Dynamic topic
|
||||
p.dynamicTopicsMux.RLock()
|
||||
if topic, exists := p.dynamicTopics[topicName]; exists {
|
||||
p.dynamicTopicsMux.RUnlock()
|
||||
return topic.Publish(p.ctx, payload)
|
||||
}
|
||||
p.dynamicTopicsMux.RUnlock()
|
||||
|
||||
// Static topics by name
|
||||
switch topicName {
|
||||
case p.chorusTopicName:
|
||||
return p.chorusTopic.Publish(p.ctx, payload)
|
||||
case p.hmmmTopicName:
|
||||
return p.hmmmTopic.Publish(p.ctx, payload)
|
||||
case p.contextTopicName:
|
||||
return p.contextTopic.Publish(p.ctx, payload)
|
||||
default:
|
||||
return fmt.Errorf("not subscribed to topic: %s", topicName)
|
||||
}
|
||||
// Static topics by name
|
||||
switch topicName {
|
||||
case p.chorusTopicName:
|
||||
return p.chorusTopic.Publish(p.ctx, payload)
|
||||
case p.hmmmTopicName:
|
||||
return p.hmmmTopic.Publish(p.ctx, payload)
|
||||
case p.contextTopicName:
|
||||
return p.contextTopic.Publish(p.ctx, payload)
|
||||
default:
|
||||
return fmt.Errorf("not subscribed to topic: %s", topicName)
|
||||
}
|
||||
}
|
||||
|
||||
// PublishBzzzMessage publishes a message to the Bzzz coordination topic
|
||||
func (p *PubSub) PublishBzzzMessage(msgType MessageType, data map[string]interface{}) error {
|
||||
payload := p.sanitizePayload(p.chorusTopicName, msgType, data)
|
||||
msg := Message{
|
||||
Type: msgType,
|
||||
From: p.host.ID().String(),
|
||||
Timestamp: time.Now(),
|
||||
Data: data,
|
||||
Data: payload,
|
||||
}
|
||||
|
||||
msgBytes, err := json.Marshal(msg)
|
||||
@@ -396,11 +453,12 @@ func (p *PubSub) PublishBzzzMessage(msgType MessageType, data map[string]interfa
|
||||
|
||||
// PublishHmmmMessage publishes a message to the HMMM meta-discussion topic
|
||||
func (p *PubSub) PublishHmmmMessage(msgType MessageType, data map[string]interface{}) error {
|
||||
payload := p.sanitizePayload(p.hmmmTopicName, msgType, data)
|
||||
msg := Message{
|
||||
Type: msgType,
|
||||
From: p.host.ID().String(),
|
||||
Timestamp: time.Now(),
|
||||
Data: data,
|
||||
Data: payload,
|
||||
}
|
||||
|
||||
msgBytes, err := json.Marshal(msg)
|
||||
@@ -425,11 +483,12 @@ func (p *PubSub) SetAntennaeMessageHandler(handler func(msg Message, from peer.I
|
||||
|
||||
// PublishContextFeedbackMessage publishes a message to the Context Feedback topic
|
||||
func (p *PubSub) PublishContextFeedbackMessage(msgType MessageType, data map[string]interface{}) error {
|
||||
payload := p.sanitizePayload(p.contextTopicName, msgType, data)
|
||||
msg := Message{
|
||||
Type: msgType,
|
||||
From: p.host.ID().String(),
|
||||
Timestamp: time.Now(),
|
||||
Data: data,
|
||||
Data: payload,
|
||||
}
|
||||
|
||||
msgBytes, err := json.Marshal(msg)
|
||||
@@ -442,11 +501,16 @@ func (p *PubSub) PublishContextFeedbackMessage(msgType MessageType, data map[str
|
||||
|
||||
// PublishRoleBasedMessage publishes a role-based collaboration message
|
||||
func (p *PubSub) PublishRoleBasedMessage(msgType MessageType, data map[string]interface{}, opts MessageOptions) error {
|
||||
topicName := p.chorusTopicName
|
||||
if isRoleMessage(msgType) {
|
||||
topicName = p.hmmmTopicName
|
||||
}
|
||||
payload := p.sanitizePayload(topicName, msgType, data)
|
||||
msg := Message{
|
||||
Type: msgType,
|
||||
From: p.host.ID().String(),
|
||||
Timestamp: time.Now(),
|
||||
Data: data,
|
||||
Data: payload,
|
||||
FromRole: opts.FromRole,
|
||||
ToRoles: opts.ToRoles,
|
||||
RequiredExpertise: opts.RequiredExpertise,
|
||||
@@ -462,10 +526,8 @@ func (p *PubSub) PublishRoleBasedMessage(msgType MessageType, data map[string]in
|
||||
|
||||
// Determine which topic to use based on message type
|
||||
var topic *pubsub.Topic
|
||||
switch msgType {
|
||||
case RoleAnnouncement, ExpertiseRequest, ExpertiseResponse, StatusUpdate,
|
||||
WorkAllocation, RoleCollaboration, MentorshipRequest, MentorshipResponse,
|
||||
ProjectUpdate, DeliverableReady:
|
||||
switch {
|
||||
case isRoleMessage(msgType):
|
||||
topic = p.hmmmTopic // Use HMMM topic for role-based messages
|
||||
default:
|
||||
topic = p.chorusTopic // Default to Bzzz topic
|
||||
@@ -492,12 +554,12 @@ func (p *PubSub) PublishSlurpContextUpdate(data map[string]interface{}) error {
|
||||
// PublishSlurpIntegrationEvent publishes a generic SLURP integration event
|
||||
func (p *PubSub) PublishSlurpIntegrationEvent(eventType string, discussionID string, slurpEvent map[string]interface{}) error {
|
||||
data := map[string]interface{}{
|
||||
"event_type": eventType,
|
||||
"discussion_id": discussionID,
|
||||
"slurp_event": slurpEvent,
|
||||
"timestamp": time.Now(),
|
||||
"source": "hmmm-slurp-integration",
|
||||
"peer_id": p.host.ID().String(),
|
||||
"event_type": eventType,
|
||||
"discussion_id": discussionID,
|
||||
"slurp_event": slurpEvent,
|
||||
"timestamp": time.Now(),
|
||||
"source": "hmmm-slurp-integration",
|
||||
"peer_id": p.host.ID().String(),
|
||||
}
|
||||
|
||||
return p.PublishSlurpEventGenerated(data)
|
||||
@@ -604,15 +666,23 @@ func (p *PubSub) handleContextFeedbackMessages() {
|
||||
}
|
||||
}
|
||||
|
||||
// getDynamicHandler returns the raw handler for a topic if registered.
|
||||
func (p *PubSub) getDynamicHandler(topicName string) func([]byte, peer.ID) {
|
||||
p.dynamicHandlersMux.RLock()
|
||||
handler := p.dynamicHandlers[topicName]
|
||||
p.dynamicHandlersMux.RUnlock()
|
||||
return handler
|
||||
}
|
||||
|
||||
// handleDynamicMessages processes messages from a dynamic topic subscription
|
||||
func (p *PubSub) handleDynamicMessages(sub *pubsub.Subscription) {
|
||||
func (p *PubSub) handleDynamicMessages(topicName string, sub *pubsub.Subscription) {
|
||||
for {
|
||||
msg, err := sub.Next(p.ctx)
|
||||
if err != nil {
|
||||
if p.ctx.Err() != nil || err.Error() == "subscription cancelled" {
|
||||
return // Subscription was cancelled, exit handler
|
||||
}
|
||||
fmt.Printf("❌ Error receiving dynamic message: %v\n", err)
|
||||
fmt.Printf("❌ Error receiving dynamic message on %s: %v\n", topicName, err)
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -620,13 +690,18 @@ func (p *PubSub) handleDynamicMessages(sub *pubsub.Subscription) {
|
||||
continue
|
||||
}
|
||||
|
||||
var dynamicMsg Message
|
||||
if err := json.Unmarshal(msg.Data, &dynamicMsg); err != nil {
|
||||
fmt.Printf("❌ Failed to unmarshal dynamic message: %v\n", err)
|
||||
if handler := p.getDynamicHandler(topicName); handler != nil {
|
||||
handler(msg.Data, msg.ReceivedFrom)
|
||||
continue
|
||||
}
|
||||
|
||||
// Use the main HMMM handler for all dynamic messages
|
||||
var dynamicMsg Message
|
||||
if err := json.Unmarshal(msg.Data, &dynamicMsg); err != nil {
|
||||
fmt.Printf("❌ Failed to unmarshal dynamic message on %s: %v\n", topicName, err)
|
||||
continue
|
||||
}
|
||||
|
||||
// Use the main HMMM handler for all dynamic messages without custom handlers
|
||||
if p.HmmmMessageHandler != nil {
|
||||
p.HmmmMessageHandler(dynamicMsg, msg.ReceivedFrom)
|
||||
}
|
||||
@@ -732,17 +807,17 @@ func (p *PubSub) processContextFeedbackMessage(msg Message, from peer.ID) {
|
||||
// Log to hypercore if logger is available
|
||||
if p.hypercoreLog != nil {
|
||||
logData := map[string]interface{}{
|
||||
"message_type": string(msg.Type),
|
||||
"from_peer": from.String(),
|
||||
"from_short": from.ShortString(),
|
||||
"timestamp": msg.Timestamp,
|
||||
"data": msg.Data,
|
||||
"topic": "context_feedback",
|
||||
"from_role": msg.FromRole,
|
||||
"to_roles": msg.ToRoles,
|
||||
"project_id": msg.ProjectID,
|
||||
"priority": msg.Priority,
|
||||
"thread_id": msg.ThreadID,
|
||||
"message_type": string(msg.Type),
|
||||
"from_peer": from.String(),
|
||||
"from_short": from.ShortString(),
|
||||
"timestamp": msg.Timestamp,
|
||||
"data": msg.Data,
|
||||
"topic": "context_feedback",
|
||||
"from_role": msg.FromRole,
|
||||
"to_roles": msg.ToRoles,
|
||||
"project_id": msg.ProjectID,
|
||||
"priority": msg.Priority,
|
||||
"thread_id": msg.ThreadID,
|
||||
}
|
||||
|
||||
// Map context feedback message types to hypercore log types
|
||||
@@ -764,6 +839,68 @@ func (p *PubSub) processContextFeedbackMessage(msg Message, from peer.ID) {
|
||||
}
|
||||
}
|
||||
|
||||
func (p *PubSub) sanitizePayload(topic string, msgType MessageType, data map[string]interface{}) map[string]interface{} {
|
||||
if data == nil {
|
||||
return nil
|
||||
}
|
||||
cloned := clonePayloadMap(data)
|
||||
p.redactorMux.RLock()
|
||||
redactor := p.redactor
|
||||
p.redactorMux.RUnlock()
|
||||
if redactor != nil {
|
||||
labels := map[string]string{
|
||||
"source": "pubsub",
|
||||
"topic": topic,
|
||||
"message_type": string(msgType),
|
||||
}
|
||||
redactor.RedactMapWithLabels(context.Background(), cloned, labels)
|
||||
}
|
||||
return cloned
|
||||
}
|
||||
|
||||
func isRoleMessage(msgType MessageType) bool {
|
||||
switch msgType {
|
||||
case RoleAnnouncement, ExpertiseRequest, ExpertiseResponse, StatusUpdate,
|
||||
WorkAllocation, RoleCollaboration, MentorshipRequest, MentorshipResponse,
|
||||
ProjectUpdate, DeliverableReady:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
func clonePayloadMap(in map[string]interface{}) map[string]interface{} {
|
||||
if in == nil {
|
||||
return nil
|
||||
}
|
||||
out := make(map[string]interface{}, len(in))
|
||||
for k, v := range in {
|
||||
out[k] = clonePayloadValue(v)
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func clonePayloadValue(v interface{}) interface{} {
|
||||
switch tv := v.(type) {
|
||||
case map[string]interface{}:
|
||||
return clonePayloadMap(tv)
|
||||
case []interface{}:
|
||||
return clonePayloadSlice(tv)
|
||||
case []string:
|
||||
return append([]string(nil), tv...)
|
||||
default:
|
||||
return tv
|
||||
}
|
||||
}
|
||||
|
||||
func clonePayloadSlice(in []interface{}) []interface{} {
|
||||
out := make([]interface{}, len(in))
|
||||
for i, val := range in {
|
||||
out[i] = clonePayloadValue(val)
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// Close shuts down the PubSub instance
|
||||
func (p *PubSub) Close() error {
|
||||
p.cancel()
|
||||
@@ -788,6 +925,12 @@ func (p *PubSub) Close() error {
|
||||
p.contextTopic.Close()
|
||||
}
|
||||
|
||||
p.dynamicSubsMux.Lock()
|
||||
for _, sub := range p.dynamicSubs {
|
||||
sub.Cancel()
|
||||
}
|
||||
p.dynamicSubsMux.Unlock()
|
||||
|
||||
p.dynamicTopicsMux.Lock()
|
||||
for _, topic := range p.dynamicTopics {
|
||||
topic.Close()
|
||||
|
||||
Reference in New Issue
Block a user