Harden CHORUS security and messaging stack
@@ -10,7 +10,7 @@ CHORUS is the runtime that ties the CHORUS ecosystem together: libp2p mesh, DHT-
 | DHT + DecisionPublisher | ✅ Running | Encrypted storage wired through `pkg/dht`; decisions written via `ucxl.DecisionPublisher`. |
 | Election manager | ✅ Running | Admin election integrated with Backbeat; metrics exposed under `pkg/metrics`. |
 | SLURP (context intelligence) | 🚧 Stubbed | `pkg/slurp/slurp.go` contains TODOs for resolver, temporal graphs, intelligence. Leader integration scaffolding exists but uses placeholder IDs/request forwarding. |
-| SHHH (secrets sentinel) | ❌ Not implemented | No `pkg/shhh` module yet; redaction hooks are pending. |
+| SHHH (secrets sentinel) | 🚧 Sentinel live | `pkg/shhh` redacts hypercore + PubSub payloads with audit + metrics hooks (policy replay TBD). |
 | HMMM routing | 🚧 Partial | PubSub topics join, but capability/role announcements and HMMM router wiring are placeholders (`internal/runtime/agent_support.go`). |

 See `docs/progress/CHORUS-WHOOSH-development-plan.md` for the detailed build plan and `docs/progress/CHORUS-WHOOSH-roadmap.md` for sequencing.

@@ -33,7 +33,7 @@ You’ll get a single agent container with:
 - DHT storage (AGE-encrypted)
 - HTTP API + health endpoints

-**Missing today:** SLURP context resolution, SHHH redaction, HMMM per-issue routing. Expect log warnings/TODOs for those paths.
+**Missing today:** SLURP context resolution, advanced SHHH policy replay, HMMM per-issue routing. Expect log warnings/TODOs for those paths.

 ## Roadmap Highlights
@@ -9,50 +9,57 @@ import (
 	"chorus/internal/logging"
 	"chorus/pkg/config"
-	"chorus/pubsub"
-	"chorus/pkg/repository"
 	"chorus/pkg/hmmm"
+	"chorus/pkg/repository"
+	"chorus/pubsub"
 	"github.com/google/uuid"
 	"github.com/libp2p/go-libp2p/core/peer"
 )

+// TaskProgressTracker is notified when tasks start and complete so availability broadcasts stay accurate.
+type TaskProgressTracker interface {
+	AddTask(taskID string)
+	RemoveTask(taskID string)
+}
+
 // TaskCoordinator manages task discovery, assignment, and execution across multiple repositories
 type TaskCoordinator struct {
 	pubsub *pubsub.PubSub
 	hlog *logging.HypercoreLog
 	ctx context.Context
 	config *config.Config
 	hmmmRouter *hmmm.Router

 	// Repository management
 	providers map[int]repository.TaskProvider // projectID -> provider
 	providerLock sync.RWMutex
 	factory repository.ProviderFactory

 	// Task management
 	activeTasks map[string]*ActiveTask // taskKey -> active task
 	taskLock sync.RWMutex
 	taskMatcher repository.TaskMatcher
+	taskTracker TaskProgressTracker

 	// Agent tracking
 	nodeID string
 	agentInfo *repository.AgentInfo

 	// Sync settings
 	syncInterval time.Duration
 	lastSync map[int]time.Time
 	syncLock sync.RWMutex
 }

 // ActiveTask represents a task currently being worked on
 type ActiveTask struct {
 	Task *repository.Task
 	Provider repository.TaskProvider
 	ProjectID int
 	ClaimedAt time.Time
 	Status string // claimed, working, completed, failed
 	AgentID string
 	Results map[string]interface{}
 }

 // NewTaskCoordinator creates a new task coordinator
@@ -63,7 +70,9 @@ func NewTaskCoordinator(
 	cfg *config.Config,
 	nodeID string,
 	hmmmRouter *hmmm.Router,
+	tracker TaskProgressTracker,
 ) *TaskCoordinator {

 	coordinator := &TaskCoordinator{
 		pubsub: ps,
 		hlog: hlog,
@@ -75,10 +84,11 @@ func NewTaskCoordinator(
 		lastSync: make(map[int]time.Time),
 		factory: &repository.DefaultProviderFactory{},
 		taskMatcher: &repository.DefaultTaskMatcher{},
+		taskTracker: tracker,
 		nodeID: nodeID,
 		syncInterval: 30 * time.Second,
 	}

 	// Create agent info from config
 	coordinator.agentInfo = &repository.AgentInfo{
 		ID: cfg.Agent.ID,
@@ -91,23 +101,23 @@ func NewTaskCoordinator(
 		Performance: map[string]interface{}{"score": 0.8}, // Default performance score
 		Availability: "available",
 	}

 	return coordinator
 }

 // Start begins the task coordination process
 func (tc *TaskCoordinator) Start() {
 	fmt.Printf("🎯 Starting task coordinator for agent %s (%s)\n", tc.agentInfo.ID, tc.agentInfo.Role)

 	// Announce role and capabilities
 	tc.announceAgentRole()

 	// Start periodic task discovery and sync
 	go tc.taskDiscoveryLoop()

 	// Start role-based message handling
 	tc.pubsub.SetAntennaeMessageHandler(tc.handleRoleMessage)

 	fmt.Printf("✅ Task coordinator started\n")
 }

@@ -185,13 +195,17 @@ func (tc *TaskCoordinator) processTask(task *repository.Task, provider repositor
 	tc.agentInfo.CurrentTasks = len(tc.activeTasks)
 	tc.taskLock.Unlock()

+	if tc.taskTracker != nil {
+		tc.taskTracker.AddTask(taskKey)
+	}
+
 	// Log task claim
 	tc.hlog.Append(logging.TaskClaimed, map[string]interface{}{
 		"task_number": task.Number,
 		"repository": task.Repository,
 		"title": task.Title,
 		"required_role": task.RequiredRole,
 		"priority": task.Priority,
 	})

 	// Announce task claim
@@ -212,11 +226,11 @@ func (tc *TaskCoordinator) processTask(task *repository.Task, provider repositor
 		}
 		if err := tc.hmmmRouter.Publish(tc.ctx, seedMsg); err != nil {
 			fmt.Printf("⚠️ Failed to seed HMMM room for task %d: %v\n", task.Number, err)
 			tc.hlog.AppendString("system_error", map[string]interface{}{
 				"error": "hmmm_seed_failed",
 				"task_number": task.Number,
 				"repository": task.Repository,
 				"message": err.Error(),
 			})
 		} else {
 			fmt.Printf("🐜 Seeded HMMM room for task %d\n", task.Number)
@@ -259,14 +273,14 @@ func (tc *TaskCoordinator) shouldRequestCollaboration(task *repository.Task) boo
 // requestTaskCollaboration requests collaboration for a task
 func (tc *TaskCoordinator) requestTaskCollaboration(task *repository.Task) {
 	data := map[string]interface{}{
 		"task_number": task.Number,
 		"repository": task.Repository,
 		"title": task.Title,
 		"required_role": task.RequiredRole,
 		"required_expertise": task.RequiredExpertise,
 		"priority": task.Priority,
 		"requester_role": tc.agentInfo.Role,
 		"reason": "expertise_gap",
 	}

 	opts := pubsub.MessageOptions{
@@ -288,7 +302,7 @@ func (tc *TaskCoordinator) requestTaskCollaboration(task *repository.Task) {
 // executeTask executes a claimed task
 func (tc *TaskCoordinator) executeTask(activeTask *ActiveTask) {
 	taskKey := fmt.Sprintf("%s:%d", activeTask.Task.Repository, activeTask.Task.Number)

 	// Update status
 	tc.taskLock.Lock()
 	activeTask.Status = "working"
@@ -302,10 +316,10 @@ func (tc *TaskCoordinator) executeTask(activeTask *ActiveTask) {

 	// Complete the task
 	results := map[string]interface{}{
 		"status": "completed",
 		"completion_time": time.Now().Format(time.RFC3339),
 		"agent_id": tc.agentInfo.ID,
 		"agent_role": tc.agentInfo.Role,
 	}

 	taskResult := &repository.TaskResult{
@@ -316,13 +330,13 @@ func (tc *TaskCoordinator) executeTask(activeTask *ActiveTask) {
 	err := activeTask.Provider.CompleteTask(activeTask.Task, taskResult)
 	if err != nil {
 		fmt.Printf("❌ Failed to complete task %s #%d: %v\n", activeTask.Task.Repository, activeTask.Task.Number, err)

 		// Update status to failed
 		tc.taskLock.Lock()
 		activeTask.Status = "failed"
 		activeTask.Results = map[string]interface{}{"error": err.Error()}
 		tc.taskLock.Unlock()

 		return
 	}

@@ -334,6 +348,10 @@ func (tc *TaskCoordinator) executeTask(activeTask *ActiveTask) {
 	tc.agentInfo.CurrentTasks = len(tc.activeTasks)
 	tc.taskLock.Unlock()

+	if tc.taskTracker != nil {
+		tc.taskTracker.RemoveTask(taskKey)
+	}
+
 	// Log completion
 	tc.hlog.Append(logging.TaskCompleted, map[string]interface{}{
 		"task_number": activeTask.Task.Number,
@@ -378,19 +396,19 @@ func (tc *TaskCoordinator) announceAgentRole() {
 // announceTaskClaim announces that this agent has claimed a task
 func (tc *TaskCoordinator) announceTaskClaim(task *repository.Task) {
 	data := map[string]interface{}{
 		"task_number": task.Number,
 		"repository": task.Repository,
 		"title": task.Title,
 		"agent_id": tc.agentInfo.ID,
 		"agent_role": tc.agentInfo.Role,
 		"claim_time": time.Now().Format(time.RFC3339),
 		"estimated_completion": time.Now().Add(time.Hour).Format(time.RFC3339),
 	}

 	opts := pubsub.MessageOptions{
 		FromRole: tc.agentInfo.Role,
 		Priority: "medium",
 		ThreadID: fmt.Sprintf("task-%s-%d", task.Repository, task.Number),
 	}

 	err := tc.pubsub.PublishRoleBasedMessage(pubsub.TaskProgress, data, opts)
@@ -463,15 +481,15 @@ func (tc *TaskCoordinator) handleTaskHelpRequest(msg pubsub.Message, from peer.I
 		}
 	}

 	if canHelp && tc.agentInfo.CurrentTasks < tc.agentInfo.MaxTasks {
 		// Offer help
 		responseData := map[string]interface{}{
 			"agent_id": tc.agentInfo.ID,
 			"agent_role": tc.agentInfo.Role,
 			"expertise": tc.agentInfo.Expertise,
 			"availability": tc.agentInfo.MaxTasks - tc.agentInfo.CurrentTasks,
 			"offer_type": "collaboration",
 			"response_to": msg.Data,
 		}

 		opts := pubsub.MessageOptions{
@@ -480,34 +498,34 @@ func (tc *TaskCoordinator) handleTaskHelpRequest(msg pubsub.Message, from peer.I
 			ThreadID: msg.ThreadID,
 		}

 		err := tc.pubsub.PublishRoleBasedMessage(pubsub.TaskHelpResponse, responseData, opts)
 		if err != nil {
 			fmt.Printf("⚠️ Failed to offer help: %v\n", err)
 		} else {
 			fmt.Printf("🤝 Offered help for task collaboration\n")
 		}

 		// Also reflect the help offer into the HMMM per-issue room (best-effort)
 		if tc.hmmmRouter != nil {
 			if tn, ok := msg.Data["task_number"].(float64); ok {
 				issueID := int64(tn)
 				hmsg := hmmm.Message{
 					Version: 1,
 					Type: "meta_msg",
 					IssueID: issueID,
 					ThreadID: fmt.Sprintf("issue-%d", issueID),
 					MsgID: uuid.New().String(),
 					NodeID: tc.nodeID,
 					HopCount: 0,
 					Timestamp: time.Now().UTC(),
 					Message: fmt.Sprintf("Help offer from %s (availability %d)", tc.agentInfo.Role, tc.agentInfo.MaxTasks-tc.agentInfo.CurrentTasks),
 				}
 				if err := tc.hmmmRouter.Publish(tc.ctx, hmsg); err != nil {
 					fmt.Printf("⚠️ Failed to reflect help into HMMM: %v\n", err)
 				}
 			}
 		}
 	}
 }

 // handleExpertiseRequest handles requests for specific expertise
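Editor's note: the coordinator now reports task lifecycle events through the `TaskProgressTracker` interface introduced above (`AddTask`/`RemoveTask`), with the runtime's `SimpleTaskTracker` (shown later in this commit) filling that role in practice. A minimal sketch of a conforming tracker, purely illustrative — the type and constructor names below are not part of the commit:

```go
// Illustrative only: a thread-safe TaskProgressTracker implementation.
// countingTracker/newCountingTracker are hypothetical names; CHORUS itself
// passes its SimpleTaskTracker into NewTaskCoordinator via the new tracker parameter.
package tracker

import "sync"

type countingTracker struct {
	mu    sync.Mutex
	tasks map[string]bool
}

func newCountingTracker() *countingTracker {
	return &countingTracker{tasks: make(map[string]bool)}
}

// AddTask records a claimed task so availability reporting stays accurate.
func (t *countingTracker) AddTask(taskID string) {
	t.mu.Lock()
	defer t.mu.Unlock()
	t.tasks[taskID] = true
}

// RemoveTask clears a completed or failed task.
func (t *countingTracker) RemoveTask(taskID string) {
	t.mu.Lock()
	defer t.mu.Unlock()
	delete(t.tasks, taskID)
}
```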
docs/decisions/2025-02-16-shhh-sentinel-foundation.md (new file, 30 lines)
@@ -0,0 +1,30 @@
# Decision Record: Establish SHHH Sentinel Foundations

- **Date:** 2025-02-16
- **Status:** Accepted
- **Context:** CHORUS roadmap Phase 1 requires a secrets sentinel (`pkg/shhh`) before we wire COOEE/WHOOSH telemetry and audit plumbing. The runtime previously emitted placeholder TODOs and logged sensitive payloads without guard rails.

## Problem
- We lacked a reusable component to detect and redact secrets prior to log/telemetry fan-out.
- Without a dedicated sentinel we could not attach audit sinks or surface metrics for redaction events, blocking roadmap item `SEC-SHHH`.

## Decision
- Introduce `pkg/shhh` as the SHHH sentinel with:
  - Curated default rules (API keys, bearer/OAuth tokens, private key PEM blocks, OpenAI secrets).
  - Extensible configuration for custom regex rules and per-rule severity/tags.
  - Optional audit sink and statistics collection for integration with COOEE/WHOOSH pipelines.
  - Helpers to redact free-form text and `map[string]any` payloads used by our logging pipeline.

## Rationale
- Starting with a focused set of high-signal rules gives immediate coverage for the most damaging leak classes without delaying larger SLURP/SHHH workstreams.
- The API mirrors other CHORUS subsystems (options, config structs, stats snapshots) so existing operators can plug metrics/audits without bespoke glue.
- Providing deterministic findings/locations simplifies future enforcement (e.g., WHOOSH UI badges, COOEE replay) while keeping the implementation lean.

## Impact
- Runtime components can now instantiate SHHH and guarantee `[REDACTED]` placeholders for sensitive fields.
- Audit/event plumbing can be wired incrementally—hashes are emitted for replay without storing raw secrets.
- Future roadmap tasks (policy-driven rules, replay, UCXL evidence) can extend `pkg/shhh` rather than implementing ad-hoc redaction in each subsystem.

## Related Work
- Roadmap: `docs/progress/CHORUS-WHOOSH-roadmap.md` (Phase 1.2 `SEC-SHHH`).
- README coverage gap noted in `README.md` table (SHHH not implemented).
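Editor's note: for orientation, here is a minimal sketch of how a component might drive the sentinel described above. Only the `*shhh.Sentinel` type and the `RedactMapWithLabels(ctx, payload, labels)` call appear in this commit (see the hypercore changes below); the `shhh.NewSentinel` constructor, its defaults, and the sample secrets are assumptions, not part of the diff:

```go
package main

import (
	"context"
	"fmt"
	"log"

	"chorus/pkg/shhh"
)

func main() {
	// Hypothetical constructor; assumed to start with the curated default rule set.
	sentinel, err := shhh.NewSentinel()
	if err != nil {
		log.Fatalf("init shhh sentinel: %v", err)
	}

	payload := map[string]interface{}{
		"message": "rotating key AKIAIOSFODNN7EXAMPLE for deploy",
		"token":   "Bearer eyJhbGciOi...",
	}

	// Redacts matching values in place; labels let audit/metrics sinks
	// attribute findings to a source, as the hypercore log does below.
	sentinel.RedactMapWithLabels(context.Background(), payload, map[string]string{
		"source": "example",
	})

	fmt.Println(payload) // sensitive values replaced with [REDACTED] placeholders
}
```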
go.mod
@@ -159,4 +159,4 @@ require (
 	lukechampine.com/blake3 v1.2.1 // indirect
 )

-replace github.com/chorus-services/backbeat => /home/tony/chorus/project-queues/active/BACKBEAT/backbeat/prototype
+replace github.com/chorus-services/backbeat => ../BACKBEAT/backbeat/prototype
@@ -1,6 +1,7 @@
 package logging

 import (
+	"context"
 	"crypto/sha256"
 	"encoding/hex"
 	"encoding/json"
@@ -8,6 +9,7 @@ import (
 	"sync"
 	"time"

+	"chorus/pkg/shhh"
 	"github.com/libp2p/go-libp2p/core/peer"
 )

@@ -23,12 +25,14 @@ type HypercoreLog struct {
 	entries []LogEntry
 	mutex sync.RWMutex
 	peerID peer.ID

 	// Verification chain
 	headHash string

 	// Replication
 	replicators map[peer.ID]*Replicator
+
+	redactor *shhh.Sentinel
 }

 // LogEntry represents a single entry in the distributed log
@@ -48,12 +52,12 @@ type LogType string

 const (
 	// Bzzz coordination logs
 	TaskAnnounced LogType = "task_announced"
 	TaskClaimed LogType = "task_claimed"
 	TaskProgress LogType = "task_progress"
 	TaskCompleted LogType = "task_completed"
 	TaskFailed LogType = "task_failed"

 	// HMMM meta-discussion logs
 	PlanProposed LogType = "plan_proposed"
 	ObjectionRaised LogType = "objection_raised"
@@ -65,17 +69,17 @@ const (
 	TaskHelpReceived LogType = "task_help_received"

 	// System logs
 	PeerJoined LogType = "peer_joined"
 	PeerLeft LogType = "peer_left"
 	CapabilityBcast LogType = "capability_broadcast"
 	NetworkEvent LogType = "network_event"
 )

 // Replicator handles log replication with other peers
 type Replicator struct {
 	peerID peer.ID
 	lastSyncIndex uint64
 	connected bool
 }

 // NewHypercoreLog creates a new distributed log for a peer
@@ -88,6 +92,13 @@ func NewHypercoreLog(peerID peer.ID) *HypercoreLog {
 	}
 }

+// SetRedactor wires the SHHH sentinel so log payloads are sanitized before persistence.
+func (h *HypercoreLog) SetRedactor(redactor *shhh.Sentinel) {
+	h.mutex.Lock()
+	defer h.mutex.Unlock()
+	h.redactor = redactor
+}
+
 // AppendString is a convenience method for string log types (to match interface)
 func (h *HypercoreLog) AppendString(logType string, data map[string]interface{}) error {
 	_, err := h.Append(LogType(logType), data)
@@ -98,38 +109,40 @@ func (h *HypercoreLog) AppendString(logType string, data map[string]interface{})
 func (h *HypercoreLog) Append(logType LogType, data map[string]interface{}) (*LogEntry, error) {
 	h.mutex.Lock()
 	defer h.mutex.Unlock()

 	index := uint64(len(h.entries))

+	sanitized := h.redactData(logType, data)
+
 	entry := LogEntry{
 		Index: index,
 		Timestamp: time.Now(),
 		Author: h.peerID.String(),
 		Type: logType,
-		Data: data,
+		Data: sanitized,
 		PrevHash: h.headHash,
 	}

 	// Calculate hash
 	entryHash, err := h.calculateEntryHash(entry)
 	if err != nil {
 		return nil, fmt.Errorf("failed to calculate entry hash: %w", err)
 	}
 	entry.Hash = entryHash

 	// Add simple signature (in production, use proper cryptographic signatures)
 	entry.Signature = h.createSignature(entry)

 	// Append to log
 	h.entries = append(h.entries, entry)
 	h.headHash = entryHash

 	fmt.Printf("📝 Log entry appended: %s [%d] by %s\n",
 		logType, index, h.peerID.ShortString())

 	// Trigger replication to connected peers
 	go h.replicateEntry(entry)

 	return &entry, nil
 }

@@ -137,11 +150,11 @@ func (h *HypercoreLog) Append(logType LogType, data map[string]interface{}) (*Lo
 func (h *HypercoreLog) Get(index uint64) (*LogEntry, error) {
 	h.mutex.RLock()
 	defer h.mutex.RUnlock()

 	if index >= uint64(len(h.entries)) {
 		return nil, fmt.Errorf("entry %d not found", index)
 	}

 	return &h.entries[index], nil
 }

@@ -149,7 +162,7 @@ func (h *HypercoreLog) Get(index uint64) (*LogEntry, error) {
 func (h *HypercoreLog) Length() uint64 {
 	h.mutex.RLock()
 	defer h.mutex.RUnlock()

 	return uint64(len(h.entries))
 }

@@ -157,22 +170,22 @@ func (h *HypercoreLog) Length() uint64 {
 func (h *HypercoreLog) GetRange(start, end uint64) ([]LogEntry, error) {
 	h.mutex.RLock()
 	defer h.mutex.RUnlock()

 	if start >= uint64(len(h.entries)) {
 		return nil, fmt.Errorf("start index %d out of range", start)
 	}

 	if end > uint64(len(h.entries)) {
 		end = uint64(len(h.entries))
 	}

 	if start > end {
 		return nil, fmt.Errorf("invalid range: start %d > end %d", start, end)
 	}

 	result := make([]LogEntry, end-start)
 	copy(result, h.entries[start:end])

 	return result, nil
 }

@@ -180,14 +193,14 @@ func (h *HypercoreLog) GetRange(start, end uint64) ([]LogEntry, error) {
 func (h *HypercoreLog) GetEntriesByType(logType LogType) ([]LogEntry, error) {
 	h.mutex.RLock()
 	defer h.mutex.RUnlock()

 	var result []LogEntry
 	for _, entry := range h.entries {
 		if entry.Type == logType {
 			result = append(result, entry)
 		}
 	}

 	return result, nil
 }

@@ -195,14 +208,14 @@ func (h *HypercoreLog) GetEntriesByType(logType LogType) ([]LogEntry, error) {
 func (h *HypercoreLog) GetEntriesByAuthor(author string) ([]LogEntry, error) {
 	h.mutex.RLock()
 	defer h.mutex.RUnlock()

 	var result []LogEntry
 	for _, entry := range h.entries {
 		if entry.Author == author {
 			result = append(result, entry)
 		}
 	}

 	return result, nil
 }

@@ -210,20 +223,20 @@ func (h *HypercoreLog) GetEntriesByAuthor(author string) ([]LogEntry, error) {
 func (h *HypercoreLog) GetRecentEntries(count int) ([]LogEntry, error) {
 	h.mutex.RLock()
 	defer h.mutex.RUnlock()

 	totalEntries := len(h.entries)
 	if count <= 0 || totalEntries == 0 {
 		return []LogEntry{}, nil
 	}

 	start := 0
 	if totalEntries > count {
 		start = totalEntries - count
 	}

 	result := make([]LogEntry, totalEntries-start)
 	copy(result, h.entries[start:])

 	return result, nil
 }

@@ -231,14 +244,14 @@ func (h *HypercoreLog) GetRecentEntries(count int) ([]LogEntry, error) {
 func (h *HypercoreLog) GetEntriesSince(sinceIndex uint64) ([]LogEntry, error) {
 	h.mutex.RLock()
 	defer h.mutex.RUnlock()

 	if sinceIndex >= uint64(len(h.entries)) {
 		return []LogEntry{}, nil
 	}

 	result := make([]LogEntry, len(h.entries)-int(sinceIndex))
 	copy(result, h.entries[sinceIndex:])

 	return result, nil
 }

@@ -246,27 +259,27 @@ func (h *HypercoreLog) GetEntriesSince(sinceIndex uint64) ([]LogEntry, error) {
 func (h *HypercoreLog) VerifyIntegrity() error {
 	h.mutex.RLock()
 	defer h.mutex.RUnlock()

 	var prevHash string
 	for i, entry := range h.entries {
 		// Verify previous hash link
 		if entry.PrevHash != prevHash {
 			return fmt.Errorf("integrity error at entry %d: prev_hash mismatch", i)
 		}

 		// Verify entry hash
 		calculatedHash, err := h.calculateEntryHash(entry)
 		if err != nil {
 			return fmt.Errorf("failed to calculate hash for entry %d: %w", i, err)
 		}

 		if entry.Hash != calculatedHash {
 			return fmt.Errorf("integrity error at entry %d: hash mismatch", i)
 		}

 		prevHash = entry.Hash
 	}

 	return nil
 }

@@ -274,13 +287,13 @@ func (h *HypercoreLog) VerifyIntegrity() error {
 func (h *HypercoreLog) AddReplicator(peerID peer.ID) {
 	h.mutex.Lock()
 	defer h.mutex.Unlock()

 	h.replicators[peerID] = &Replicator{
 		peerID: peerID,
 		lastSyncIndex: 0,
 		connected: true,
 	}

 	fmt.Printf("🔄 Added replicator: %s\n", peerID.ShortString())
 }

@@ -288,7 +301,7 @@ func (h *HypercoreLog) AddReplicator(peerID peer.ID) {
 func (h *HypercoreLog) RemoveReplicator(peerID peer.ID) {
 	h.mutex.Lock()
 	defer h.mutex.Unlock()

 	delete(h.replicators, peerID)
 	fmt.Printf("🔄 Removed replicator: %s\n", peerID.ShortString())
 }
@@ -303,10 +316,10 @@ func (h *HypercoreLog) replicateEntry(entry LogEntry) {
 		}
 	}
 	h.mutex.RUnlock()

 	for _, replicator := range replicators {
 		// In a real implementation, this would send the entry over the network
 		fmt.Printf("🔄 Replicating entry %d to %s\n",
 			entry.Index, replicator.peerID.ShortString())
 	}
 }
@@ -322,16 +335,75 @@ func (h *HypercoreLog) calculateEntryHash(entry LogEntry) (string, error) {
 		Data: entry.Data,
 		PrevHash: entry.PrevHash,
 	}

 	entryBytes, err := json.Marshal(entryForHash)
 	if err != nil {
 		return "", err
 	}

 	hash := sha256.Sum256(entryBytes)
 	return hex.EncodeToString(hash[:]), nil
 }

+func (h *HypercoreLog) redactData(logType LogType, data map[string]interface{}) map[string]interface{} {
+	cloned := cloneLogMap(data)
+	if cloned == nil {
+		return nil
+	}
+	if h.redactor != nil {
+		labels := map[string]string{
+			"source": "hypercore",
+			"log_type": string(logType),
+		}
+		h.redactor.RedactMapWithLabels(context.Background(), cloned, labels)
+	}
+	return cloned
+}
+
+func cloneLogMap(in map[string]interface{}) map[string]interface{} {
+	if in == nil {
+		return nil
+	}
+	out := make(map[string]interface{}, len(in))
+	for k, v := range in {
+		out[k] = cloneLogValue(v)
+	}
+	return out
+}
+
+func cloneLogValue(v interface{}) interface{} {
+	switch tv := v.(type) {
+	case map[string]interface{}:
+		return cloneLogMap(tv)
+	case map[string]any:
+		converted := make(map[string]interface{}, len(tv))
+		for k, val := range tv {
+			converted[k] = cloneLogValue(val)
+		}
+		return converted
+	case []interface{}:
+		return cloneLogSlice(tv)
+	case []any:
+		converted := make([]interface{}, len(tv))
+		for i, val := range tv {
+			converted[i] = cloneLogValue(val)
+		}
+		return converted
+	case []string:
+		return append([]string(nil), tv...)
+	default:
+		return tv
+	}
+}
+
+func cloneLogSlice(in []interface{}) []interface{} {
+	out := make([]interface{}, len(in))
+	for i, val := range in {
+		out[i] = cloneLogValue(val)
+	}
+	return out
+}
+
 // createSignature creates a simplified signature for the entry
 func (h *HypercoreLog) createSignature(entry LogEntry) string {
 	// In production, this would use proper cryptographic signatures
@@ -345,21 +417,21 @@ func (h *HypercoreLog) createSignature(entry LogEntry) string {
 func (h *HypercoreLog) GetStats() map[string]interface{} {
 	h.mutex.RLock()
 	defer h.mutex.RUnlock()

 	typeCount := make(map[LogType]int)
 	authorCount := make(map[string]int)

 	for _, entry := range h.entries {
 		typeCount[entry.Type]++
 		authorCount[entry.Author]++
 	}

 	return map[string]interface{}{
 		"total_entries": len(h.entries),
 		"head_hash": h.headHash,
 		"replicators": len(h.replicators),
 		"entries_by_type": typeCount,
 		"entries_by_author": authorCount,
 		"peer_id": h.peerID.String(),
 	}
 }
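Editor's note: the hypercore log only sanitizes payloads once a sentinel has been attached. A short sketch of how the runtime could hook the two together — `NewHypercoreLog` and `SetRedactor` are APIs shown in this commit, while the sentinel construction reuses the same assumed `shhh.NewSentinel` as above and the surrounding package/import boilerplate is omitted:

```go
// Sketch only: wiring the SHHH sentinel into the hypercore log at startup.
// shhh.NewSentinel is a hypothetical constructor; NewHypercoreLog and
// SetRedactor are the functions added/shown in this commit.
func newRedactedLog(peerID peer.ID) (*logging.HypercoreLog, error) {
	hlog := logging.NewHypercoreLog(peerID)

	sentinel, err := shhh.NewSentinel() // hypothetical constructor with default rules
	if err != nil {
		return nil, fmt.Errorf("init shhh sentinel: %w", err)
	}

	// From here on, every Append/AppendString payload is cloned and redacted
	// (source=hypercore, log_type=<entry type>) before it is hashed and stored.
	hlog.SetRedactor(sentinel)
	return hlog, nil
}
```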
@@ -2,9 +2,11 @@ package runtime

 import (
 	"context"
+	"fmt"
 	"time"

 	"chorus/internal/logging"
+	"chorus/pkg/dht"
 	"chorus/pkg/health"
 	"chorus/pkg/shutdown"
 	"chorus/pubsub"
@@ -43,37 +45,37 @@ func (r *SharedRuntime) StartAgentMode() error {

 	// === Comprehensive Health Monitoring & Graceful Shutdown ===
 	shutdownManager := shutdown.NewManager(30*time.Second, &simpleLogger{logger: r.Logger})

 	healthManager := health.NewManager(r.Node.ID().ShortString(), AppVersion, &simpleLogger{logger: r.Logger})
 	healthManager.SetShutdownManager(shutdownManager)

 	// Register health checks
 	r.setupHealthChecks(healthManager)

 	// Register components for graceful shutdown
 	r.setupGracefulShutdown(shutdownManager, healthManager)

 	// Start health monitoring
 	if err := healthManager.Start(); err != nil {
 		return err
 	}
 	r.HealthManager = healthManager
 	r.Logger.Info("❤️ Health monitoring started")

 	// Start health HTTP server
 	if err := healthManager.StartHTTPServer(r.Config.Network.HealthPort); err != nil {
 		r.Logger.Error("❌ Failed to start health HTTP server: %v", err)
 	} else {
 		r.Logger.Info("🏥 Health endpoints available at http://localhost:%d/health", r.Config.Network.HealthPort)
 	}

 	// Start shutdown manager
 	shutdownManager.Start()
 	r.ShutdownManager = shutdownManager
 	r.Logger.Info("🛡️ Graceful shutdown manager started")

 	r.Logger.Info("✅ CHORUS agent system fully operational with health monitoring")

 	// Wait for graceful shutdown
 	shutdownManager.Wait()
 	r.Logger.Info("✅ CHORUS agent system shutdown completed")
@@ -90,7 +92,7 @@ func (r *SharedRuntime) announceAvailability() {
 	currentTasks := r.TaskTracker.GetActiveTasks()
 	maxTasks := r.TaskTracker.GetMaxTasks()
 	isAvailable := len(currentTasks) < maxTasks

 	status := "ready"
 	if len(currentTasks) >= maxTasks {
 		status = "busy"
@@ -99,13 +101,13 @@ func (r *SharedRuntime) announceAvailability() {
 	}

 	availability := map[string]interface{}{
 		"node_id": r.Node.ID().ShortString(),
 		"available_for_work": isAvailable,
 		"current_tasks": len(currentTasks),
 		"max_tasks": maxTasks,
 		"last_activity": time.Now().Unix(),
 		"status": status,
 		"timestamp": time.Now().Unix(),
 	}
 	if err := r.PubSub.PublishBzzzMessage(pubsub.AvailabilityBcast, availability); err != nil {
 		r.Logger.Error("❌ Failed to announce availability: %v", err)
@@ -126,16 +128,79 @@ func (r *SharedRuntime) statusReporter() {

 // announceCapabilitiesOnChange announces capabilities when they change
 func (r *SharedRuntime) announceCapabilitiesOnChange() {
-	// Implementation from CHORUS would go here
-	// For now, just log that capabilities would be announced
-	r.Logger.Info("📢 Agent capabilities announcement enabled")
+	if r.PubSub == nil {
+		r.Logger.Warn("⚠️ Capability broadcast skipped: PubSub not initialized")
+		return
+	}
+
+	r.Logger.Info("📢 Broadcasting agent capabilities to network")
+
+	activeTaskCount := 0
+	if r.TaskTracker != nil {
+		activeTaskCount = len(r.TaskTracker.GetActiveTasks())
+	}
+
+	announcement := map[string]interface{}{
+		"agent_id": r.Config.Agent.ID,
+		"node_id": r.Node.ID().ShortString(),
+		"version": AppVersion,
+		"capabilities": r.Config.Agent.Capabilities,
+		"expertise": r.Config.Agent.Expertise,
+		"models": r.Config.Agent.Models,
+		"specialization": r.Config.Agent.Specialization,
+		"max_tasks": r.Config.Agent.MaxTasks,
+		"current_tasks": activeTaskCount,
+		"timestamp": time.Now().Unix(),
+		"availability": "ready",
+	}
+
+	if err := r.PubSub.PublishBzzzMessage(pubsub.CapabilityBcast, announcement); err != nil {
+		r.Logger.Error("❌ Failed to broadcast capabilities: %v", err)
+		return
+	}
+
+	r.Logger.Info("✅ Capabilities broadcast published")
+
+	// TODO: Watch for live capability changes (role updates, model changes) and re-broadcast
 }

 // announceRoleOnStartup announces role when the agent starts
 func (r *SharedRuntime) announceRoleOnStartup() {
-	// Implementation from CHORUS would go here
-	// For now, just log that role would be announced
-	r.Logger.Info("🎭 Agent role announcement enabled")
+	role := r.Config.Agent.Role
+	if role == "" {
+		r.Logger.Info("🎭 No agent role configured; skipping role announcement")
+		return
+	}
+	if r.PubSub == nil {
+		r.Logger.Warn("⚠️ Role announcement skipped: PubSub not initialized")
+		return
+	}
+
+	r.Logger.Info("🎭 Announcing agent role to collaboration mesh")
+
+	announcement := map[string]interface{}{
+		"agent_id": r.Config.Agent.ID,
+		"node_id": r.Node.ID().ShortString(),
+		"role": role,
+		"expertise": r.Config.Agent.Expertise,
+		"capabilities": r.Config.Agent.Capabilities,
+		"reports_to": r.Config.Agent.ReportsTo,
+		"specialization": r.Config.Agent.Specialization,
+		"timestamp": time.Now().Unix(),
+	}
+
+	opts := pubsub.MessageOptions{
+		FromRole: role,
+		Priority: "medium",
+		ThreadID: fmt.Sprintf("role:%s", role),
+	}
+
+	if err := r.PubSub.PublishRoleBasedMessage(pubsub.RoleAnnouncement, announcement, opts); err != nil {
+		r.Logger.Error("❌ Failed to announce role: %v", err)
+		return
+	}
+
+	r.Logger.Info("✅ Role announcement published")
 }

 func (r *SharedRuntime) setupHealthChecks(healthManager *health.Manager) {
@@ -151,31 +216,108 @@ func (r *SharedRuntime) setupHealthChecks(healthManager *health.Manager) {
 			Checker: func(ctx context.Context) health.CheckResult {
 				healthInfo := r.BackbeatIntegration.GetHealth()
 				connected, _ := healthInfo["connected"].(bool)

 				result := health.CheckResult{
 					Healthy: connected,
 					Details: healthInfo,
 					Timestamp: time.Now(),
 				}

 				if connected {
 					result.Message = "BACKBEAT integration healthy and connected"
 				} else {
 					result.Message = "BACKBEAT integration not connected"
 				}

 				return result
 			},
 		}
 		healthManager.RegisterCheck(backbeatCheck)
 	}

-	// Add other health checks (P2P, DHT, etc.)
-	// Implementation from CHORUS would go here
+	// Register enhanced health instrumentation when core subsystems are available
+	if r.PubSub == nil {
+		r.Logger.Warn("⚠️ Skipping enhanced health checks: PubSub not initialized")
+		return
+	}
+	if r.ElectionManager == nil {
+		r.Logger.Warn("⚠️ Skipping enhanced health checks: election manager not ready")
+		return
+	}
+
+	var replication *dht.ReplicationManager
+	if r.DHTNode != nil {
+		replication = r.DHTNode.ReplicationManager()
+	}
+
+	enhanced := health.NewEnhancedHealthChecks(
+		healthManager,
+		r.ElectionManager,
+		r.DHTNode,
+		r.PubSub,
+		replication,
+		&simpleLogger{logger: r.Logger},
+	)
+
+	r.EnhancedHealth = enhanced
+	r.Logger.Info("🩺 Enhanced health checks registered")
 }

 func (r *SharedRuntime) setupGracefulShutdown(shutdownManager *shutdown.Manager, healthManager *health.Manager) {
-	// Register components for graceful shutdown
-	// Implementation would register all components that need graceful shutdown
+	if shutdownManager == nil {
+		r.Logger.Warn("⚠️ Shutdown manager not initialized; graceful teardown skipped")
+		return
+	}
+
+	if r.HTTPServer != nil {
+		httpComponent := shutdown.NewGenericComponent("http-api-server", 10, true).
+			SetShutdownFunc(func(ctx context.Context) error {
+				return r.HTTPServer.Stop()
+			})
+		shutdownManager.Register(httpComponent)
+	}
+
+	if healthManager != nil {
+		healthComponent := shutdown.NewGenericComponent("health-manager", 15, true).
+			SetShutdownFunc(func(ctx context.Context) error {
+				return healthManager.Stop()
+			})
+		shutdownManager.Register(healthComponent)
+	}
+
+	if r.UCXIServer != nil {
+		ucxiComponent := shutdown.NewGenericComponent("ucxi-server", 20, true).
+			SetShutdownFunc(func(ctx context.Context) error {
+				return r.UCXIServer.Stop()
+			})
+		shutdownManager.Register(ucxiComponent)
+	}
+
+	if r.PubSub != nil {
+		shutdownManager.Register(shutdown.NewPubSubComponent("pubsub", r.PubSub.Close, 30))
+	}
+
+	if r.DHTNode != nil {
+		dhtComponent := shutdown.NewGenericComponent("dht-node", 35, true).
+			SetCloser(r.DHTNode.Close)
+		shutdownManager.Register(dhtComponent)
+	}
+
+	if r.Node != nil {
+		shutdownManager.Register(shutdown.NewP2PNodeComponent("p2p-node", r.Node.Close, 40))
+	}
+
+	if r.ElectionManager != nil {
+		shutdownManager.Register(shutdown.NewElectionManagerComponent("election-manager", r.ElectionManager.Stop, 45))
+	}
+
+	if r.BackbeatIntegration != nil {
+		backbeatComponent := shutdown.NewGenericComponent("backbeat-integration", 50, true).
+			SetShutdownFunc(func(ctx context.Context) error {
+				return r.BackbeatIntegration.Stop()
+			})
+		shutdownManager.Register(backbeatComponent)
+	}

 	r.Logger.Info("🛡️ Graceful shutdown components registered")
 }
@@ -21,8 +21,10 @@ import (
	"chorus/pkg/dht"
	"chorus/pkg/election"
	"chorus/pkg/health"
	"chorus/pkg/metrics"
	"chorus/pkg/prompt"
	"chorus/pkg/shhh"
	"chorus/pkg/shutdown"
	"chorus/pkg/ucxi"
	"chorus/pkg/ucxl"
	"chorus/pubsub"
@@ -53,8 +55,8 @@ func (l *SimpleLogger) Error(msg string, args ...interface{}) {

// SimpleTaskTracker tracks active tasks for availability reporting
type SimpleTaskTracker struct {
	maxTasks          int
	activeTasks       map[string]bool
	decisionPublisher *ucxl.DecisionPublisher
}

@@ -80,7 +82,7 @@ func (t *SimpleTaskTracker) AddTask(taskID string) {
// RemoveTask marks a task as completed and publishes decision if publisher available
func (t *SimpleTaskTracker) RemoveTask(taskID string) {
	delete(t.activeTasks, taskID)

	// Publish task completion decision if publisher is available
	if t.decisionPublisher != nil {
		t.publishTaskCompletion(taskID, true, "Task completed successfully", nil)
@@ -92,7 +94,7 @@ func (t *SimpleTaskTracker) publishTaskCompletion(taskID string, success bool, s
	if t.decisionPublisher == nil {
		return
	}

	if err := t.decisionPublisher.PublishTaskCompletion(taskID, success, summary, filesModified); err != nil {
		fmt.Printf("⚠️ Failed to publish task completion for %s: %v\n", taskID, err)
	} else {
@@ -102,32 +104,35 @@ func (t *SimpleTaskTracker) publishTaskCompletion(taskID string, success bool, s

// SharedRuntime contains all the shared P2P infrastructure components
type SharedRuntime struct {
	Config              *config.Config
	Logger              *SimpleLogger
	Context             context.Context
	Cancel              context.CancelFunc
	Node                *p2p.Node
	PubSub              *pubsub.PubSub
	HypercoreLog        *logging.HypercoreLog
	MDNSDiscovery       *discovery.MDNSDiscovery
	BackbeatIntegration *backbeat.Integration
	DHTNode             *dht.LibP2PDHT
	EncryptedStorage    *dht.EncryptedDHTStorage
	DecisionPublisher   *ucxl.DecisionPublisher
	ElectionManager     *election.ElectionManager
	TaskCoordinator     *coordinator.TaskCoordinator
	HTTPServer          *api.HTTPServer
	UCXIServer          *ucxi.Server
	HealthManager       *health.Manager
	EnhancedHealth      *health.EnhancedHealthChecks
	ShutdownManager     *shutdown.Manager
	TaskTracker         *SimpleTaskTracker
	Metrics             *metrics.CHORUSMetrics
	Shhh                *shhh.Sentinel
}

// Initialize sets up all shared P2P infrastructure components
func Initialize(appMode string) (*SharedRuntime, error) {
	runtime := &SharedRuntime{}
	runtime.Logger = &SimpleLogger{}

	ctx, cancel := context.WithCancel(context.Background())
	runtime.Context = ctx
	runtime.Cancel = cancel
@@ -142,7 +147,7 @@ func Initialize(appMode string) (*SharedRuntime, error) {
		return nil, fmt.Errorf("configuration error: %v", err)
	}
	runtime.Config = cfg

	runtime.Logger.Info("✅ Configuration loaded successfully")
	runtime.Logger.Info("🤖 Agent ID: %s", cfg.Agent.ID)
	runtime.Logger.Info("🎯 Specialization: %s", cfg.Agent.Specialization)
@@ -166,6 +171,21 @@ func Initialize(appMode string) (*SharedRuntime, error) {
	}
	runtime.Logger.Info("✅ AI provider configured successfully")

	// Initialize metrics collector
	runtime.Metrics = metrics.NewCHORUSMetrics(nil)

	// Initialize SHHH sentinel
	sentinel, err := shhh.NewSentinel(
		shhh.Config{},
		shhh.WithFindingObserver(runtime.handleShhhFindings),
	)
	if err != nil {
		return nil, fmt.Errorf("failed to initialize SHHH sentinel: %v", err)
	}
	sentinel.SetAuditSink(&shhhAuditSink{logger: runtime.Logger})
	runtime.Shhh = sentinel
	runtime.Logger.Info("🛡️ SHHH sentinel initialized")

	// Initialize BACKBEAT integration
	var backbeatIntegration *backbeat.Integration
	backbeatIntegration, err = backbeat.NewIntegration(cfg, cfg.Agent.ID, runtime.Logger)
@@ -198,6 +218,9 @@ func Initialize(appMode string) (*SharedRuntime, error) {

	// Initialize Hypercore-style logger for P2P coordination
	hlog := logging.NewHypercoreLog(node.ID())
	if runtime.Shhh != nil {
		hlog.SetRedactor(runtime.Shhh)
	}
	hlog.Append(logging.PeerJoined, map[string]interface{}{"status": "started"})
	runtime.HypercoreLog = hlog
	runtime.Logger.Info("📝 Hypercore logger initialized")
@@ -214,8 +237,11 @@ func Initialize(appMode string) (*SharedRuntime, error) {
	if err != nil {
		return nil, fmt.Errorf("failed to create PubSub: %v", err)
	}
	if runtime.Shhh != nil {
		ps.SetRedactor(runtime.Shhh)
	}
	runtime.PubSub = ps

	runtime.Logger.Info("📡 PubSub system initialized")

	// Join role-based topics if role is configured
@@ -294,12 +320,12 @@ func (r *SharedRuntime) Cleanup() {
func (r *SharedRuntime) initializeElectionSystem() error {
	// === Admin Election System ===
	electionManager := election.NewElectionManager(r.Context, r.Config, r.Node.Host(), r.PubSub, r.Node.ID().ShortString())

	// Set election callbacks with BACKBEAT integration
	electionManager.SetCallbacks(
		func(oldAdmin, newAdmin string) {
			r.Logger.Info("👑 Admin changed: %s -> %s", oldAdmin, newAdmin)

			// Track admin change with BACKBEAT if available
			if r.BackbeatIntegration != nil {
				operationID := fmt.Sprintf("admin-change-%d", time.Now().Unix())
@@ -311,7 +337,7 @@ func (r *SharedRuntime) initializeElectionSystem() error {
					r.BackbeatIntegration.CompleteP2POperation(operationID, 1)
				}
			}

			// If this node becomes admin, enable SLURP functionality
			if newAdmin == r.Node.ID().ShortString() {
				r.Logger.Info("🎯 This node is now admin - enabling SLURP functionality")
@@ -324,12 +350,12 @@ func (r *SharedRuntime) initializeElectionSystem() error {
		},
		func(winner string) {
			r.Logger.Info("🏆 Election completed, winner: %s", winner)

			// Track election completion with BACKBEAT if available
			if r.BackbeatIntegration != nil {
				operationID := fmt.Sprintf("election-completed-%d", time.Now().Unix())
				if err := r.BackbeatIntegration.StartP2POperation(operationID, "election", 1, map[string]interface{}{
					"winner":  winner,
					"node_id": r.Node.ID().ShortString(),
				}); err == nil {
					r.BackbeatIntegration.CompleteP2POperation(operationID, 1)
@@ -337,22 +363,22 @@ func (r *SharedRuntime) initializeElectionSystem() error {
			}
		},
	)

	if err := electionManager.Start(); err != nil {
		return fmt.Errorf("failed to start election manager: %v", err)
	}
	r.ElectionManager = electionManager
	r.Logger.Info("✅ Election manager started with automated heartbeat management")

	return nil
}

func (r *SharedRuntime) initializeDHTStorage() error {
	// === DHT Storage and Decision Publishing ===
	var dhtNode *dht.LibP2PDHT
	var encryptedStorage *dht.EncryptedDHTStorage
	var decisionPublisher *ucxl.DecisionPublisher

	if r.Config.V2.DHT.Enabled {
		// Create DHT
		var err error
@@ -361,14 +387,14 @@ func (r *SharedRuntime) initializeDHTStorage() error {
			r.Logger.Warn("⚠️ Failed to create DHT: %v", err)
		} else {
			r.Logger.Info("🕸️ DHT initialized")

			// Bootstrap DHT with BACKBEAT tracking
			if r.BackbeatIntegration != nil {
				operationID := fmt.Sprintf("dht-bootstrap-%d", time.Now().Unix())
				if err := r.BackbeatIntegration.StartP2POperation(operationID, "dht_bootstrap", 4, nil); err == nil {
					r.BackbeatIntegration.UpdateP2POperationPhase(operationID, backbeat.PhaseConnecting, 0)
				}

				if err := dhtNode.Bootstrap(); err != nil {
					r.Logger.Warn("⚠️ DHT bootstrap failed: %v", err)
					r.BackbeatIntegration.FailP2POperation(operationID, err.Error())
@@ -380,22 +406,22 @@ func (r *SharedRuntime) initializeDHTStorage() error {
				r.Logger.Warn("⚠️ DHT bootstrap failed: %v", err)
			}
		}

		// Connect to bootstrap peers if configured
		for _, addrStr := range r.Config.V2.DHT.BootstrapPeers {
			addr, err := multiaddr.NewMultiaddr(addrStr)
			if err != nil {
				r.Logger.Warn("⚠️ Invalid bootstrap address %s: %v", addrStr, err)
				continue
			}

			// Extract peer info from multiaddr
			info, err := peer.AddrInfoFromP2pAddr(addr)
			if err != nil {
				r.Logger.Warn("⚠️ Failed to parse peer info from %s: %v", addrStr, err)
				continue
			}

			// Track peer discovery with BACKBEAT if available
			if r.BackbeatIntegration != nil {
				operationID := fmt.Sprintf("peer-discovery-%d", time.Now().Unix())
@@ -403,7 +429,7 @@ func (r *SharedRuntime) initializeDHTStorage() error {
					"peer_addr": addrStr,
				}); err == nil {
					r.BackbeatIntegration.UpdateP2POperationPhase(operationID, backbeat.PhaseConnecting, 0)

					if err := r.Node.Host().Connect(r.Context, *info); err != nil {
						r.Logger.Warn("⚠️ Failed to connect to bootstrap peer %s: %v", addrStr, err)
						r.BackbeatIntegration.FailP2POperation(operationID, err.Error())
@@ -420,20 +446,20 @@ func (r *SharedRuntime) initializeDHTStorage() error {
				}
			}
		}

		// Initialize encrypted storage
		encryptedStorage = dht.NewEncryptedDHTStorage(
			r.Context,
			r.Node.Host(),
			dhtNode,
			r.Config,
			r.Node.ID().ShortString(),
		)

		// Start cache cleanup
		encryptedStorage.StartCacheCleanup(5 * time.Minute)
		r.Logger.Info("🔐 Encrypted DHT storage initialized")

		// Initialize decision publisher
		decisionPublisher = ucxl.NewDecisionPublisher(
			r.Context,
@@ -451,11 +477,24 @@ func (r *SharedRuntime) initializeDHTStorage() error {
	r.DHTNode = dhtNode
	r.EncryptedStorage = encryptedStorage
	r.DecisionPublisher = decisionPublisher

	return nil
}

func (r *SharedRuntime) initializeServices() error {
	// Create simple task tracker ahead of coordinator so broadcasts stay accurate
	taskTracker := &SimpleTaskTracker{
		maxTasks:    r.Config.Agent.MaxTasks,
		activeTasks: make(map[string]bool),
	}

	// Connect decision publisher to task tracker if available
	if r.DecisionPublisher != nil {
		taskTracker.decisionPublisher = r.DecisionPublisher
		r.Logger.Info("📤 Task completion decisions will be published to DHT")
	}
	r.TaskTracker = taskTracker

	// === Task Coordination Integration ===
	taskCoordinator := coordinator.NewTaskCoordinator(
		r.Context,
@@ -464,8 +503,9 @@ func (r *SharedRuntime) initializeServices() error {
		r.Config,
		r.Node.ID().ShortString(),
		nil, // HMMM router placeholder
		taskTracker,
	)

	taskCoordinator.Start()
	r.TaskCoordinator = taskCoordinator
	r.Logger.Info("✅ Task coordination system active")
@@ -487,14 +527,14 @@ func (r *SharedRuntime) initializeServices() error {
		if storageDir == "" {
			storageDir = filepath.Join(os.TempDir(), "chorus-ucxi-storage")
		}

		storage, err := ucxi.NewBasicContentStorage(storageDir)
		if err != nil {
			r.Logger.Warn("⚠️ Failed to create UCXI storage: %v", err)
		} else {
			resolver := ucxi.NewBasicAddressResolver(r.Node.ID().ShortString())
			resolver.SetDefaultTTL(r.Config.UCXL.Resolution.CacheTTL)

			ucxiConfig := ucxi.ServerConfig{
				Port:     r.Config.UCXL.Server.Port,
				BasePath: r.Config.UCXL.Server.BasePath,
@@ -502,7 +542,7 @@ func (r *SharedRuntime) initializeServices() error {
				Storage: storage,
				Logger:  ucxi.SimpleLogger{},
			}

			ucxiServer = ucxi.NewServer(ucxiConfig)
			go func() {
				r.Logger.Info("🔗 UCXI server starting on :%d", r.Config.UCXL.Server.Port)
@@ -515,35 +555,41 @@ func (r *SharedRuntime) initializeServices() error {
		r.Logger.Info("⚪ UCXI server disabled")
	}
	r.UCXIServer = ucxiServer

	return nil
}

func (r *SharedRuntime) handleShhhFindings(ctx context.Context, findings []shhh.Finding) {
	if r == nil || r.Metrics == nil {
		return
	}
	for _, finding := range findings {
		r.Metrics.IncrementSHHHFindings(finding.Rule, string(finding.Severity), finding.Count)
	}
}

type shhhAuditSink struct {
	logger *SimpleLogger
}

func (s *shhhAuditSink) RecordRedaction(_ context.Context, event shhh.AuditEvent) {
	if s == nil || s.logger == nil {
		return
	}
	s.logger.Warn("🔒 SHHH redaction applied (rule=%s severity=%s path=%s)", event.Rule, event.Severity, event.Path)
}

// initializeAIProvider configures the reasoning engine with the appropriate AI provider
func initializeAIProvider(cfg *config.Config, logger *SimpleLogger) error {
	// Set the AI provider
	reasoning.SetAIProvider(cfg.AI.Provider)

	// Configure the selected provider
	switch cfg.AI.Provider {
	case "resetdata":
		if cfg.AI.ResetData.APIKey == "" {
			return fmt.Errorf("RESETDATA_API_KEY environment variable is required for resetdata provider")
		}

		resetdataConfig := reasoning.ResetDataConfig{
			BaseURL: cfg.AI.ResetData.BaseURL,
			APIKey:  cfg.AI.ResetData.APIKey,
@@ -551,19 +597,19 @@ func initializeAIProvider(cfg *config.Config, logger *SimpleLogger) error {
			Timeout: cfg.AI.ResetData.Timeout,
		}
		reasoning.SetResetDataConfig(resetdataConfig)
		logger.Info("🌐 ResetData AI provider configured - Endpoint: %s, Model: %s",
			cfg.AI.ResetData.BaseURL, cfg.AI.ResetData.Model)

	case "ollama":
		reasoning.SetOllamaEndpoint(cfg.AI.Ollama.Endpoint)
		logger.Info("🦙 Ollama AI provider configured - Endpoint: %s", cfg.AI.Ollama.Endpoint)

	default:
		logger.Warn("⚠️ Unknown AI provider '%s', defaulting to resetdata", cfg.AI.Provider)
		if cfg.AI.ResetData.APIKey == "" {
			return fmt.Errorf("RESETDATA_API_KEY environment variable is required for default resetdata provider")
		}

		resetdataConfig := reasoning.ResetDataConfig{
			BaseURL: cfg.AI.ResetData.BaseURL,
			APIKey:  cfg.AI.ResetData.APIKey,
@@ -573,7 +619,7 @@ func initializeAIProvider(cfg *config.Config, logger *SimpleLogger) error {
		reasoning.SetResetDataConfig(resetdataConfig)
		reasoning.SetAIProvider("resetdata")
	}

	// Configure model selection
	reasoning.SetModelConfig(
		cfg.Agent.Models,
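A minimal sketch of wiring an extra SHHH findings observer, built only from names that appear in this diff (shhh.NewSentinel, shhh.Config, shhh.WithFindingObserver, and the Finding fields read by handleShhhFindings); the import path and the log output are assumptions.

package main

import (
	"context"
	"log"

	"chorus/pkg/shhh" // path assumed from this repository layout
)

func main() {
	logFindings := func(ctx context.Context, findings []shhh.Finding) {
		for _, f := range findings {
			// Rule, Severity and Count are the same fields handleShhhFindings feeds into metrics.
			log.Printf("shhh finding: rule=%s severity=%s count=%v", f.Rule, f.Severity, f.Count)
		}
	}

	sentinel, err := shhh.NewSentinel(shhh.Config{}, shhh.WithFindingObserver(logFindings))
	if err != nil {
		log.Fatalf("sentinel init failed: %v", err)
	}
	_ = sentinel // hand this to the hypercore log and PubSub via their SetRedactor hooks
}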
@@ -28,17 +28,18 @@ type Config struct {

// AgentConfig defines agent-specific settings
type AgentConfig struct {
	ID                    string   `yaml:"id"`
	Specialization        string   `yaml:"specialization"`
	MaxTasks              int      `yaml:"max_tasks"`
	Capabilities          []string `yaml:"capabilities"`
	Models                []string `yaml:"models"`
	Role                  string   `yaml:"role"`
	Project               string   `yaml:"project"`
	Expertise             []string `yaml:"expertise"`
	ReportsTo             string   `yaml:"reports_to"`
	Deliverables          []string `yaml:"deliverables"`
	ModelSelectionWebhook string   `yaml:"model_selection_webhook"`
	DefaultReasoningModel string   `yaml:"default_reasoning_model"`
}

// NetworkConfig defines network and API settings
@@ -65,9 +66,9 @@ type LicenseConfig struct {

// AIConfig defines AI service settings
type AIConfig struct {
	Provider  string          `yaml:"provider"`
	Ollama    OllamaConfig    `yaml:"ollama"`
	ResetData ResetDataConfig `yaml:"resetdata"`
}

// OllamaConfig defines Ollama-specific settings
@@ -78,10 +79,10 @@ type OllamaConfig struct {

// ResetDataConfig defines ResetData LLM service settings
type ResetDataConfig struct {
	BaseURL string        `yaml:"base_url"`
	APIKey  string        `yaml:"api_key"`
	Model   string        `yaml:"model"`
	Timeout time.Duration `yaml:"timeout"`
}

// LoggingConfig defines logging settings
@@ -103,9 +104,9 @@ type DHTConfig struct {

// UCXLConfig defines UCXL protocol settings
type UCXLConfig struct {
	Enabled    bool             `yaml:"enabled"`
	Server     ServerConfig     `yaml:"server"`
	Storage    StorageConfig    `yaml:"storage"`
	Resolution ResolutionConfig `yaml:"resolution"`
}

@@ -133,25 +134,26 @@ type SlurpConfig struct {

// WHOOSHAPIConfig defines WHOOSH API integration settings
type WHOOSHAPIConfig struct {
	URL     string `yaml:"url"`
	BaseURL string `yaml:"base_url"`
	Token   string `yaml:"token"`
	Enabled bool   `yaml:"enabled"`
}

// LoadFromEnvironment loads configuration from environment variables
func LoadFromEnvironment() (*Config, error) {
	cfg := &Config{
		Agent: AgentConfig{
			ID:                    getEnvOrDefault("CHORUS_AGENT_ID", ""),
			Specialization:        getEnvOrDefault("CHORUS_SPECIALIZATION", "general_developer"),
			MaxTasks:              getEnvIntOrDefault("CHORUS_MAX_TASKS", 3),
			Capabilities:          getEnvArrayOrDefault("CHORUS_CAPABILITIES", []string{"general_development", "task_coordination"}),
			Models:                getEnvArrayOrDefault("CHORUS_MODELS", []string{"meta/llama-3.1-8b-instruct"}),
			Role:                  getEnvOrDefault("CHORUS_ROLE", ""),
			Project:               getEnvOrDefault("CHORUS_PROJECT", "chorus"),
			Expertise:             getEnvArrayOrDefault("CHORUS_EXPERTISE", []string{}),
			ReportsTo:             getEnvOrDefault("CHORUS_REPORTS_TO", ""),
			Deliverables:          getEnvArrayOrDefault("CHORUS_DELIVERABLES", []string{}),
			ModelSelectionWebhook: getEnvOrDefault("CHORUS_MODEL_SELECTION_WEBHOOK", ""),
			DefaultReasoningModel: getEnvOrDefault("CHORUS_DEFAULT_REASONING_MODEL", "meta/llama-3.1-8b-instruct"),
		},
@@ -214,10 +216,10 @@ func LoadFromEnvironment() (*Config, error) {
			AuditLogging: getEnvBoolOrDefault("CHORUS_AUDIT_LOGGING", true),
			AuditPath:    getEnvOrDefault("CHORUS_AUDIT_PATH", "/tmp/chorus-audit.log"),
			ElectionConfig: ElectionConfig{
				DiscoveryTimeout: getEnvDurationOrDefault("CHORUS_DISCOVERY_TIMEOUT", 10*time.Second),
				HeartbeatTimeout: getEnvDurationOrDefault("CHORUS_HEARTBEAT_TIMEOUT", 30*time.Second),
				ElectionTimeout:  getEnvDurationOrDefault("CHORUS_ELECTION_TIMEOUT", 60*time.Second),
				DiscoveryBackoff: getEnvDurationOrDefault("CHORUS_DISCOVERY_BACKOFF", 5*time.Second),
				LeadershipScoring: &LeadershipScoring{
					UptimeWeight:     0.4,
					CapabilityWeight: 0.3,
@@ -247,7 +249,7 @@ func (c *Config) Validate() error {
	if c.License.LicenseID == "" {
		return fmt.Errorf("CHORUS_LICENSE_ID is required")
	}

	if c.Agent.ID == "" {
		// Auto-generate agent ID if not provided
		hostname, _ := os.Hostname()
@@ -258,7 +260,7 @@ func (c *Config) Validate() error {
			c.Agent.ID = fmt.Sprintf("chorus-%s", hostname)
		}
	}

	return nil
}

@@ -329,14 +331,14 @@ func getEnvOrFileContent(envKey, fileEnvKey string) string {
	if value := os.Getenv(envKey); value != "" {
		return value
	}

	// Then try reading from file path specified in fileEnvKey
	if filePath := os.Getenv(fileEnvKey); filePath != "" {
		if content, err := ioutil.ReadFile(filePath); err == nil {
			return strings.TrimSpace(string(content))
		}
	}

	return ""
}

@@ -360,4 +362,4 @@ func LoadConfig(configPath string) (*Config, error) {
func SaveConfig(cfg *Config, configPath string) error {
	// For containers, configuration is environment-based, so this is a no-op
	return nil
}
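A small, hedged example of exercising the env-driven loader above, including the new CHORUS_PROJECT field; the import path is an assumption, and the printed defaults come from the getEnv*OrDefault calls shown in this file.

package main

import (
	"fmt"
	"os"

	"chorus/pkg/config" // path assumed
)

func main() {
	os.Setenv("CHORUS_LICENSE_ID", "demo-license") // Validate (above) requires a license ID
	os.Setenv("CHORUS_AGENT_ID", "agent-01")
	os.Setenv("CHORUS_PROJECT", "chorus") // new field added in this change
	os.Setenv("CHORUS_ROLE", "backend_developer")

	cfg, err := config.LoadFromEnvironment()
	if err != nil {
		panic(err)
	}
	fmt.Println(cfg.Agent.Project, cfg.Agent.MaxTasks) // "chorus 3" — MaxTasks falls back to its default
}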
@@ -12,27 +12,27 @@ const (

// SecurityConfig defines security-related configuration
type SecurityConfig struct {
	KeyRotationDays int            `yaml:"key_rotation_days"`
	AuditLogging    bool           `yaml:"audit_logging"`
	AuditPath       string         `yaml:"audit_path"`
	ElectionConfig  ElectionConfig `yaml:"election"`
}

// ElectionConfig defines election timing and behavior settings
type ElectionConfig struct {
	DiscoveryTimeout  time.Duration      `yaml:"discovery_timeout"`
	HeartbeatTimeout  time.Duration      `yaml:"heartbeat_timeout"`
	ElectionTimeout   time.Duration      `yaml:"election_timeout"`
	DiscoveryBackoff  time.Duration      `yaml:"discovery_backoff"`
	LeadershipScoring *LeadershipScoring `yaml:"leadership_scoring,omitempty"`
}

// LeadershipScoring defines weights for election scoring
type LeadershipScoring struct {
	UptimeWeight     float64 `yaml:"uptime_weight"`
	CapabilityWeight float64 `yaml:"capability_weight"`
	ExperienceWeight float64 `yaml:"experience_weight"`
	LoadWeight       float64 `yaml:"load_weight"`
}

// AgeKeyPair represents an Age encryption key pair
@@ -43,14 +43,14 @@ type AgeKeyPair struct {

// RoleDefinition represents a role configuration
type RoleDefinition struct {
	Name           string      `yaml:"name"`
	Description    string      `yaml:"description"`
	Capabilities   []string    `yaml:"capabilities"`
	AccessLevel    string      `yaml:"access_level"`
	AuthorityLevel string      `yaml:"authority_level"`
	Keys           *AgeKeyPair `yaml:"keys,omitempty"`
	AgeKeys        *AgeKeyPair `yaml:"age_keys,omitempty"`    // Legacy field name
	CanDecrypt     []string    `yaml:"can_decrypt,omitempty"` // Roles this role can decrypt
}

// GetPredefinedRoles returns the predefined roles for the system
@@ -65,7 +65,7 @@ func GetPredefinedRoles() map[string]*RoleDefinition {
			CanDecrypt: []string{"project_manager", "backend_developer", "frontend_developer", "devops_engineer", "security_engineer"},
		},
		"backend_developer": {
			Name:         "backend_developer",
			Description:  "Backend development and API work",
			Capabilities: []string{"backend", "api", "database"},
			AccessLevel:  "medium",
@@ -90,12 +90,52 @@ func GetPredefinedRoles() map[string]*RoleDefinition {
		},
		"security_engineer": {
			Name:           "security_engineer",
			Description:    "Security oversight and hardening",
			Capabilities:   []string{"security", "audit", "compliance"},
			AccessLevel:    "high",
			AuthorityLevel: AuthorityAdmin,
			CanDecrypt:     []string{"security_engineer", "project_manager", "backend_developer", "frontend_developer", "devops_engineer"},
		},
		"security_expert": {
			Name:           "security_expert",
			Description:    "Advanced security analysis and policy work",
			Capabilities:   []string{"security", "policy", "response"},
			AccessLevel:    "high",
			AuthorityLevel: AuthorityAdmin,
			CanDecrypt:     []string{"security_expert", "security_engineer", "project_manager"},
		},
		"senior_software_architect": {
			Name:           "senior_software_architect",
			Description:    "Architecture governance and system design",
			Capabilities:   []string{"architecture", "design", "coordination"},
			AccessLevel:    "high",
			AuthorityLevel: AuthorityAdmin,
			CanDecrypt:     []string{"senior_software_architect", "project_manager", "backend_developer", "frontend_developer"},
		},
		"qa_engineer": {
			Name:           "qa_engineer",
			Description:    "Quality assurance and testing",
			Capabilities:   []string{"testing", "validation"},
			AccessLevel:    "medium",
			AuthorityLevel: AuthorityFull,
			CanDecrypt:     []string{"qa_engineer", "backend_developer", "frontend_developer"},
		},
		"readonly_user": {
			Name:           "readonly_user",
			Description:    "Read-only observer with audit access",
			Capabilities:   []string{"observation"},
			AccessLevel:    "low",
			AuthorityLevel: AuthorityReadOnly,
			CanDecrypt:     []string{"readonly_user"},
		},
		"suggestion_only_role": {
			Name:           "suggestion_only_role",
			Description:    "Can propose suggestions but not execute",
			Capabilities:   []string{"recommendation"},
			AccessLevel:    "low",
			AuthorityLevel: AuthoritySuggestion,
			CanDecrypt:     []string{"suggestion_only_role"},
		},
	}
}
@@ -106,16 +146,16 @@ func (c *Config) CanDecryptRole(targetRole string) (bool, error) {
	if !exists {
		return false, nil
	}

	targetRoleDef, exists := roles[targetRole]
	if !exists {
		return false, nil
	}

	// Simple access level check
	currentLevel := getAccessLevelValue(currentRole.AccessLevel)
	targetLevel := getAccessLevelValue(targetRoleDef.AccessLevel)

	return currentLevel >= targetLevel, nil
}

@@ -130,4 +170,4 @@ func getAccessLevelValue(level string) int {
	default:
		return 0
	}
}
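A hedged sketch of enumerating the expanded role catalogue and probing the access-level check; the package path and imports are assumed, and CanDecryptRole is assumed to resolve the current role from the agent configuration, which this hunk does not show.

// Assumes: import "fmt" and "chorus/pkg/config".
func describeRoles(cfg *config.Config) {
	for name, role := range config.GetPredefinedRoles() {
		fmt.Printf("%-26s access=%-6s authority=%v\n", name, role.AccessLevel, role.AuthorityLevel)
	}

	// qa_engineer and backend_developer are both "medium" access, so the level comparison
	// in CanDecryptRole is expected to return true.
	cfg.Agent.Role = "qa_engineer" // assumption: this is the "current role" the check uses
	if ok, err := cfg.CanDecryptRole("backend_developer"); err == nil {
		fmt.Println("qa_engineer can decrypt backend_developer payloads:", ok)
	}
}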
pkg/dht/dht.go
@@ -6,33 +6,34 @@ import (
	"sync"
	"time"

	"crypto/sha256"
	"github.com/ipfs/go-cid"
	dht "github.com/libp2p/go-libp2p-kad-dht"
	"github.com/libp2p/go-libp2p/core/host"
	"github.com/libp2p/go-libp2p/core/peer"
	"github.com/libp2p/go-libp2p/core/protocol"
	"github.com/libp2p/go-libp2p/core/routing"
	"github.com/multiformats/go-multiaddr"
	"github.com/multiformats/go-multihash"
)

// LibP2PDHT provides distributed hash table functionality for CHORUS peer discovery
type LibP2PDHT struct {
	host      host.Host
	kdht      *dht.IpfsDHT
	ctx       context.Context
	cancel    context.CancelFunc
	config    *Config
	startTime time.Time

	// Bootstrap state
	bootstrapped   bool
	bootstrapMutex sync.RWMutex

	// Peer management
	knownPeers map[peer.ID]*PeerInfo
	peersMutex sync.RWMutex

	// Replication management
	replicationManager *ReplicationManager
}
@@ -41,30 +42,32 @@ type LibP2PDHT struct {
type Config struct {
	// Bootstrap nodes for initial DHT discovery
	BootstrapPeers []multiaddr.Multiaddr

	// Protocol prefix for CHORUS DHT
	ProtocolPrefix string

	// Bootstrap timeout
	BootstrapTimeout time.Duration

	// Peer discovery interval
	DiscoveryInterval time.Duration

	// DHT mode (client, server, auto)
	Mode dht.ModeOpt

	// Enable automatic bootstrap
	AutoBootstrap bool
}

const defaultProviderResultLimit = 20

// PeerInfo holds information about discovered peers
type PeerInfo struct {
	ID           peer.ID
	Addresses    []multiaddr.Multiaddr
	Agent        string
	Role         string
	LastSeen     time.Time
	Capabilities []string
}
@@ -74,23 +77,28 @@ func DefaultConfig() *Config {
		ProtocolPrefix:    "/CHORUS",
		BootstrapTimeout:  30 * time.Second,
		DiscoveryInterval: 60 * time.Second,
		Mode:              dht.ModeAuto,
		AutoBootstrap:     true,
	}
}

// NewDHT is a backward compatible helper that delegates to NewLibP2PDHT.
func NewDHT(ctx context.Context, host host.Host, opts ...Option) (*LibP2PDHT, error) {
	return NewLibP2PDHT(ctx, host, opts...)
}

// NewLibP2PDHT creates a new LibP2PDHT instance
func NewLibP2PDHT(ctx context.Context, host host.Host, opts ...Option) (*LibP2PDHT, error) {
	config := DefaultConfig()
	for _, opt := range opts {
		opt(config)
	}

	// Create context with cancellation
	dhtCtx, cancel := context.WithCancel(ctx)

	// Create Kademlia DHT
	kdht, err := dht.New(dhtCtx, host,
		dht.Mode(config.Mode),
		dht.ProtocolPrefix(protocol.ID(config.ProtocolPrefix)),
	)
@@ -98,22 +106,23 @@ func NewLibP2PDHT(ctx context.Context, host host.Host, opts ...Option) (*LibP2PD
		cancel()
		return nil, fmt.Errorf("failed to create DHT: %w", err)
	}

	d := &LibP2PDHT{
		host:       host,
		kdht:       kdht,
		ctx:        dhtCtx,
		cancel:     cancel,
		config:     config,
		startTime:  time.Now(),
		knownPeers: make(map[peer.ID]*PeerInfo),
	}

	// Initialize replication manager
	d.replicationManager = NewReplicationManager(dhtCtx, kdht, DefaultReplicationConfig())

	// Start background processes
	go d.startBackgroundTasks()

	return d, nil
}
@@ -178,25 +187,25 @@ func WithAutoBootstrap(auto bool) Option {
func (d *LibP2PDHT) Bootstrap() error {
	d.bootstrapMutex.Lock()
	defer d.bootstrapMutex.Unlock()

	if d.bootstrapped {
		return nil
	}

	// Connect to bootstrap peers
	if len(d.config.BootstrapPeers) == 0 {
		// Use default IPFS bootstrap peers if none configured
		d.config.BootstrapPeers = dht.DefaultBootstrapPeers
	}

	// Bootstrap the DHT
	bootstrapCtx, cancel := context.WithTimeout(d.ctx, d.config.BootstrapTimeout)
	defer cancel()

	if err := d.kdht.Bootstrap(bootstrapCtx); err != nil {
		return fmt.Errorf("DHT bootstrap failed: %w", err)
	}

	// Connect to bootstrap peers
	var connected int
	for _, peerAddr := range d.config.BootstrapPeers {
@@ -204,7 +213,7 @@ func (d *LibP2PDHT) Bootstrap() error {
		if err != nil {
			continue
		}

		connectCtx, cancel := context.WithTimeout(d.ctx, 10*time.Second)
		if err := d.host.Connect(connectCtx, *addrInfo); err != nil {
			cancel()
@@ -213,11 +222,11 @@ func (d *LibP2PDHT) Bootstrap() error {
		cancel()
		connected++
	}

	if connected == 0 {
		return fmt.Errorf("failed to connect to any bootstrap peers")
	}

	d.bootstrapped = true
	return nil
}
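A minimal construction-and-bootstrap sketch using the helpers above; the import alias, the bare libp2p host, and the logging are assumptions — NewDHT, WithAutoBootstrap, and Bootstrap are the functions defined in this file.

package main

import (
	"context"
	"log"

	"github.com/libp2p/go-libp2p"

	chorusdht "chorus/pkg/dht" // alias and import path assumed
)

func main() {
	ctx := context.Background()

	h, err := libp2p.New() // bare host; the runtime wires a fully configured one
	if err != nil {
		log.Fatal(err)
	}

	d, err := chorusdht.NewDHT(ctx, h, chorusdht.WithAutoBootstrap(false))
	if err != nil {
		log.Fatal(err)
	}

	// Falls back to the default IPFS bootstrap peers when none are configured.
	if err := d.Bootstrap(); err != nil {
		log.Printf("DHT bootstrap failed: %v", err)
	}
}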
@@ -233,13 +242,13 @@ func (d *LibP2PDHT) IsBootstrapped() bool {
func (d *LibP2PDHT) keyToCID(key string) (cid.Cid, error) {
	// Hash the key
	hash := sha256.Sum256([]byte(key))

	// Create multihash
	mh, err := multihash.EncodeName(hash[:], "sha2-256")
	if err != nil {
		return cid.Undef, err
	}

	// Create CID
	return cid.NewCidV1(cid.Raw, mh), nil
}
@@ -249,13 +258,13 @@ func (d *LibP2PDHT) Provide(ctx context.Context, key string) error {
	if !d.IsBootstrapped() {
		return fmt.Errorf("DHT not bootstrapped")
	}

	// Convert key to CID
	keyCID, err := d.keyToCID(key)
	if err != nil {
		return fmt.Errorf("failed to create CID from key: %w", err)
	}

	return d.kdht.Provide(ctx, keyCID, true)
}
@@ -264,31 +273,32 @@ func (d *LibP2PDHT) FindProviders(ctx context.Context, key string, limit int) ([
	if !d.IsBootstrapped() {
		return nil, fmt.Errorf("DHT not bootstrapped")
	}

	// Convert key to CID
	keyCID, err := d.keyToCID(key)
	if err != nil {
		return nil, fmt.Errorf("failed to create CID from key: %w", err)
	}

	maxProviders := limit
	if maxProviders <= 0 {
		maxProviders = defaultProviderResultLimit
	}

	providerCtx, cancel := context.WithCancel(ctx)
	defer cancel()

	providersChan := d.kdht.FindProvidersAsync(providerCtx, keyCID, maxProviders)
	providers := make([]peer.AddrInfo, 0, maxProviders)

	for providerInfo := range providersChan {
		providers = append(providers, providerInfo)
		if limit > 0 && len(providers) >= limit {
			cancel()
			break
		}
	}

	return providers, nil
}
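A short sketch of the announce/lookup round trip enabled by the corrected FindProviders; it assumes it lives in the same package, and the key shape mirrors what FindPeersByRole uses further down.

// Passing limit 0 falls back to defaultProviderResultLimit (20).
func findBackendProviders(ctx context.Context, d *LibP2PDHT) ([]peer.AddrInfo, error) {
	key := "CHORUS:role:backend_developer"
	if err := d.Provide(ctx, key); err != nil {
		return nil, err
	}
	return d.FindProviders(ctx, key, 0)
}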
@@ -297,7 +307,7 @@ func (d *LibP2PDHT) PutValue(ctx context.Context, key string, value []byte) erro
	if !d.IsBootstrapped() {
		return fmt.Errorf("DHT not bootstrapped")
	}

	return d.kdht.PutValue(ctx, key, value)
}
@@ -306,7 +316,7 @@ func (d *LibP2PDHT) GetValue(ctx context.Context, key string) ([]byte, error) {
	if !d.IsBootstrapped() {
		return nil, fmt.Errorf("DHT not bootstrapped")
	}

	return d.kdht.GetValue(ctx, key)
}
@@ -315,7 +325,7 @@ func (d *LibP2PDHT) FindPeer(ctx context.Context, peerID peer.ID) (peer.AddrInfo
	if !d.IsBootstrapped() {
		return peer.AddrInfo{}, fmt.Errorf("DHT not bootstrapped")
	}

	return d.kdht.FindPeer(ctx, peerID)
}
@@ -329,14 +339,30 @@ func (d *LibP2PDHT) GetConnectedPeers() []peer.ID {
|
|||||||
return d.kdht.Host().Network().Peers()
|
return d.kdht.Host().Network().Peers()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GetStats reports basic runtime statistics for the DHT
|
||||||
|
func (d *LibP2PDHT) GetStats() DHTStats {
|
||||||
|
stats := DHTStats{
|
||||||
|
TotalPeers: len(d.GetConnectedPeers()),
|
||||||
|
Uptime: time.Since(d.startTime),
|
||||||
|
}
|
||||||
|
|
||||||
|
if d.replicationManager != nil {
|
||||||
|
if metrics := d.replicationManager.GetMetrics(); metrics != nil {
|
||||||
|
stats.TotalKeys = int(metrics.TotalKeys)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return stats
|
||||||
|
}
|
||||||
|
|
||||||
// RegisterPeer registers a peer with capability information
|
// RegisterPeer registers a peer with capability information
|
||||||
func (d *LibP2PDHT) RegisterPeer(peerID peer.ID, agent, role string, capabilities []string) {
|
func (d *LibP2PDHT) RegisterPeer(peerID peer.ID, agent, role string, capabilities []string) {
|
||||||
d.peersMutex.Lock()
|
d.peersMutex.Lock()
|
||||||
defer d.peersMutex.Unlock()
|
defer d.peersMutex.Unlock()
|
||||||
|
|
||||||
// Get peer addresses from host
|
// Get peer addresses from host
|
||||||
peerInfo := d.host.Peerstore().PeerInfo(peerID)
|
peerInfo := d.host.Peerstore().PeerInfo(peerID)
|
||||||
|
|
||||||
d.knownPeers[peerID] = &PeerInfo{
|
d.knownPeers[peerID] = &PeerInfo{
|
||||||
ID: peerID,
|
ID: peerID,
|
||||||
Addresses: peerInfo.Addrs,
|
Addresses: peerInfo.Addrs,
|
||||||
@@ -351,12 +377,12 @@ func (d *LibP2PDHT) RegisterPeer(peerID peer.ID, agent, role string, capabilitie
|
|||||||
func (d *LibP2PDHT) GetKnownPeers() map[peer.ID]*PeerInfo {
|
func (d *LibP2PDHT) GetKnownPeers() map[peer.ID]*PeerInfo {
|
||||||
d.peersMutex.RLock()
|
d.peersMutex.RLock()
|
||||||
defer d.peersMutex.RUnlock()
|
defer d.peersMutex.RUnlock()
|
||||||
|
|
||||||
result := make(map[peer.ID]*PeerInfo)
|
result := make(map[peer.ID]*PeerInfo)
|
||||||
for id, info := range d.knownPeers {
|
for id, info := range d.knownPeers {
|
||||||
result[id] = info
|
result[id] = info
|
||||||
}
|
}
|
||||||
|
|
||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -371,7 +397,7 @@ func (d *LibP2PDHT) FindPeersByRole(ctx context.Context, role string) ([]*PeerIn
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
d.peersMutex.RUnlock()
|
d.peersMutex.RUnlock()
|
||||||
|
|
||||||
// Also search DHT for role-based keys
|
// Also search DHT for role-based keys
|
||||||
roleKey := fmt.Sprintf("CHORUS:role:%s", role)
|
roleKey := fmt.Sprintf("CHORUS:role:%s", role)
|
||||||
providers, err := d.FindProviders(ctx, roleKey, 10)
|
providers, err := d.FindProviders(ctx, roleKey, 10)
|
||||||
@@ -379,11 +405,11 @@ func (d *LibP2PDHT) FindPeersByRole(ctx context.Context, role string) ([]*PeerIn
|
|||||||
// Return local peers even if DHT search fails
|
// Return local peers even if DHT search fails
|
||||||
return localPeers, nil
|
return localPeers, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Convert providers to PeerInfo
|
// Convert providers to PeerInfo
|
||||||
var result []*PeerInfo
|
var result []*PeerInfo
|
||||||
result = append(result, localPeers...)
|
result = append(result, localPeers...)
|
||||||
|
|
||||||
for _, provider := range providers {
|
for _, provider := range providers {
|
||||||
// Skip if we already have this peer
|
// Skip if we already have this peer
|
||||||
found := false
|
found := false
|
||||||
@@ -402,7 +428,7 @@ func (d *LibP2PDHT) FindPeersByRole(ctx context.Context, role string) ([]*PeerIn
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return result, nil
|
return result, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -424,10 +450,10 @@ func (d *LibP2PDHT) startBackgroundTasks() {
|
|||||||
if d.config.AutoBootstrap {
|
if d.config.AutoBootstrap {
|
||||||
go d.autoBootstrap()
|
go d.autoBootstrap()
|
||||||
}
|
}
|
||||||
|
|
||||||
// Start periodic peer discovery
|
// Start periodic peer discovery
|
||||||
go d.periodicDiscovery()
|
go d.periodicDiscovery()
|
||||||
|
|
||||||
// Start peer cleanup
|
// Start peer cleanup
|
||||||
go d.peerCleanup()
|
go d.peerCleanup()
|
||||||
}
|
}
|
||||||
@@ -436,7 +462,7 @@ func (d *LibP2PDHT) startBackgroundTasks() {
|
|||||||
func (d *LibP2PDHT) autoBootstrap() {
|
func (d *LibP2PDHT) autoBootstrap() {
|
||||||
ticker := time.NewTicker(30 * time.Second)
|
ticker := time.NewTicker(30 * time.Second)
|
||||||
defer ticker.Stop()
|
defer ticker.Stop()
|
||||||
|
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
case <-d.ctx.Done():
|
case <-d.ctx.Done():
|
||||||
@@ -456,7 +482,7 @@ func (d *LibP2PDHT) autoBootstrap() {
|
|||||||
func (d *LibP2PDHT) periodicDiscovery() {
|
func (d *LibP2PDHT) periodicDiscovery() {
|
||||||
ticker := time.NewTicker(d.config.DiscoveryInterval)
|
ticker := time.NewTicker(d.config.DiscoveryInterval)
|
||||||
defer ticker.Stop()
|
defer ticker.Stop()
|
||||||
|
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
case <-d.ctx.Done():
|
case <-d.ctx.Done():
|
||||||
@@ -473,13 +499,13 @@ func (d *LibP2PDHT) periodicDiscovery() {
|
|||||||
func (d *LibP2PDHT) performDiscovery() {
|
func (d *LibP2PDHT) performDiscovery() {
|
||||||
ctx, cancel := context.WithTimeout(d.ctx, 30*time.Second)
|
ctx, cancel := context.WithTimeout(d.ctx, 30*time.Second)
|
||||||
defer cancel()
|
defer cancel()
|
||||||
|
|
||||||
// Look for general CHORUS peers
|
// Look for general CHORUS peers
|
||||||
providers, err := d.FindProviders(ctx, "CHORUS:peer", 10)
|
providers, err := d.FindProviders(ctx, "CHORUS:peer", 10)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Update known peers
|
// Update known peers
|
||||||
d.peersMutex.Lock()
|
d.peersMutex.Lock()
|
||||||
for _, provider := range providers {
|
for _, provider := range providers {
|
||||||
@@ -498,7 +524,7 @@ func (d *LibP2PDHT) performDiscovery() {
|
|||||||
func (d *LibP2PDHT) peerCleanup() {
|
func (d *LibP2PDHT) peerCleanup() {
|
||||||
ticker := time.NewTicker(5 * time.Minute)
|
ticker := time.NewTicker(5 * time.Minute)
|
||||||
defer ticker.Stop()
|
defer ticker.Stop()
|
||||||
|
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
case <-d.ctx.Done():
|
case <-d.ctx.Done():
|
||||||
@@ -513,9 +539,9 @@ func (d *LibP2PDHT) peerCleanup() {
|
|||||||
func (d *LibP2PDHT) cleanupStalePeers() {
|
func (d *LibP2PDHT) cleanupStalePeers() {
|
||||||
d.peersMutex.Lock()
|
d.peersMutex.Lock()
|
||||||
defer d.peersMutex.Unlock()
|
defer d.peersMutex.Unlock()
|
||||||
|
|
||||||
staleThreshold := time.Now().Add(-time.Hour) // 1 hour threshold
|
staleThreshold := time.Now().Add(-time.Hour) // 1 hour threshold
|
||||||
|
|
||||||
for peerID, peerInfo := range d.knownPeers {
|
for peerID, peerInfo := range d.knownPeers {
|
||||||
if peerInfo.LastSeen.Before(staleThreshold) {
|
if peerInfo.LastSeen.Before(staleThreshold) {
|
||||||
// Check if peer is still connected
|
// Check if peer is still connected
|
||||||
@@ -526,7 +552,7 @@ func (d *LibP2PDHT) cleanupStalePeers() {
|
|||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if !connected {
|
if !connected {
|
||||||
delete(d.knownPeers, peerID)
|
delete(d.knownPeers, peerID)
|
||||||
}
|
}
|
||||||
@@ -589,11 +615,11 @@ func (d *LibP2PDHT) EnableReplication(config *ReplicationConfig) error {
|
|||||||
if d.replicationManager != nil {
|
if d.replicationManager != nil {
|
||||||
return fmt.Errorf("replication already enabled")
|
return fmt.Errorf("replication already enabled")
|
||||||
}
|
}
|
||||||
|
|
||||||
if config == nil {
|
if config == nil {
|
||||||
config = DefaultReplicationConfig()
|
config = DefaultReplicationConfig()
|
||||||
}
|
}
|
||||||
|
|
||||||
d.replicationManager = NewReplicationManager(d.ctx, d.kdht, config)
|
d.replicationManager = NewReplicationManager(d.ctx, d.kdht, config)
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@@ -603,11 +629,11 @@ func (d *LibP2PDHT) DisableReplication() error {
|
|||||||
if d.replicationManager == nil {
|
if d.replicationManager == nil {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := d.replicationManager.Stop(); err != nil {
|
if err := d.replicationManager.Stop(); err != nil {
|
||||||
return fmt.Errorf("failed to stop replication manager: %w", err)
|
return fmt.Errorf("failed to stop replication manager: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
d.replicationManager = nil
|
d.replicationManager = nil
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@@ -617,13 +643,18 @@ func (d *LibP2PDHT) IsReplicationEnabled() bool {
|
|||||||
return d.replicationManager != nil
|
return d.replicationManager != nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ReplicationManager returns the underlying replication manager if enabled.
|
||||||
|
func (d *LibP2PDHT) ReplicationManager() *ReplicationManager {
|
||||||
|
return d.replicationManager
|
||||||
|
}
|
||||||
|
|
||||||
// Close shuts down the DHT
|
// Close shuts down the DHT
|
||||||
func (d *LibP2PDHT) Close() error {
|
func (d *LibP2PDHT) Close() error {
|
||||||
// Stop replication manager first
|
// Stop replication manager first
|
||||||
if d.replicationManager != nil {
|
if d.replicationManager != nil {
|
||||||
d.replicationManager.Stop()
|
d.replicationManager.Stop()
|
||||||
}
|
}
|
||||||
|
|
||||||
d.cancel()
|
d.cancel()
|
||||||
return d.kdht.Close()
|
return d.kdht.Close()
|
||||||
}
|
}
|
||||||
@@ -633,10 +664,10 @@ func (d *LibP2PDHT) RefreshRoutingTable() error {
|
|||||||
if !d.IsBootstrapped() {
|
if !d.IsBootstrapped() {
|
||||||
return fmt.Errorf("DHT not bootstrapped")
|
return fmt.Errorf("DHT not bootstrapped")
|
||||||
}
|
}
|
||||||
|
|
||||||
// RefreshRoutingTable() returns a channel with errors, not a direct error
|
// RefreshRoutingTable() returns a channel with errors, not a direct error
|
||||||
errChan := d.kdht.RefreshRoutingTable()
|
errChan := d.kdht.RefreshRoutingTable()
|
||||||
|
|
||||||
// Wait for the first error (if any) from the channel
|
// Wait for the first error (if any) from the channel
|
||||||
select {
|
select {
|
||||||
case err := <-errChan:
|
case err := <-errChan:
|
||||||
@@ -654,4 +685,4 @@ func (d *LibP2PDHT) GetDHTSize() int {
|
|||||||
// Host returns the underlying libp2p host
|
// Host returns the underlying libp2p host
|
||||||
func (d *LibP2PDHT) Host() host.Host {
|
func (d *LibP2PDHT) Host() host.Host {
|
||||||
return d.host
|
return d.host
|
||||||
}
|
}
|
||||||
|
|||||||
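For reviewers, a minimal caller-side sketch of the reworked provider lookup and the new stats surface; it is not part of the commit. It assumes the exported NewLibP2PDHT constructor and the `chorus/pkg/dht` import path shown in the hunks above, and that Bootstrap can reach the default public peers (it will fail on an isolated host).

package main

import (
	"context"
	"fmt"
	"log"
	"time"

	"github.com/libp2p/go-libp2p"

	"chorus/pkg/dht"
)

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()

	host, err := libp2p.New()
	if err != nil {
		log.Fatalf("host: %v", err)
	}
	defer host.Close()

	// Provide/FindProviders now return "DHT not bootstrapped" errors instead
	// of silently doing nothing, so bootstrap first.
	d, err := dht.NewLibP2PDHT(ctx, host)
	if err != nil {
		log.Fatalf("dht: %v", err)
	}
	defer d.Close()

	if err := d.Bootstrap(); err != nil {
		log.Fatalf("bootstrap: %v", err)
	}

	// A limit <= 0 falls back to the package default; a positive limit cancels
	// the async provider stream once enough peers have been collected.
	providers, err := d.FindProviders(ctx, "CHORUS:peer", 5)
	if err != nil {
		log.Fatalf("find providers: %v", err)
	}
	fmt.Println("providers found:", len(providers))

	// GetStats reports connected peers, uptime, and replicated key counts.
	stats := d.GetStats()
	fmt.Printf("peers=%d uptime=%s keys=%d\n", stats.TotalPeers, stats.Uptime, stats.TotalKeys)
}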
@@ -2,546 +2,155 @@ package dht
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/libp2p/go-libp2p"
|
libp2p "github.com/libp2p/go-libp2p"
|
||||||
"github.com/libp2p/go-libp2p/core/host"
|
dhtmode "github.com/libp2p/go-libp2p-kad-dht"
|
||||||
"github.com/libp2p/go-libp2p/core/test"
|
"github.com/libp2p/go-libp2p/core/test"
|
||||||
dht "github.com/libp2p/go-libp2p-kad-dht"
|
|
||||||
"github.com/multiformats/go-multiaddr"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
type harness struct {
|
||||||
|
ctx context.Context
|
||||||
|
host libp2pHost
|
||||||
|
dht *LibP2PDHT
|
||||||
|
}
|
||||||
|
|
||||||
|
type libp2pHost interface {
|
||||||
|
Close() error
|
||||||
|
}
|
||||||
|
|
||||||
|
func newHarness(t *testing.T, opts ...Option) *harness {
|
||||||
|
t.Helper()
|
||||||
|
|
||||||
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
|
|
||||||
|
host, err := libp2p.New(libp2p.ListenAddrStrings("/ip4/127.0.0.1/tcp/0"))
|
||||||
|
if err != nil {
|
||||||
|
cancel()
|
||||||
|
t.Fatalf("failed to create libp2p host: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
options := append([]Option{WithAutoBootstrap(false)}, opts...)
|
||||||
|
d, err := NewLibP2PDHT(ctx, host, options...)
|
||||||
|
if err != nil {
|
||||||
|
host.Close()
|
||||||
|
cancel()
|
||||||
|
t.Fatalf("failed to create DHT: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
t.Cleanup(func() {
|
||||||
|
d.Close()
|
||||||
|
host.Close()
|
||||||
|
cancel()
|
||||||
|
})
|
||||||
|
|
||||||
|
return &harness{ctx: ctx, host: host, dht: d}
|
||||||
|
}
|
||||||
|
|
||||||
func TestDefaultConfig(t *testing.T) {
|
func TestDefaultConfig(t *testing.T) {
|
||||||
config := DefaultConfig()
|
cfg := DefaultConfig()
|
||||||
|
|
||||||
if config.ProtocolPrefix != "/CHORUS" {
|
if cfg.ProtocolPrefix != "/CHORUS" {
|
||||||
t.Errorf("expected protocol prefix '/CHORUS', got %s", config.ProtocolPrefix)
|
t.Fatalf("expected protocol prefix '/CHORUS', got %s", cfg.ProtocolPrefix)
|
||||||
}
|
}
|
||||||
|
|
||||||
if config.BootstrapTimeout != 30*time.Second {
|
if cfg.BootstrapTimeout != 30*time.Second {
|
||||||
t.Errorf("expected bootstrap timeout 30s, got %v", config.BootstrapTimeout)
|
t.Fatalf("expected bootstrap timeout 30s, got %v", cfg.BootstrapTimeout)
|
||||||
}
|
}
|
||||||
|
|
||||||
if config.Mode != dht.ModeAuto {
|
if cfg.Mode != dhtmode.ModeAuto {
|
||||||
t.Errorf("expected mode auto, got %v", config.Mode)
|
t.Fatalf("expected mode auto, got %v", cfg.Mode)
|
||||||
}
|
}
|
||||||
|
|
||||||
if !config.AutoBootstrap {
|
if !cfg.AutoBootstrap {
|
||||||
t.Error("expected auto bootstrap to be enabled")
|
t.Fatal("expected auto bootstrap to be enabled")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestNewDHT(t *testing.T) {
|
func TestWithOptionsOverridesDefaults(t *testing.T) {
|
||||||
ctx := context.Background()
|
h := newHarness(t,
|
||||||
|
|
||||||
// Create a test host
|
|
||||||
host, err := libp2p.New()
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("failed to create test host: %v", err)
|
|
||||||
}
|
|
||||||
defer host.Close()
|
|
||||||
|
|
||||||
// Test with default options
|
|
||||||
d, err := NewDHT(ctx, host)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("failed to create DHT: %v", err)
|
|
||||||
}
|
|
||||||
defer d.Close()
|
|
||||||
|
|
||||||
if d.host != host {
|
|
||||||
t.Error("host not set correctly")
|
|
||||||
}
|
|
||||||
|
|
||||||
if d.config.ProtocolPrefix != "/CHORUS" {
|
|
||||||
t.Errorf("expected protocol prefix '/CHORUS', got %s", d.config.ProtocolPrefix)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestDHTWithOptions(t *testing.T) {
|
|
||||||
ctx := context.Background()
|
|
||||||
|
|
||||||
host, err := libp2p.New()
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("failed to create test host: %v", err)
|
|
||||||
}
|
|
||||||
defer host.Close()
|
|
||||||
|
|
||||||
// Test with custom options
|
|
||||||
d, err := NewDHT(ctx, host,
|
|
||||||
WithProtocolPrefix("/custom"),
|
WithProtocolPrefix("/custom"),
|
||||||
WithMode(dht.ModeClient),
|
WithDiscoveryInterval(2*time.Minute),
|
||||||
WithBootstrapTimeout(60*time.Second),
|
WithBootstrapTimeout(45*time.Second),
|
||||||
WithDiscoveryInterval(120*time.Second),
|
WithMode(dhtmode.ModeClient),
|
||||||
WithAutoBootstrap(false),
|
WithAutoBootstrap(true),
|
||||||
)
|
)
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("failed to create DHT: %v", err)
|
cfg := h.dht.config
|
||||||
|
|
||||||
|
if cfg.ProtocolPrefix != "/custom" {
|
||||||
|
t.Fatalf("expected protocol prefix '/custom', got %s", cfg.ProtocolPrefix)
|
||||||
}
|
}
|
||||||
defer d.Close()
|
|
||||||
|
if cfg.DiscoveryInterval != 2*time.Minute {
|
||||||
if d.config.ProtocolPrefix != "/custom" {
|
t.Fatalf("expected discovery interval 2m, got %v", cfg.DiscoveryInterval)
|
||||||
t.Errorf("expected protocol prefix '/custom', got %s", d.config.ProtocolPrefix)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if d.config.Mode != dht.ModeClient {
|
if cfg.BootstrapTimeout != 45*time.Second {
|
||||||
t.Errorf("expected mode client, got %v", d.config.Mode)
|
t.Fatalf("expected bootstrap timeout 45s, got %v", cfg.BootstrapTimeout)
|
||||||
}
|
}
|
||||||
|
|
||||||
if d.config.BootstrapTimeout != 60*time.Second {
|
if cfg.Mode != dhtmode.ModeClient {
|
||||||
t.Errorf("expected bootstrap timeout 60s, got %v", d.config.BootstrapTimeout)
|
t.Fatalf("expected mode client, got %v", cfg.Mode)
|
||||||
}
|
}
|
||||||
|
|
||||||
if d.config.DiscoveryInterval != 120*time.Second {
|
if !cfg.AutoBootstrap {
|
||||||
t.Errorf("expected discovery interval 120s, got %v", d.config.DiscoveryInterval)
|
t.Fatal("expected auto bootstrap to remain enabled")
|
||||||
}
|
|
||||||
|
|
||||||
if d.config.AutoBootstrap {
|
|
||||||
t.Error("expected auto bootstrap to be disabled")
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestWithBootstrapPeersFromStrings(t *testing.T) {
|
func TestProvideRequiresBootstrap(t *testing.T) {
|
||||||
ctx := context.Background()
|
h := newHarness(t)
|
||||||
|
|
||||||
host, err := libp2p.New()
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("failed to create test host: %v", err)
|
|
||||||
}
|
|
||||||
defer host.Close()
|
|
||||||
|
|
||||||
bootstrapAddrs := []string{
|
|
||||||
"/ip4/127.0.0.1/tcp/4001/p2p/QmTest1",
|
|
||||||
"/ip4/127.0.0.1/tcp/4002/p2p/QmTest2",
|
|
||||||
}
|
|
||||||
|
|
||||||
d, err := NewDHT(ctx, host, WithBootstrapPeersFromStrings(bootstrapAddrs))
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("failed to create DHT: %v", err)
|
|
||||||
}
|
|
||||||
defer d.Close()
|
|
||||||
|
|
||||||
if len(d.config.BootstrapPeers) != 2 {
|
|
||||||
t.Errorf("expected 2 bootstrap peers, got %d", len(d.config.BootstrapPeers))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestWithBootstrapPeersFromStringsInvalid(t *testing.T) {
|
err := h.dht.Provide(h.ctx, "key")
|
||||||
ctx := context.Background()
|
if err == nil {
|
||||||
|
t.Fatal("expected Provide to fail when not bootstrapped")
|
||||||
host, err := libp2p.New()
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("failed to create test host: %v", err)
|
|
||||||
}
|
}
|
||||||
defer host.Close()
|
|
||||||
|
|
||||||
// Include invalid addresses - they should be filtered out
|
|
||||||
bootstrapAddrs := []string{
|
|
||||||
"/ip4/127.0.0.1/tcp/4001/p2p/QmTest1", // valid
|
|
||||||
"invalid-address", // invalid
|
|
||||||
"/ip4/127.0.0.1/tcp/4002/p2p/QmTest2", // valid
|
|
||||||
}
|
|
||||||
|
|
||||||
d, err := NewDHT(ctx, host, WithBootstrapPeersFromStrings(bootstrapAddrs))
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("failed to create DHT: %v", err)
|
|
||||||
}
|
|
||||||
defer d.Close()
|
|
||||||
|
|
||||||
// Should have filtered out the invalid address
|
|
||||||
if len(d.config.BootstrapPeers) != 2 {
|
|
||||||
t.Errorf("expected 2 valid bootstrap peers, got %d", len(d.config.BootstrapPeers))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestBootstrapWithoutPeers(t *testing.T) {
|
if !strings.Contains(err.Error(), "not bootstrapped") {
|
||||||
ctx := context.Background()
|
t.Fatalf("expected error to indicate bootstrap requirement, got %v", err)
|
||||||
|
|
||||||
host, err := libp2p.New()
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("failed to create test host: %v", err)
|
|
||||||
}
|
|
||||||
defer host.Close()
|
|
||||||
|
|
||||||
d, err := NewDHT(ctx, host, WithAutoBootstrap(false))
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("failed to create DHT: %v", err)
|
|
||||||
}
|
|
||||||
defer d.Close()
|
|
||||||
|
|
||||||
// Bootstrap should use default IPFS peers when none configured
|
|
||||||
err = d.Bootstrap()
|
|
||||||
// This might fail in test environment without network access, but should not panic
|
|
||||||
if err != nil {
|
|
||||||
// Expected in test environment
|
|
||||||
t.Logf("Bootstrap failed as expected in test environment: %v", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestIsBootstrapped(t *testing.T) {
|
|
||||||
ctx := context.Background()
|
|
||||||
|
|
||||||
host, err := libp2p.New()
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("failed to create test host: %v", err)
|
|
||||||
}
|
|
||||||
defer host.Close()
|
|
||||||
|
|
||||||
d, err := NewDHT(ctx, host, WithAutoBootstrap(false))
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("failed to create DHT: %v", err)
|
|
||||||
}
|
|
||||||
defer d.Close()
|
|
||||||
|
|
||||||
// Should not be bootstrapped initially
|
|
||||||
if d.IsBootstrapped() {
|
|
||||||
t.Error("DHT should not be bootstrapped initially")
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestRegisterPeer(t *testing.T) {
|
func TestRegisterPeer(t *testing.T) {
|
||||||
ctx := context.Background()
|
h := newHarness(t)
|
||||||
|
|
||||||
host, err := libp2p.New()
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("failed to create test host: %v", err)
|
|
||||||
}
|
|
||||||
defer host.Close()
|
|
||||||
|
|
||||||
d, err := NewDHT(ctx, host)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("failed to create DHT: %v", err)
|
|
||||||
}
|
|
||||||
defer d.Close()
|
|
||||||
|
|
||||||
peerID := test.RandPeerIDFatal(t)
|
peerID := test.RandPeerIDFatal(t)
|
||||||
agent := "claude"
|
|
||||||
role := "frontend"
|
h.dht.RegisterPeer(peerID, "apollo", "platform", []string{"go"})
|
||||||
capabilities := []string{"react", "javascript"}
|
|
||||||
|
peers := h.dht.GetKnownPeers()
|
||||||
d.RegisterPeer(peerID, agent, role, capabilities)
|
|
||||||
|
info, ok := peers[peerID]
|
||||||
knownPeers := d.GetKnownPeers()
|
if !ok {
|
||||||
if len(knownPeers) != 1 {
|
t.Fatalf("expected peer to be tracked")
|
||||||
t.Errorf("expected 1 known peer, got %d", len(knownPeers))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
peerInfo, exists := knownPeers[peerID]
|
if info.Agent != "apollo" {
|
||||||
if !exists {
|
t.Fatalf("expected agent apollo, got %s", info.Agent)
|
||||||
t.Error("peer not found in known peers")
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if peerInfo.Agent != agent {
|
if info.Role != "platform" {
|
||||||
t.Errorf("expected agent %s, got %s", agent, peerInfo.Agent)
|
t.Fatalf("expected role platform, got %s", info.Role)
|
||||||
}
|
}
|
||||||
|
|
||||||
if peerInfo.Role != role {
|
if len(info.Capabilities) != 1 || info.Capabilities[0] != "go" {
|
||||||
t.Errorf("expected role %s, got %s", role, peerInfo.Role)
|
t.Fatalf("expected capability go, got %v", info.Capabilities)
|
||||||
}
|
|
||||||
|
|
||||||
if len(peerInfo.Capabilities) != len(capabilities) {
|
|
||||||
t.Errorf("expected %d capabilities, got %d", len(capabilities), len(peerInfo.Capabilities))
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestGetConnectedPeers(t *testing.T) {
|
func TestGetStatsProvidesUptime(t *testing.T) {
|
||||||
ctx := context.Background()
|
h := newHarness(t)
|
||||||
|
|
||||||
host, err := libp2p.New()
|
stats := h.dht.GetStats()
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("failed to create test host: %v", err)
|
if stats.TotalPeers != 0 {
|
||||||
|
t.Fatalf("expected zero peers, got %d", stats.TotalPeers)
|
||||||
}
|
}
|
||||||
defer host.Close()
|
|
||||||
|
if stats.Uptime < 0 {
|
||||||
d, err := NewDHT(ctx, host)
|
t.Fatalf("expected non-negative uptime, got %v", stats.Uptime)
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("failed to create DHT: %v", err)
|
|
||||||
}
|
|
||||||
defer d.Close()
|
|
||||||
|
|
||||||
// Initially should have no connected peers
|
|
||||||
peers := d.GetConnectedPeers()
|
|
||||||
if len(peers) != 0 {
|
|
||||||
t.Errorf("expected 0 connected peers, got %d", len(peers))
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestPutAndGetValue(t *testing.T) {
|
|
||||||
ctx := context.Background()
|
|
||||||
|
|
||||||
host, err := libp2p.New()
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("failed to create test host: %v", err)
|
|
||||||
}
|
|
||||||
defer host.Close()
|
|
||||||
|
|
||||||
d, err := NewDHT(ctx, host, WithAutoBootstrap(false))
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("failed to create DHT: %v", err)
|
|
||||||
}
|
|
||||||
defer d.Close()
|
|
||||||
|
|
||||||
// Test without bootstrap (should fail)
|
|
||||||
key := "test-key"
|
|
||||||
value := []byte("test-value")
|
|
||||||
|
|
||||||
err = d.PutValue(ctx, key, value)
|
|
||||||
if err == nil {
|
|
||||||
t.Error("PutValue should fail when DHT not bootstrapped")
|
|
||||||
}
|
|
||||||
|
|
||||||
_, err = d.GetValue(ctx, key)
|
|
||||||
if err == nil {
|
|
||||||
t.Error("GetValue should fail when DHT not bootstrapped")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestProvideAndFindProviders(t *testing.T) {
|
|
||||||
ctx := context.Background()
|
|
||||||
|
|
||||||
host, err := libp2p.New()
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("failed to create test host: %v", err)
|
|
||||||
}
|
|
||||||
defer host.Close()
|
|
||||||
|
|
||||||
d, err := NewDHT(ctx, host, WithAutoBootstrap(false))
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("failed to create DHT: %v", err)
|
|
||||||
}
|
|
||||||
defer d.Close()
|
|
||||||
|
|
||||||
// Test without bootstrap (should fail)
|
|
||||||
key := "test-service"
|
|
||||||
|
|
||||||
err = d.Provide(ctx, key)
|
|
||||||
if err == nil {
|
|
||||||
t.Error("Provide should fail when DHT not bootstrapped")
|
|
||||||
}
|
|
||||||
|
|
||||||
_, err = d.FindProviders(ctx, key, 10)
|
|
||||||
if err == nil {
|
|
||||||
t.Error("FindProviders should fail when DHT not bootstrapped")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestFindPeer(t *testing.T) {
|
|
||||||
ctx := context.Background()
|
|
||||||
|
|
||||||
host, err := libp2p.New()
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("failed to create test host: %v", err)
|
|
||||||
}
|
|
||||||
defer host.Close()
|
|
||||||
|
|
||||||
d, err := NewDHT(ctx, host, WithAutoBootstrap(false))
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("failed to create DHT: %v", err)
|
|
||||||
}
|
|
||||||
defer d.Close()
|
|
||||||
|
|
||||||
// Test without bootstrap (should fail)
|
|
||||||
peerID := test.RandPeerIDFatal(t)
|
|
||||||
|
|
||||||
_, err = d.FindPeer(ctx, peerID)
|
|
||||||
if err == nil {
|
|
||||||
t.Error("FindPeer should fail when DHT not bootstrapped")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestFindPeersByRole(t *testing.T) {
|
|
||||||
ctx := context.Background()
|
|
||||||
|
|
||||||
host, err := libp2p.New()
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("failed to create test host: %v", err)
|
|
||||||
}
|
|
||||||
defer host.Close()
|
|
||||||
|
|
||||||
d, err := NewDHT(ctx, host, WithAutoBootstrap(false))
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("failed to create DHT: %v", err)
|
|
||||||
}
|
|
||||||
defer d.Close()
|
|
||||||
|
|
||||||
// Register some local peers
|
|
||||||
peerID1 := test.RandPeerIDFatal(t)
|
|
||||||
peerID2 := test.RandPeerIDFatal(t)
|
|
||||||
|
|
||||||
d.RegisterPeer(peerID1, "claude", "frontend", []string{"react"})
|
|
||||||
d.RegisterPeer(peerID2, "claude", "backend", []string{"go"})
|
|
||||||
|
|
||||||
// Find frontend peers
|
|
||||||
frontendPeers, err := d.FindPeersByRole(ctx, "frontend")
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("failed to find peers by role: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
if len(frontendPeers) != 1 {
|
|
||||||
t.Errorf("expected 1 frontend peer, got %d", len(frontendPeers))
|
|
||||||
}
|
|
||||||
|
|
||||||
if frontendPeers[0].ID != peerID1 {
|
|
||||||
t.Error("wrong peer returned for frontend role")
|
|
||||||
}
|
|
||||||
|
|
||||||
// Find all peers with wildcard
|
|
||||||
allPeers, err := d.FindPeersByRole(ctx, "*")
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("failed to find all peers: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
if len(allPeers) != 2 {
|
|
||||||
t.Errorf("expected 2 peers with wildcard, got %d", len(allPeers))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestAnnounceRole(t *testing.T) {
|
|
||||||
ctx := context.Background()
|
|
||||||
|
|
||||||
host, err := libp2p.New()
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("failed to create test host: %v", err)
|
|
||||||
}
|
|
||||||
defer host.Close()
|
|
||||||
|
|
||||||
d, err := NewDHT(ctx, host, WithAutoBootstrap(false))
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("failed to create DHT: %v", err)
|
|
||||||
}
|
|
||||||
defer d.Close()
|
|
||||||
|
|
||||||
// Should fail when not bootstrapped
|
|
||||||
err = d.AnnounceRole(ctx, "frontend")
|
|
||||||
if err == nil {
|
|
||||||
t.Error("AnnounceRole should fail when DHT not bootstrapped")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestAnnounceCapability(t *testing.T) {
|
|
||||||
ctx := context.Background()
|
|
||||||
|
|
||||||
host, err := libp2p.New()
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("failed to create test host: %v", err)
|
|
||||||
}
|
|
||||||
defer host.Close()
|
|
||||||
|
|
||||||
d, err := NewDHT(ctx, host, WithAutoBootstrap(false))
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("failed to create DHT: %v", err)
|
|
||||||
}
|
|
||||||
defer d.Close()
|
|
||||||
|
|
||||||
// Should fail when not bootstrapped
|
|
||||||
err = d.AnnounceCapability(ctx, "react")
|
|
||||||
if err == nil {
|
|
||||||
t.Error("AnnounceCapability should fail when DHT not bootstrapped")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestGetRoutingTable(t *testing.T) {
|
|
||||||
ctx := context.Background()
|
|
||||||
|
|
||||||
host, err := libp2p.New()
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("failed to create test host: %v", err)
|
|
||||||
}
|
|
||||||
defer host.Close()
|
|
||||||
|
|
||||||
d, err := NewDHT(ctx, host)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("failed to create DHT: %v", err)
|
|
||||||
}
|
|
||||||
defer d.Close()
|
|
||||||
|
|
||||||
rt := d.GetRoutingTable()
|
|
||||||
if rt == nil {
|
|
||||||
t.Error("routing table should not be nil")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestGetDHTSize(t *testing.T) {
|
|
||||||
ctx := context.Background()
|
|
||||||
|
|
||||||
host, err := libp2p.New()
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("failed to create test host: %v", err)
|
|
||||||
}
|
|
||||||
defer host.Close()
|
|
||||||
|
|
||||||
d, err := NewDHT(ctx, host)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("failed to create DHT: %v", err)
|
|
||||||
}
|
|
||||||
defer d.Close()
|
|
||||||
|
|
||||||
size := d.GetDHTSize()
|
|
||||||
// Should be 0 or small initially
|
|
||||||
if size < 0 {
|
|
||||||
t.Errorf("DHT size should be non-negative, got %d", size)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestRefreshRoutingTable(t *testing.T) {
|
|
||||||
ctx := context.Background()
|
|
||||||
|
|
||||||
host, err := libp2p.New()
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("failed to create test host: %v", err)
|
|
||||||
}
|
|
||||||
defer host.Close()
|
|
||||||
|
|
||||||
d, err := NewDHT(ctx, host, WithAutoBootstrap(false))
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("failed to create DHT: %v", err)
|
|
||||||
}
|
|
||||||
defer d.Close()
|
|
||||||
|
|
||||||
// Should fail when not bootstrapped
|
|
||||||
err = d.RefreshRoutingTable()
|
|
||||||
if err == nil {
|
|
||||||
t.Error("RefreshRoutingTable should fail when DHT not bootstrapped")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestHost(t *testing.T) {
|
|
||||||
ctx := context.Background()
|
|
||||||
|
|
||||||
host, err := libp2p.New()
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("failed to create test host: %v", err)
|
|
||||||
}
|
|
||||||
defer host.Close()
|
|
||||||
|
|
||||||
d, err := NewDHT(ctx, host)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("failed to create DHT: %v", err)
|
|
||||||
}
|
|
||||||
defer d.Close()
|
|
||||||
|
|
||||||
if d.Host() != host {
|
|
||||||
t.Error("Host() should return the same host instance")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestClose(t *testing.T) {
|
|
||||||
ctx := context.Background()
|
|
||||||
|
|
||||||
host, err := libp2p.New()
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("failed to create test host: %v", err)
|
|
||||||
}
|
|
||||||
defer host.Close()
|
|
||||||
|
|
||||||
d, err := NewDHT(ctx, host)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("failed to create DHT: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Should close without error
|
|
||||||
err = d.Close()
|
|
||||||
if err != nil {
|
|
||||||
t.Errorf("Close() failed: %v", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -2,559 +2,155 @@ package dht
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"chorus/pkg/config"
|
"chorus/pkg/config"
|
||||||
)
|
)
|
||||||
|
|
||||||
// TestDHTSecurityPolicyEnforcement tests security policy enforcement in DHT operations
|
type securityTestCase struct {
|
||||||
func TestDHTSecurityPolicyEnforcement(t *testing.T) {
|
name string
|
||||||
ctx := context.Background()
|
role string
|
||||||
|
address string
|
||||||
testCases := []struct {
|
contentType string
|
||||||
name string
|
expectSuccess bool
|
||||||
currentRole string
|
expectErrHint string
|
||||||
operation string
|
}
|
||||||
ucxlAddress string
|
|
||||||
contentType string
|
func newTestEncryptedStorage(cfg *config.Config) *EncryptedDHTStorage {
|
||||||
expectSuccess bool
|
return &EncryptedDHTStorage{
|
||||||
expectedError string
|
ctx: context.Background(),
|
||||||
}{
|
config: cfg,
|
||||||
// Store operation tests
|
nodeID: "test-node",
|
||||||
|
cache: make(map[string]*CachedEntry),
|
||||||
|
metrics: &StorageMetrics{LastUpdate: time.Now()},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCheckStoreAccessPolicy(t *testing.T) {
|
||||||
|
cases := []securityTestCase{
|
||||||
{
|
{
|
||||||
name: "admin_can_store_all_content",
|
name: "backend developer can store",
|
||||||
currentRole: "admin",
|
role: "backend_developer",
|
||||||
operation: "store",
|
address: "agent1:backend_developer:api:endpoint",
|
||||||
ucxlAddress: "agent1:admin:system:security_audit",
|
|
||||||
contentType: "decision",
|
contentType: "decision",
|
||||||
expectSuccess: true,
|
expectSuccess: true,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "backend_developer_can_store_backend_content",
|
name: "project manager can store",
|
||||||
currentRole: "backend_developer",
|
role: "project_manager",
|
||||||
operation: "store",
|
address: "agent1:project_manager:plan:milestone",
|
||||||
ucxlAddress: "agent1:backend_developer:api:endpoint_design",
|
contentType: "decision",
|
||||||
contentType: "suggestion",
|
|
||||||
expectSuccess: true,
|
expectSuccess: true,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "readonly_role_cannot_store",
|
name: "read only user cannot store",
|
||||||
currentRole: "readonly_user",
|
role: "readonly_user",
|
||||||
operation: "store",
|
address: "agent1:readonly_user:note:observation",
|
||||||
ucxlAddress: "agent1:readonly_user:project:observation",
|
contentType: "note",
|
||||||
contentType: "suggestion",
|
expectSuccess: false,
|
||||||
expectSuccess: false,
|
expectErrHint: "read-only authority",
|
||||||
expectedError: "read-only authority",
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "unknown_role_cannot_store",
|
name: "unknown role rejected",
|
||||||
currentRole: "invalid_role",
|
role: "ghost_role",
|
||||||
operation: "store",
|
address: "agent1:ghost_role:context",
|
||||||
ucxlAddress: "agent1:invalid_role:project:task",
|
contentType: "decision",
|
||||||
contentType: "decision",
|
expectSuccess: false,
|
||||||
expectSuccess: false,
|
expectErrHint: "unknown creator role",
|
||||||
expectedError: "unknown creator role",
|
|
||||||
},
|
|
||||||
|
|
||||||
// Retrieve operation tests
|
|
||||||
{
|
|
||||||
name: "any_valid_role_can_retrieve",
|
|
||||||
currentRole: "qa_engineer",
|
|
||||||
operation: "retrieve",
|
|
||||||
ucxlAddress: "agent1:backend_developer:api:test_data",
|
|
||||||
expectSuccess: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "unknown_role_cannot_retrieve",
|
|
||||||
currentRole: "nonexistent_role",
|
|
||||||
operation: "retrieve",
|
|
||||||
ucxlAddress: "agent1:backend_developer:api:test_data",
|
|
||||||
expectSuccess: false,
|
|
||||||
expectedError: "unknown current role",
|
|
||||||
},
|
|
||||||
|
|
||||||
// Announce operation tests
|
|
||||||
{
|
|
||||||
name: "coordination_role_can_announce",
|
|
||||||
currentRole: "senior_software_architect",
|
|
||||||
operation: "announce",
|
|
||||||
ucxlAddress: "agent1:senior_software_architect:architecture:blueprint",
|
|
||||||
expectSuccess: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "decision_role_can_announce",
|
|
||||||
currentRole: "security_expert",
|
|
||||||
operation: "announce",
|
|
||||||
ucxlAddress: "agent1:security_expert:security:policy",
|
|
||||||
expectSuccess: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "suggestion_role_cannot_announce",
|
|
||||||
currentRole: "suggestion_only_role",
|
|
||||||
operation: "announce",
|
|
||||||
ucxlAddress: "agent1:suggestion_only_role:project:idea",
|
|
||||||
expectSuccess: false,
|
|
||||||
expectedError: "lacks authority",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "readonly_role_cannot_announce",
|
|
||||||
currentRole: "readonly_user",
|
|
||||||
operation: "announce",
|
|
||||||
ucxlAddress: "agent1:readonly_user:project:observation",
|
|
||||||
expectSuccess: false,
|
|
||||||
expectedError: "lacks authority",
|
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, tc := range testCases {
|
cfg := &config.Config{Agent: config.AgentConfig{}}
|
||||||
|
eds := newTestEncryptedStorage(cfg)
|
||||||
|
|
||||||
|
for _, tc := range cases {
|
||||||
t.Run(tc.name, func(t *testing.T) {
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
// Create test configuration
|
err := eds.checkStoreAccessPolicy(tc.role, tc.address, tc.contentType)
|
||||||
cfg := &config.Config{
|
verifySecurityExpectation(t, tc.expectSuccess, tc.expectErrHint, err)
|
||||||
Agent: config.AgentConfig{
|
|
||||||
ID: "test-agent",
|
|
||||||
Role: tc.currentRole,
|
|
||||||
},
|
|
||||||
Security: config.SecurityConfig{
|
|
||||||
KeyRotationDays: 90,
|
|
||||||
AuditLogging: true,
|
|
||||||
AuditPath: "/tmp/test-security-audit.log",
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create mock encrypted storage
|
|
||||||
eds := createMockEncryptedStorage(ctx, cfg)
|
|
||||||
|
|
||||||
var err error
|
|
||||||
switch tc.operation {
|
|
||||||
case "store":
|
|
||||||
err = eds.checkStoreAccessPolicy(tc.currentRole, tc.ucxlAddress, tc.contentType)
|
|
||||||
case "retrieve":
|
|
||||||
err = eds.checkRetrieveAccessPolicy(tc.currentRole, tc.ucxlAddress)
|
|
||||||
case "announce":
|
|
||||||
err = eds.checkAnnounceAccessPolicy(tc.currentRole, tc.ucxlAddress)
|
|
||||||
}
|
|
||||||
|
|
||||||
if tc.expectSuccess {
|
|
||||||
if err != nil {
|
|
||||||
t.Errorf("Expected %s operation to succeed for role %s, but got error: %v",
|
|
||||||
tc.operation, tc.currentRole, err)
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if err == nil {
|
|
||||||
t.Errorf("Expected %s operation to fail for role %s, but it succeeded",
|
|
||||||
tc.operation, tc.currentRole)
|
|
||||||
}
|
|
||||||
if tc.expectedError != "" && !containsSubstring(err.Error(), tc.expectedError) {
|
|
||||||
t.Errorf("Expected error to contain '%s', got '%s'", tc.expectedError, err.Error())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// TestDHTAuditLogging tests comprehensive audit logging for DHT operations
|
func TestCheckRetrieveAccessPolicy(t *testing.T) {
|
||||||
func TestDHTAuditLogging(t *testing.T) {
|
cases := []securityTestCase{
|
||||||
ctx := context.Background()
|
|
||||||
|
|
||||||
testCases := []struct {
|
|
||||||
name string
|
|
||||||
operation string
|
|
||||||
role string
|
|
||||||
ucxlAddress string
|
|
||||||
success bool
|
|
||||||
errorMsg string
|
|
||||||
expectAudit bool
|
|
||||||
}{
|
|
||||||
{
|
{
|
||||||
name: "successful_store_operation",
|
name: "qa engineer allowed",
|
||||||
operation: "store",
|
role: "qa_engineer",
|
||||||
role: "backend_developer",
|
address: "agent1:backend_developer:api:tests",
|
||||||
ucxlAddress: "agent1:backend_developer:api:user_service",
|
expectSuccess: true,
|
||||||
success: true,
|
|
||||||
expectAudit: true,
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "failed_store_operation",
|
name: "unknown role rejected",
|
||||||
operation: "store",
|
role: "unknown",
|
||||||
role: "readonly_user",
|
address: "agent1:backend_developer:api:tests",
|
||||||
ucxlAddress: "agent1:readonly_user:project:readonly_attempt",
|
expectSuccess: false,
|
||||||
success: false,
|
expectErrHint: "unknown current role",
|
||||||
errorMsg: "read-only authority",
|
|
||||||
expectAudit: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "successful_retrieve_operation",
|
|
||||||
operation: "retrieve",
|
|
||||||
role: "frontend_developer",
|
|
||||||
ucxlAddress: "agent1:backend_developer:api:user_data",
|
|
||||||
success: true,
|
|
||||||
expectAudit: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "successful_announce_operation",
|
|
||||||
operation: "announce",
|
|
||||||
role: "senior_software_architect",
|
|
||||||
ucxlAddress: "agent1:senior_software_architect:architecture:system_design",
|
|
||||||
success: true,
|
|
||||||
expectAudit: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "audit_disabled_no_logging",
|
|
||||||
operation: "store",
|
|
||||||
role: "backend_developer",
|
|
||||||
ucxlAddress: "agent1:backend_developer:api:no_audit",
|
|
||||||
success: true,
|
|
||||||
expectAudit: false,
|
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, tc := range testCases {
|
cfg := &config.Config{Agent: config.AgentConfig{}}
|
||||||
|
eds := newTestEncryptedStorage(cfg)
|
||||||
|
|
||||||
|
for _, tc := range cases {
|
||||||
t.Run(tc.name, func(t *testing.T) {
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
// Create configuration with audit logging
|
err := eds.checkRetrieveAccessPolicy(tc.role, tc.address)
|
||||||
cfg := &config.Config{
|
verifySecurityExpectation(t, tc.expectSuccess, tc.expectErrHint, err)
|
||||||
Agent: config.AgentConfig{
|
|
||||||
ID: "test-agent",
|
|
||||||
Role: tc.role,
|
|
||||||
},
|
|
||||||
Security: config.SecurityConfig{
|
|
||||||
KeyRotationDays: 90,
|
|
||||||
AuditLogging: tc.expectAudit,
|
|
||||||
AuditPath: "/tmp/test-dht-audit.log",
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create mock encrypted storage
|
|
||||||
eds := createMockEncryptedStorage(ctx, cfg)
|
|
||||||
|
|
||||||
// Capture audit output
|
|
||||||
auditCaptured := false
|
|
||||||
|
|
||||||
// Simulate audit operation
|
|
||||||
switch tc.operation {
|
|
||||||
case "store":
|
|
||||||
// Mock the audit function call
|
|
||||||
if tc.expectAudit && cfg.Security.AuditLogging {
|
|
||||||
eds.auditStoreOperation(tc.ucxlAddress, tc.role, "test-content", 1024, tc.success, tc.errorMsg)
|
|
||||||
auditCaptured = true
|
|
||||||
}
|
|
||||||
case "retrieve":
|
|
||||||
if tc.expectAudit && cfg.Security.AuditLogging {
|
|
||||||
eds.auditRetrieveOperation(tc.ucxlAddress, tc.role, tc.success, tc.errorMsg)
|
|
||||||
auditCaptured = true
|
|
||||||
}
|
|
||||||
case "announce":
|
|
||||||
if tc.expectAudit && cfg.Security.AuditLogging {
|
|
||||||
eds.auditAnnounceOperation(tc.ucxlAddress, tc.role, tc.success, tc.errorMsg)
|
|
||||||
auditCaptured = true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Verify audit logging behavior
|
|
||||||
if tc.expectAudit && !auditCaptured {
|
|
||||||
t.Errorf("Expected audit logging for %s operation but none was captured", tc.operation)
|
|
||||||
}
|
|
||||||
if !tc.expectAudit && auditCaptured {
|
|
||||||
t.Errorf("Expected no audit logging for %s operation but audit was captured", tc.operation)
|
|
||||||
}
|
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// TestSecurityConfigIntegration tests integration with SecurityConfig
|
func TestCheckAnnounceAccessPolicy(t *testing.T) {
|
||||||
func TestSecurityConfigIntegration(t *testing.T) {
|
cases := []securityTestCase{
|
||||||
ctx := context.Background()
|
|
||||||
|
|
||||||
testConfigs := []struct {
|
|
||||||
name string
|
|
||||||
auditLogging bool
|
|
||||||
auditPath string
|
|
||||||
expectAuditWork bool
|
|
||||||
}{
|
|
||||||
{
|
{
|
||||||
name: "audit_enabled_with_path",
|
name: "architect can announce",
|
||||||
auditLogging: true,
|
role: "senior_software_architect",
|
||||||
auditPath: "/tmp/test-audit-enabled.log",
|
address: "agent1:senior_software_architect:architecture:proposal",
|
||||||
expectAuditWork: true,
|
expectSuccess: true,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "audit_disabled",
|
name: "suggestion role cannot announce",
|
||||||
auditLogging: false,
|
role: "suggestion_only_role",
|
||||||
auditPath: "/tmp/test-audit-disabled.log",
|
address: "agent1:suggestion_only_role:idea",
|
||||||
expectAuditWork: false,
|
expectSuccess: false,
|
||||||
|
expectErrHint: "lacks authority",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "audit_enabled_no_path",
|
name: "unknown role rejected",
|
||||||
auditLogging: true,
|
role: "mystery",
|
||||||
auditPath: "",
|
address: "agent1:mystery:topic",
|
||||||
expectAuditWork: false,
|
expectSuccess: false,
|
||||||
|
expectErrHint: "unknown current role",
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, tc := range testConfigs {
|
cfg := &config.Config{Agent: config.AgentConfig{}}
|
||||||
|
eds := newTestEncryptedStorage(cfg)
|
||||||
|
|
||||||
|
for _, tc := range cases {
|
||||||
t.Run(tc.name, func(t *testing.T) {
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
cfg := &config.Config{
|
err := eds.checkAnnounceAccessPolicy(tc.role, tc.address)
|
||||||
Agent: config.AgentConfig{
|
verifySecurityExpectation(t, tc.expectSuccess, tc.expectErrHint, err)
|
||||||
ID: "test-agent",
|
|
||||||
Role: "backend_developer",
|
|
||||||
},
|
|
||||||
Security: config.SecurityConfig{
|
|
||||||
KeyRotationDays: 90,
|
|
||||||
AuditLogging: tc.auditLogging,
|
|
||||||
AuditPath: tc.auditPath,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
eds := createMockEncryptedStorage(ctx, cfg)
|
|
||||||
|
|
||||||
// Test audit function behavior with different configurations
|
|
||||||
auditWorked := func() bool {
|
|
||||||
if !cfg.Security.AuditLogging || cfg.Security.AuditPath == "" {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
return true
|
|
||||||
}()
|
|
||||||
|
|
||||||
if auditWorked != tc.expectAuditWork {
|
|
||||||
t.Errorf("Expected audit to work: %v, but got: %v", tc.expectAuditWork, auditWorked)
|
|
||||||
}
|
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// TestRoleAuthorityHierarchy tests role authority hierarchy enforcement
|
func verifySecurityExpectation(t *testing.T, expectSuccess bool, hint string, err error) {
|
||||||
func TestRoleAuthorityHierarchy(t *testing.T) {
|
t.Helper()
|
||||||
ctx := context.Background()
|
|
||||||
|
if expectSuccess {
|
||||||
// Test role authority levels for different operations
|
if err != nil {
|
||||||
authorityTests := []struct {
|
t.Fatalf("expected success, got error: %v", err)
|
||||||
role string
|
}
|
||||||
authorityLevel config.AuthorityLevel
|
return
|
||||||
canStore bool
|
|
||||||
canRetrieve bool
|
|
||||||
canAnnounce bool
|
|
||||||
}{
|
|
||||||
{
|
|
||||||
role: "admin",
|
|
||||||
authorityLevel: config.AuthorityMaster,
|
|
||||||
canStore: true,
|
|
||||||
canRetrieve: true,
|
|
||||||
canAnnounce: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
role: "senior_software_architect",
|
|
||||||
authorityLevel: config.AuthorityDecision,
|
|
||||||
canStore: true,
|
|
||||||
canRetrieve: true,
|
|
||||||
canAnnounce: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
role: "security_expert",
|
|
||||||
authorityLevel: config.AuthorityCoordination,
|
|
||||||
canStore: true,
|
|
||||||
canRetrieve: true,
|
|
||||||
canAnnounce: true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
role: "backend_developer",
|
|
||||||
authorityLevel: config.AuthoritySuggestion,
|
|
||||||
canStore: true,
|
|
||||||
canRetrieve: true,
|
|
||||||
canAnnounce: false,
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, tt := range authorityTests {
|
if err == nil {
|
||||||
t.Run(tt.role+"_authority_test", func(t *testing.T) {
|
t.Fatal("expected error but got success")
|
||||||
cfg := &config.Config{
|
}
|
||||||
Agent: config.AgentConfig{
|
|
||||||
ID: "test-agent",
|
|
||||||
Role: tt.role,
|
|
||||||
},
|
|
||||||
Security: config.SecurityConfig{
|
|
||||||
KeyRotationDays: 90,
|
|
||||||
AuditLogging: true,
|
|
||||||
AuditPath: "/tmp/test-authority.log",
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
eds := createMockEncryptedStorage(ctx, cfg)
|
if hint != "" && !strings.Contains(err.Error(), hint) {
|
||||||
|
t.Fatalf("expected error to contain %q, got %q", hint, err.Error())
|
||||||
// Test store permission
|
|
||||||
storeErr := eds.checkStoreAccessPolicy(tt.role, "test:address", "content")
|
|
||||||
if tt.canStore && storeErr != nil {
|
|
||||||
t.Errorf("Role %s should be able to store but got error: %v", tt.role, storeErr)
|
|
||||||
}
|
|
||||||
if !tt.canStore && storeErr == nil {
|
|
||||||
t.Errorf("Role %s should not be able to store but operation succeeded", tt.role)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Test retrieve permission
|
|
||||||
retrieveErr := eds.checkRetrieveAccessPolicy(tt.role, "test:address")
|
|
||||||
if tt.canRetrieve && retrieveErr != nil {
|
|
||||||
t.Errorf("Role %s should be able to retrieve but got error: %v", tt.role, retrieveErr)
|
|
||||||
}
|
|
||||||
if !tt.canRetrieve && retrieveErr == nil {
|
|
||||||
t.Errorf("Role %s should not be able to retrieve but operation succeeded", tt.role)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Test announce permission
|
|
||||||
announceErr := eds.checkAnnounceAccessPolicy(tt.role, "test:address")
|
|
||||||
if tt.canAnnounce && announceErr != nil {
|
|
||||||
t.Errorf("Role %s should be able to announce but got error: %v", tt.role, announceErr)
|
|
||||||
}
|
|
||||||
if !tt.canAnnounce && announceErr == nil {
|
|
||||||
t.Errorf("Role %s should not be able to announce but operation succeeded", tt.role)
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// TestSecurityMetrics tests security-related metrics
|
|
||||||
func TestSecurityMetrics(t *testing.T) {
|
|
||||||
ctx := context.Background()
|
|
||||||
|
|
||||||
cfg := &config.Config{
|
|
||||||
Agent: config.AgentConfig{
|
|
||||||
ID: "test-agent",
|
|
||||||
Role: "backend_developer",
|
|
||||||
},
|
|
||||||
Security: config.SecurityConfig{
|
|
||||||
KeyRotationDays: 90,
|
|
||||||
AuditLogging: true,
|
|
||||||
AuditPath: "/tmp/test-metrics.log",
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
eds := createMockEncryptedStorage(ctx, cfg)
|
|
||||||
|
|
||||||
// Simulate some operations to generate metrics
|
|
||||||
for i := 0; i < 5; i++ {
|
|
||||||
eds.metrics.StoredItems++
|
|
||||||
eds.metrics.RetrievedItems++
|
|
||||||
eds.metrics.EncryptionOps++
|
|
||||||
eds.metrics.DecryptionOps++
|
|
||||||
}
|
|
||||||
|
|
||||||
metrics := eds.GetMetrics()
|
|
||||||
|
|
||||||
expectedMetrics := map[string]int64{
|
|
||||||
"stored_items": 5,
|
|
||||||
"retrieved_items": 5,
|
|
||||||
"encryption_ops": 5,
|
|
||||||
"decryption_ops": 5,
|
|
||||||
}
|
|
||||||
|
|
||||||
for metricName, expectedValue := range expectedMetrics {
|
|
||||||
if actualValue, ok := metrics[metricName]; !ok {
|
|
||||||
t.Errorf("Expected metric %s to be present in metrics", metricName)
|
|
||||||
} else if actualValue != expectedValue {
|
|
||||||
t.Errorf("Expected %s to be %d, got %v", metricName, expectedValue, actualValue)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Helper functions
|
|
||||||
|
|
||||||
func createMockEncryptedStorage(ctx context.Context, cfg *config.Config) *EncryptedDHTStorage {
|
|
||||||
return &EncryptedDHTStorage{
|
|
||||||
ctx: ctx,
|
|
||||||
config: cfg,
|
|
||||||
nodeID: "test-node-id",
|
|
||||||
cache: make(map[string]*CachedEntry),
|
|
||||||
metrics: &StorageMetrics{
|
|
||||||
LastUpdate: time.Now(),
|
|
||||||
},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func containsSubstring(str, substr string) bool {
|
|
||||||
if len(substr) == 0 {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
if len(str) < len(substr) {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
for i := 0; i <= len(str)-len(substr); i++ {
|
|
||||||
if str[i:i+len(substr)] == substr {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
// Benchmarks for security performance
|
|
||||||
|
|
||||||
func BenchmarkSecurityPolicyChecks(b *testing.B) {
|
|
||||||
ctx := context.Background()
|
|
||||||
cfg := &config.Config{
|
|
||||||
Agent: config.AgentConfig{
|
|
||||||
ID: "bench-agent",
|
|
||||||
Role: "backend_developer",
|
|
||||||
},
|
|
||||||
Security: config.SecurityConfig{
|
|
||||||
KeyRotationDays: 90,
|
|
||||||
AuditLogging: true,
|
|
||||||
AuditPath: "/tmp/bench-security.log",
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
eds := createMockEncryptedStorage(ctx, cfg)
|
|
||||||
|
|
||||||
b.ResetTimer()
|
|
||||||
|
|
||||||
b.Run("store_policy_check", func(b *testing.B) {
|
|
||||||
for i := 0; i < b.N; i++ {
|
|
||||||
eds.checkStoreAccessPolicy("backend_developer", "test:address", "content")
|
|
||||||
}
|
|
||||||
})
|
|
||||||
|
|
||||||
b.Run("retrieve_policy_check", func(b *testing.B) {
|
|
||||||
for i := 0; i < b.N; i++ {
|
|
||||||
eds.checkRetrieveAccessPolicy("backend_developer", "test:address")
|
|
||||||
}
|
|
||||||
})
|
|
||||||
|
|
||||||
b.Run("announce_policy_check", func(b *testing.B) {
|
|
||||||
for i := 0; i < b.N; i++ {
|
|
||||||
eds.checkAnnounceAccessPolicy("senior_software_architect", "test:address")
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
func BenchmarkAuditOperations(b *testing.B) {
|
|
||||||
ctx := context.Background()
|
|
||||||
cfg := &config.Config{
|
|
||||||
Agent: config.AgentConfig{
|
|
||||||
ID: "bench-agent",
|
|
||||||
Role: "backend_developer",
|
|
||||||
},
|
|
||||||
Security: config.SecurityConfig{
|
|
||||||
KeyRotationDays: 90,
|
|
||||||
AuditLogging: true,
|
|
||||||
AuditPath: "/tmp/bench-audit.log",
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
eds := createMockEncryptedStorage(ctx, cfg)
|
|
||||||
|
|
||||||
b.ResetTimer()
|
|
||||||
|
|
||||||
b.Run("store_audit", func(b *testing.B) {
|
|
||||||
for i := 0; i < b.N; i++ {
|
|
||||||
eds.auditStoreOperation("test:address", "backend_developer", "content", 1024, true, "")
|
|
||||||
}
|
|
||||||
})
|
|
||||||
|
|
||||||
b.Run("retrieve_audit", func(b *testing.B) {
|
|
||||||
for i := 0; i < b.N; i++ {
|
|
||||||
eds.auditRetrieveOperation("test:address", "backend_developer", true, "")
|
|
||||||
}
|
|
||||||
})
|
|
||||||
|
|
||||||
b.Run("announce_audit", func(b *testing.B) {
|
|
||||||
for i := 0; i < b.N; i++ {
|
|
||||||
eds.auditAnnounceOperation("test:address", "backend_developer", true, "")
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
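A short sketch of how the rewritten table-driven security tests are meant to be extended; it reuses securityTestCase, newTestEncryptedStorage, and verifySecurityExpectation exactly as introduced above, while the test name and UCXL address are made-up examples.

package dht

import (
	"testing"

	"chorus/pkg/config"
)

// TestCheckStoreAccessPolicyExample shows how an extra case slots into the
// same table-driven pattern used by the rewritten security tests.
func TestCheckStoreAccessPolicyExample(t *testing.T) {
	cases := []securityTestCase{
		{
			// Hypothetical address purely for illustration; backend_developer
			// storing a decision mirrors the grounded case above.
			name:          "backend developer can store api docs",
			role:          "backend_developer",
			address:       "agent1:backend_developer:docs:openapi",
			contentType:   "decision",
			expectSuccess: true,
		},
	}

	eds := newTestEncryptedStorage(&config.Config{Agent: config.AgentConfig{}})

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			err := eds.checkStoreAccessPolicy(tc.role, tc.address, tc.contentType)
			verifySecurityExpectation(t, tc.expectSuccess, tc.expectErrHint, err)
		})
	}
}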
@@ -1,14 +1,117 @@
package dht

import (
	"context"
	"errors"
	"fmt"

	"chorus/pkg/config"
	libp2p "github.com/libp2p/go-libp2p"
	"github.com/libp2p/go-libp2p/core/host"
	"github.com/libp2p/go-libp2p/core/peer"
	"github.com/libp2p/go-libp2p/p2p/security/noise"
	"github.com/libp2p/go-libp2p/p2p/transport/tcp"
	"github.com/multiformats/go-multiaddr"
)

// RealDHT wraps a libp2p-based DHT to satisfy the generic DHT interface.
type RealDHT struct {
	cancel context.CancelFunc
	host   host.Host
	dht    *LibP2PDHT
}

// NewRealDHT creates a new real DHT implementation backed by libp2p.
func NewRealDHT(cfg *config.HybridConfig) (DHT, error) {
	if cfg == nil {
		cfg = &config.HybridConfig{}
	}

	ctx, cancel := context.WithCancel(context.Background())

	listenAddr, err := multiaddr.NewMultiaddr("/ip4/0.0.0.0/tcp/0")
	if err != nil {
		cancel()
		return nil, fmt.Errorf("failed to create listen address: %w", err)
	}

	host, err := libp2p.New(
		libp2p.ListenAddrs(listenAddr),
		libp2p.Security(noise.ID, noise.New),
		libp2p.Transport(tcp.NewTCPTransport),
		libp2p.DefaultMuxers,
		libp2p.EnableRelay(),
	)
	if err != nil {
		cancel()
		return nil, fmt.Errorf("failed to create libp2p host: %w", err)
	}

	opts := []Option{
		WithProtocolPrefix("/CHORUS"),
	}

	if nodes := cfg.GetDHTBootstrapNodes(); len(nodes) > 0 {
		opts = append(opts, WithBootstrapPeersFromStrings(nodes))
	}

	libp2pDHT, err := NewLibP2PDHT(ctx, host, opts...)
	if err != nil {
		host.Close()
		cancel()
		return nil, fmt.Errorf("failed to initialize libp2p DHT: %w", err)
	}

	if err := libp2pDHT.Bootstrap(); err != nil {
		libp2pDHT.Close()
		host.Close()
		cancel()
		return nil, fmt.Errorf("failed to bootstrap DHT: %w", err)
	}

	return &RealDHT{
		cancel: cancel,
		host:   host,
		dht:    libp2pDHT,
	}, nil
}

// PutValue stores a value in the DHT.
func (r *RealDHT) PutValue(ctx context.Context, key string, value []byte) error {
	return r.dht.PutValue(ctx, key, value)
}

// GetValue retrieves a value from the DHT.
func (r *RealDHT) GetValue(ctx context.Context, key string) ([]byte, error) {
	return r.dht.GetValue(ctx, key)
}

// Provide announces that this node can provide the given key.
func (r *RealDHT) Provide(ctx context.Context, key string) error {
	return r.dht.Provide(ctx, key)
}

// FindProviders locates peers that can provide the specified key.
func (r *RealDHT) FindProviders(ctx context.Context, key string, limit int) ([]peer.AddrInfo, error) {
	return r.dht.FindProviders(ctx, key, limit)
}

// GetStats exposes runtime metrics for the real DHT.
func (r *RealDHT) GetStats() DHTStats {
	return r.dht.GetStats()
}

// Close releases resources associated with the DHT.
func (r *RealDHT) Close() error {
	r.cancel()

	var errs []error
	if err := r.dht.Close(); err != nil {
		errs = append(errs, err)
	}
	if err := r.host.Close(); err != nil {
		errs = append(errs, err)
	}

	return errors.Join(errs...)
}
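For orientation, a hedged usage sketch of the new constructor. The RealDHT methods are the ones defined above; the key and payload are illustrative only, and how HybridConfig is populated in production is not shown in this hunk.

    package main

    import (
    	"context"
    	"log"

    	"chorus/pkg/config"
    	"chorus/pkg/dht"
    )

    func main() {
    	// Assumption: an empty HybridConfig is acceptable here since NewRealDHT
    	// defaults a nil config; bootstrap peers would normally come from the
    	// regular config loading path.
    	d, err := dht.NewRealDHT(&config.HybridConfig{})
    	if err != nil {
    		log.Fatalf("dht init: %v", err)
    	}
    	defer d.Close()

    	ctx := context.Background()
    	if err := d.PutValue(ctx, "ucxl://example/key", []byte("payload")); err != nil {
    		log.Printf("put failed: %v", err)
    	}
    	if val, err := d.GetValue(ctx, "ucxl://example/key"); err == nil {
    		log.Printf("retrieved %d bytes", len(val))
    	}
    }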
@@ -2,159 +2,106 @@ package dht

import (
	"context"
	"testing"
	"time"
)

func newReplicationManagerForTest(t *testing.T) *ReplicationManager {
	t.Helper()

	cfg := &ReplicationConfig{
		ReplicationFactor:         3,
		ReprovideInterval:         time.Hour,
		CleanupInterval:           time.Hour,
		ProviderTTL:               30 * time.Minute,
		MaxProvidersPerKey:        5,
		EnableAutoReplication:     false,
		EnableReprovide:           false,
		MaxConcurrentReplications: 1,
	}

	rm := NewReplicationManager(context.Background(), nil, cfg)
	t.Cleanup(func() {
		if rm.reprovideTimer != nil {
			rm.reprovideTimer.Stop()
		}
		if rm.cleanupTimer != nil {
			rm.cleanupTimer.Stop()
		}
		rm.cancel()
	})
	return rm
}

func TestAddContentRegistersKey(t *testing.T) {
	rm := newReplicationManagerForTest(t)

	if err := rm.AddContent("ucxl://example/path", 512, 1); err != nil {
		t.Fatalf("expected AddContent to succeed, got error: %v", err)
	}

	rm.keysMutex.RLock()
	record, ok := rm.contentKeys["ucxl://example/path"]
	rm.keysMutex.RUnlock()

	if !ok {
		t.Fatal("expected content key to be registered")
	}

	if record.Size != 512 {
		t.Fatalf("expected size 512, got %d", record.Size)
	}
}

func TestRemoveContentClearsTracking(t *testing.T) {
	rm := newReplicationManagerForTest(t)

	if err := rm.AddContent("ucxl://example/path", 512, 1); err != nil {
		t.Fatalf("AddContent returned error: %v", err)
	}

	if err := rm.RemoveContent("ucxl://example/path"); err != nil {
		t.Fatalf("RemoveContent returned error: %v", err)
	}

	rm.keysMutex.RLock()
	_, exists := rm.contentKeys["ucxl://example/path"]
	rm.keysMutex.RUnlock()

	if exists {
		t.Fatal("expected content key to be removed")
	}
}

func TestGetReplicationStatusReturnsCopy(t *testing.T) {
	rm := newReplicationManagerForTest(t)

	if err := rm.AddContent("ucxl://example/path", 512, 1); err != nil {
		t.Fatalf("AddContent returned error: %v", err)
	}

	status, err := rm.GetReplicationStatus("ucxl://example/path")
	if err != nil {
		t.Fatalf("GetReplicationStatus returned error: %v", err)
	}

	if status.Key != "ucxl://example/path" {
		t.Fatalf("expected status key to match, got %s", status.Key)
	}

	// Mutating status should not affect internal state
	status.HealthyProviders = 99
	internal, _ := rm.GetReplicationStatus("ucxl://example/path")
	if internal.HealthyProviders == 99 {
		t.Fatal("expected GetReplicationStatus to return a copy")
	}
}

func TestGetMetricsReturnsSnapshot(t *testing.T) {
	rm := newReplicationManagerForTest(t)

	metrics := rm.GetMetrics()
	if metrics == rm.metrics {
		t.Fatal("expected GetMetrics to return a copy of metrics")
	}
}
@@ -19,8 +19,8 @@ import (
type ElectionTrigger string

const (
	TriggerHeartbeatTimeout ElectionTrigger = "admin_heartbeat_timeout"
	TriggerDiscoveryFailure ElectionTrigger = "no_admin_discovered"
	TriggerSplitBrain       ElectionTrigger = "split_brain_detected"
	TriggerQuorumRestored   ElectionTrigger = "quorum_restored"
	TriggerManual           ElectionTrigger = "manual_trigger"
@@ -30,30 +30,35 @@ const (
type ElectionState string

const (
	electionTopic       = "CHORUS/election/v1"
	adminHeartbeatTopic = "CHORUS/admin/heartbeat/v1"
)

const (
	StateIdle           ElectionState = "idle"
	StateDiscovering    ElectionState = "discovering"
	StateElecting       ElectionState = "electing"
	StateReconstructing ElectionState = "reconstructing_keys"
	StateComplete       ElectionState = "complete"
)

// AdminCandidate represents a node candidate for admin role
type AdminCandidate struct {
	NodeID       string                 `json:"node_id"`
	PeerID       peer.ID                `json:"peer_id"`
	Capabilities []string               `json:"capabilities"`
	Uptime       time.Duration          `json:"uptime"`
	Resources    ResourceMetrics        `json:"resources"`
	Experience   time.Duration          `json:"experience"`
	Score        float64                `json:"score"`
	Metadata     map[string]interface{} `json:"metadata,omitempty"`
}

// ResourceMetrics holds node resource information for election scoring
type ResourceMetrics struct {
	CPUUsage       float64 `json:"cpu_usage"`
	MemoryUsage    float64 `json:"memory_usage"`
	DiskUsage      float64 `json:"disk_usage"`
	NetworkQuality float64 `json:"network_quality"`
}

@@ -68,46 +73,46 @@ type ElectionMessage struct {

// ElectionManager handles admin election coordination
type ElectionManager struct {
	ctx    context.Context
	cancel context.CancelFunc
	config *config.Config
	host   libp2p.Host
	pubsub *pubsub.PubSub
	nodeID string

	// Election state
	mu            sync.RWMutex
	state         ElectionState
	currentTerm   int
	lastHeartbeat time.Time
	currentAdmin  string
	candidates    map[string]*AdminCandidate
	votes         map[string]string // voter -> candidate

	// Timers and channels
	heartbeatTimer  *time.Timer
	discoveryTimer  *time.Timer
	electionTimer   *time.Timer
	electionTrigger chan ElectionTrigger

	// Heartbeat management
	heartbeatManager *HeartbeatManager

	// Callbacks
	onAdminChanged     func(oldAdmin, newAdmin string)
	onElectionComplete func(winner string)

	startTime time.Time
}

// HeartbeatManager manages admin heartbeat lifecycle
type HeartbeatManager struct {
	mu          sync.Mutex
	isRunning   bool
	stopCh      chan struct{}
	ticker      *time.Ticker
	electionMgr *ElectionManager
	logger      func(msg string, args ...interface{})
}

// NewElectionManager creates a new election manager
@@ -119,7 +124,7 @@ func NewElectionManager(
	nodeID string,
) *ElectionManager {
	electionCtx, cancel := context.WithCancel(ctx)

	em := &ElectionManager{
		ctx:    electionCtx,
		cancel: cancel,
@@ -133,7 +138,7 @@ func NewElectionManager(
		electionTrigger: make(chan ElectionTrigger, 10),
		startTime:       time.Now(),
	}

	// Initialize heartbeat manager
	em.heartbeatManager = &HeartbeatManager{
		electionMgr: em,
@@ -141,29 +146,32 @@ func NewElectionManager(
			log.Printf("[HEARTBEAT] "+msg, args...)
		},
	}

	return em
}

// Start begins the election management system
func (em *ElectionManager) Start() error {
	log.Printf("🗳️ Starting election manager for node %s", em.nodeID)

	if err := em.pubsub.SubscribeRawTopic(electionTopic, func(data []byte, _ peer.ID) {
		em.handleElectionMessage(data)
	}); err != nil {
		return fmt.Errorf("failed to subscribe to election messages: %w", err)
	}

	if err := em.pubsub.SubscribeRawTopic(adminHeartbeatTopic, func(data []byte, _ peer.ID) {
		em.handleAdminHeartbeat(data)
	}); err != nil {
		return fmt.Errorf("failed to subscribe to admin heartbeat: %w", err)
	}

	// Start discovery process
	go em.startDiscoveryLoop()

	// Start election coordinator
	go em.electionCoordinator()

	// Start heartbeat if this node is already admin at startup
	if em.IsCurrentAdmin() {
		go func() {
@@ -174,7 +182,7 @@ func (em *ElectionManager) Start() error {
			}
		}()
	}

	log.Printf("✅ Election manager started")
	return nil
}
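A small sketch of the raw-topic round trip the election manager now depends on. SubscribeRawTopic, PublishRaw, and NewPubSub are used with the same shapes as elsewhere in this commit; whether a node receives its own publishes is not asserted here, and the payload is illustrative.

    package main

    import (
    	"context"
    	"log"

    	pubsubpkg "chorus/pubsub"
    	libp2p "github.com/libp2p/go-libp2p"
    	"github.com/libp2p/go-libp2p/core/peer"
    )

    func main() {
    	ctx := context.Background()

    	host, err := libp2p.New(libp2p.ListenAddrStrings("/ip4/127.0.0.1/tcp/0"))
    	if err != nil {
    		log.Fatal(err)
    	}
    	defer host.Close()

    	ps, err := pubsubpkg.NewPubSub(ctx, host, "", "")
    	if err != nil {
    		log.Fatal(err)
    	}
    	defer ps.Close()

    	// Handler signature (payload bytes plus sender peer ID) mirrors the
    	// Start() hunk above.
    	if err := ps.SubscribeRawTopic("CHORUS/election/v1", func(data []byte, from peer.ID) {
    		log.Printf("election message from %s: %d bytes", from, len(data))
    	}); err != nil {
    		log.Fatal(err)
    	}

    	if err := ps.PublishRaw("CHORUS/election/v1", []byte(`{"type":"admin_discovery_request"}`)); err != nil {
    		log.Fatal(err)
    	}
    }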
@@ -182,17 +190,17 @@ func (em *ElectionManager) Start() error {
// Stop shuts down the election manager
func (em *ElectionManager) Stop() {
	log.Printf("🛑 Stopping election manager")

	// Stop heartbeat first
	if em.heartbeatManager != nil {
		em.heartbeatManager.StopHeartbeat()
	}

	em.cancel()

	em.mu.Lock()
	defer em.mu.Unlock()

	if em.heartbeatTimer != nil {
		em.heartbeatTimer.Stop()
	}
@@ -255,7 +263,7 @@ func (em *ElectionManager) GetHeartbeatStatus() map[string]interface{} {
|
|||||||
// startDiscoveryLoop starts the admin discovery loop
|
// startDiscoveryLoop starts the admin discovery loop
|
||||||
func (em *ElectionManager) startDiscoveryLoop() {
|
func (em *ElectionManager) startDiscoveryLoop() {
|
||||||
log.Printf("🔍 Starting admin discovery loop")
|
log.Printf("🔍 Starting admin discovery loop")
|
||||||
|
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
case <-em.ctx.Done():
|
case <-em.ctx.Done():
|
||||||
@@ -272,19 +280,19 @@ func (em *ElectionManager) performAdminDiscovery() {
|
|||||||
currentState := em.state
|
currentState := em.state
|
||||||
lastHeartbeat := em.lastHeartbeat
|
lastHeartbeat := em.lastHeartbeat
|
||||||
em.mu.Unlock()
|
em.mu.Unlock()
|
||||||
|
|
||||||
// Only discover if we're idle or the heartbeat is stale
|
// Only discover if we're idle or the heartbeat is stale
|
||||||
if currentState != StateIdle {
|
if currentState != StateIdle {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check if admin heartbeat has timed out
|
// Check if admin heartbeat has timed out
|
||||||
if !lastHeartbeat.IsZero() && time.Since(lastHeartbeat) > em.config.Security.ElectionConfig.HeartbeatTimeout {
|
if !lastHeartbeat.IsZero() && time.Since(lastHeartbeat) > em.config.Security.ElectionConfig.HeartbeatTimeout {
|
||||||
log.Printf("⚰️ Admin heartbeat timeout detected (last: %v)", lastHeartbeat)
|
log.Printf("⚰️ Admin heartbeat timeout detected (last: %v)", lastHeartbeat)
|
||||||
em.TriggerElection(TriggerHeartbeatTimeout)
|
em.TriggerElection(TriggerHeartbeatTimeout)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// If we haven't heard from an admin recently, try to discover one
|
// If we haven't heard from an admin recently, try to discover one
|
||||||
if lastHeartbeat.IsZero() || time.Since(lastHeartbeat) > em.config.Security.ElectionConfig.DiscoveryTimeout/2 {
|
if lastHeartbeat.IsZero() || time.Since(lastHeartbeat) > em.config.Security.ElectionConfig.DiscoveryTimeout/2 {
|
||||||
em.sendDiscoveryRequest()
|
em.sendDiscoveryRequest()
|
||||||
@@ -298,7 +306,7 @@ func (em *ElectionManager) sendDiscoveryRequest() {
|
|||||||
NodeID: em.nodeID,
|
NodeID: em.nodeID,
|
||||||
Timestamp: time.Now(),
|
Timestamp: time.Now(),
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := em.publishElectionMessage(discoveryMsg); err != nil {
|
if err := em.publishElectionMessage(discoveryMsg); err != nil {
|
||||||
log.Printf("❌ Failed to send admin discovery request: %v", err)
|
log.Printf("❌ Failed to send admin discovery request: %v", err)
|
||||||
}
|
}
|
||||||
@@ -307,7 +315,7 @@ func (em *ElectionManager) sendDiscoveryRequest() {
|
|||||||
// electionCoordinator handles the main election logic
|
// electionCoordinator handles the main election logic
|
||||||
func (em *ElectionManager) electionCoordinator() {
|
func (em *ElectionManager) electionCoordinator() {
|
||||||
log.Printf("🎯 Election coordinator started")
|
log.Printf("🎯 Election coordinator started")
|
||||||
|
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
case <-em.ctx.Done():
|
case <-em.ctx.Done():
|
||||||
@@ -321,17 +329,17 @@ func (em *ElectionManager) electionCoordinator() {
|
|||||||
// handleElectionTrigger processes election triggers
|
// handleElectionTrigger processes election triggers
|
||||||
func (em *ElectionManager) handleElectionTrigger(trigger ElectionTrigger) {
|
func (em *ElectionManager) handleElectionTrigger(trigger ElectionTrigger) {
|
||||||
log.Printf("🔥 Processing election trigger: %s", trigger)
|
log.Printf("🔥 Processing election trigger: %s", trigger)
|
||||||
|
|
||||||
em.mu.Lock()
|
em.mu.Lock()
|
||||||
currentState := em.state
|
currentState := em.state
|
||||||
em.mu.Unlock()
|
em.mu.Unlock()
|
||||||
|
|
||||||
// Ignore triggers if we're already in an election
|
// Ignore triggers if we're already in an election
|
||||||
if currentState != StateIdle {
|
if currentState != StateIdle {
|
||||||
log.Printf("⏸️ Ignoring election trigger, current state: %s", currentState)
|
log.Printf("⏸️ Ignoring election trigger, current state: %s", currentState)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Begin election process
|
// Begin election process
|
||||||
em.beginElection(trigger)
|
em.beginElection(trigger)
|
||||||
}
|
}
|
||||||
@@ -339,7 +347,7 @@ func (em *ElectionManager) handleElectionTrigger(trigger ElectionTrigger) {
|
|||||||
// beginElection starts a new election
|
// beginElection starts a new election
|
||||||
func (em *ElectionManager) beginElection(trigger ElectionTrigger) {
|
func (em *ElectionManager) beginElection(trigger ElectionTrigger) {
|
||||||
log.Printf("🗳️ Beginning election due to: %s", trigger)
|
log.Printf("🗳️ Beginning election due to: %s", trigger)
|
||||||
|
|
||||||
em.mu.Lock()
|
em.mu.Lock()
|
||||||
em.state = StateElecting
|
em.state = StateElecting
|
||||||
em.currentTerm++
|
em.currentTerm++
|
||||||
@@ -347,12 +355,12 @@ func (em *ElectionManager) beginElection(trigger ElectionTrigger) {
|
|||||||
em.candidates = make(map[string]*AdminCandidate)
|
em.candidates = make(map[string]*AdminCandidate)
|
||||||
em.votes = make(map[string]string)
|
em.votes = make(map[string]string)
|
||||||
em.mu.Unlock()
|
em.mu.Unlock()
|
||||||
|
|
||||||
// Announce candidacy if this node can be admin
|
// Announce candidacy if this node can be admin
|
||||||
if em.canBeAdmin() {
|
if em.canBeAdmin() {
|
||||||
em.announceCandidacy(term)
|
em.announceCandidacy(term)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Send election announcement
|
// Send election announcement
|
||||||
electionMsg := ElectionMessage{
|
electionMsg := ElectionMessage{
|
||||||
Type: "election_started",
|
Type: "election_started",
|
||||||
@@ -363,11 +371,11 @@ func (em *ElectionManager) beginElection(trigger ElectionTrigger) {
|
|||||||
"trigger": string(trigger),
|
"trigger": string(trigger),
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := em.publishElectionMessage(electionMsg); err != nil {
|
if err := em.publishElectionMessage(electionMsg); err != nil {
|
||||||
log.Printf("❌ Failed to announce election start: %v", err)
|
log.Printf("❌ Failed to announce election start: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Start election timeout
|
// Start election timeout
|
||||||
em.startElectionTimeout(term)
|
em.startElectionTimeout(term)
|
||||||
}
|
}
|
||||||
@@ -386,7 +394,7 @@ func (em *ElectionManager) canBeAdmin() bool {
|
|||||||
// announceCandidacy announces this node as an election candidate
|
// announceCandidacy announces this node as an election candidate
|
||||||
func (em *ElectionManager) announceCandidacy(term int) {
|
func (em *ElectionManager) announceCandidacy(term int) {
|
||||||
uptime := time.Since(em.startTime)
|
uptime := time.Since(em.startTime)
|
||||||
|
|
||||||
candidate := &AdminCandidate{
|
candidate := &AdminCandidate{
|
||||||
NodeID: em.nodeID,
|
NodeID: em.nodeID,
|
||||||
PeerID: em.host.ID(),
|
PeerID: em.host.ID(),
|
||||||
@@ -396,13 +404,13 @@ func (em *ElectionManager) announceCandidacy(term int) {
|
|||||||
Experience: uptime, // For now, use uptime as experience
|
Experience: uptime, // For now, use uptime as experience
|
||||||
Metadata: map[string]interface{}{
|
Metadata: map[string]interface{}{
|
||||||
"specialization": em.config.Agent.Specialization,
|
"specialization": em.config.Agent.Specialization,
|
||||||
"models": em.config.Agent.Models,
|
"models": em.config.Agent.Models,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
// Calculate candidate score
|
// Calculate candidate score
|
||||||
candidate.Score = em.calculateCandidateScore(candidate)
|
candidate.Score = em.calculateCandidateScore(candidate)
|
||||||
|
|
||||||
candidacyMsg := ElectionMessage{
|
candidacyMsg := ElectionMessage{
|
||||||
Type: "candidacy_announcement",
|
Type: "candidacy_announcement",
|
||||||
NodeID: em.nodeID,
|
NodeID: em.nodeID,
|
||||||
@@ -410,9 +418,9 @@ func (em *ElectionManager) announceCandidacy(term int) {
|
|||||||
Term: term,
|
Term: term,
|
||||||
Data: candidate,
|
Data: candidate,
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Printf("📢 Announcing candidacy (score: %.2f)", candidate.Score)
|
log.Printf("📢 Announcing candidacy (score: %.2f)", candidate.Score)
|
||||||
|
|
||||||
if err := em.publishElectionMessage(candidacyMsg); err != nil {
|
if err := em.publishElectionMessage(candidacyMsg); err != nil {
|
||||||
log.Printf("❌ Failed to announce candidacy: %v", err)
|
log.Printf("❌ Failed to announce candidacy: %v", err)
|
||||||
}
|
}
|
||||||
@@ -423,9 +431,9 @@ func (em *ElectionManager) getResourceMetrics() ResourceMetrics {
|
|||||||
// TODO: Implement actual resource collection
|
// TODO: Implement actual resource collection
|
||||||
// For now, return simulated values
|
// For now, return simulated values
|
||||||
return ResourceMetrics{
|
return ResourceMetrics{
|
||||||
CPUUsage: rand.Float64() * 0.5, // 0-50% CPU
|
CPUUsage: rand.Float64() * 0.5, // 0-50% CPU
|
||||||
MemoryUsage: rand.Float64() * 0.7, // 0-70% Memory
|
MemoryUsage: rand.Float64() * 0.7, // 0-70% Memory
|
||||||
DiskUsage: rand.Float64() * 0.6, // 0-60% Disk
|
DiskUsage: rand.Float64() * 0.6, // 0-60% Disk
|
||||||
NetworkQuality: 0.8 + rand.Float64()*0.2, // 80-100% Network Quality
|
NetworkQuality: 0.8 + rand.Float64()*0.2, // 80-100% Network Quality
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -435,10 +443,10 @@ func (em *ElectionManager) calculateCandidateScore(candidate *AdminCandidate) fl
|
|||||||
// TODO: Add LeadershipScoring to config.ElectionConfig
|
// TODO: Add LeadershipScoring to config.ElectionConfig
|
||||||
// scoring := em.config.Security.ElectionConfig.LeadershipScoring
|
// scoring := em.config.Security.ElectionConfig.LeadershipScoring
|
||||||
// Default scoring weights handled inline
|
// Default scoring weights handled inline
|
||||||
|
|
||||||
// Normalize metrics to 0-1 range
|
// Normalize metrics to 0-1 range
|
||||||
uptimeScore := min(1.0, candidate.Uptime.Hours()/24.0) // Up to 24 hours gets full score
|
uptimeScore := min(1.0, candidate.Uptime.Hours()/24.0) // Up to 24 hours gets full score
|
||||||
|
|
||||||
// Capability score - higher for admin/coordination capabilities
|
// Capability score - higher for admin/coordination capabilities
|
||||||
capabilityScore := 0.0
|
capabilityScore := 0.0
|
||||||
adminCapabilities := []string{"admin_election", "context_curation", "key_reconstruction", "semantic_analysis", "project_manager"}
|
adminCapabilities := []string{"admin_election", "context_curation", "key_reconstruction", "semantic_analysis", "project_manager"}
|
||||||
@@ -455,22 +463,22 @@ func (em *ElectionManager) calculateCandidateScore(candidate *AdminCandidate) fl
		}
	}
	capabilityScore = min(1.0, capabilityScore)

	// Resource score - lower usage is better
	resourceScore := (1.0-candidate.Resources.CPUUsage)*0.3 +
		(1.0-candidate.Resources.MemoryUsage)*0.3 +
		(1.0-candidate.Resources.DiskUsage)*0.2 +
		candidate.Resources.NetworkQuality*0.2

	experienceScore := min(1.0, candidate.Experience.Hours()/168.0) // Up to 1 week gets full score

	// Weighted final score (using default weights)
	finalScore := uptimeScore*0.3 +
		capabilityScore*0.2 +
		resourceScore*0.2 +
		candidate.Resources.NetworkQuality*0.15 +
		experienceScore*0.15

	return finalScore
}
@@ -478,11 +486,11 @@ func (em *ElectionManager) calculateCandidateScore(candidate *AdminCandidate) fl
|
|||||||
func (em *ElectionManager) startElectionTimeout(term int) {
|
func (em *ElectionManager) startElectionTimeout(term int) {
|
||||||
em.mu.Lock()
|
em.mu.Lock()
|
||||||
defer em.mu.Unlock()
|
defer em.mu.Unlock()
|
||||||
|
|
||||||
if em.electionTimer != nil {
|
if em.electionTimer != nil {
|
||||||
em.electionTimer.Stop()
|
em.electionTimer.Stop()
|
||||||
}
|
}
|
||||||
|
|
||||||
em.electionTimer = time.AfterFunc(em.config.Security.ElectionConfig.ElectionTimeout, func() {
|
em.electionTimer = time.AfterFunc(em.config.Security.ElectionConfig.ElectionTimeout, func() {
|
||||||
em.completeElection(term)
|
em.completeElection(term)
|
||||||
})
|
})
|
||||||
@@ -492,15 +500,15 @@ func (em *ElectionManager) startElectionTimeout(term int) {
|
|||||||
func (em *ElectionManager) completeElection(term int) {
|
func (em *ElectionManager) completeElection(term int) {
|
||||||
em.mu.Lock()
|
em.mu.Lock()
|
||||||
defer em.mu.Unlock()
|
defer em.mu.Unlock()
|
||||||
|
|
||||||
// Verify this is still the current term
|
// Verify this is still the current term
|
||||||
if term != em.currentTerm {
|
if term != em.currentTerm {
|
||||||
log.Printf("⏰ Election timeout for old term %d, ignoring", term)
|
log.Printf("⏰ Election timeout for old term %d, ignoring", term)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Printf("⏰ Election timeout reached, tallying votes")
|
log.Printf("⏰ Election timeout reached, tallying votes")
|
||||||
|
|
||||||
// Find the winning candidate
|
// Find the winning candidate
|
||||||
winner := em.findElectionWinner()
|
winner := em.findElectionWinner()
|
||||||
if winner == nil {
|
if winner == nil {
|
||||||
@@ -513,14 +521,14 @@ func (em *ElectionManager) completeElection(term int) {
|
|||||||
}()
|
}()
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Printf("🏆 Election winner: %s (score: %.2f)", winner.NodeID, winner.Score)
|
log.Printf("🏆 Election winner: %s (score: %.2f)", winner.NodeID, winner.Score)
|
||||||
|
|
||||||
// Update admin
|
// Update admin
|
||||||
oldAdmin := em.currentAdmin
|
oldAdmin := em.currentAdmin
|
||||||
em.currentAdmin = winner.NodeID
|
em.currentAdmin = winner.NodeID
|
||||||
em.state = StateComplete
|
em.state = StateComplete
|
||||||
|
|
||||||
// Announce the winner
|
// Announce the winner
|
||||||
winnerMsg := ElectionMessage{
|
winnerMsg := ElectionMessage{
|
||||||
Type: "election_winner",
|
Type: "election_winner",
|
||||||
@@ -529,16 +537,16 @@ func (em *ElectionManager) completeElection(term int) {
|
|||||||
Term: term,
|
Term: term,
|
||||||
Data: winner,
|
Data: winner,
|
||||||
}
|
}
|
||||||
|
|
||||||
em.mu.Unlock() // Unlock before publishing
|
em.mu.Unlock() // Unlock before publishing
|
||||||
|
|
||||||
if err := em.publishElectionMessage(winnerMsg); err != nil {
|
if err := em.publishElectionMessage(winnerMsg); err != nil {
|
||||||
log.Printf("❌ Failed to announce election winner: %v", err)
|
log.Printf("❌ Failed to announce election winner: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Handle heartbeat lifecycle based on admin change
|
// Handle heartbeat lifecycle based on admin change
|
||||||
em.handleHeartbeatTransition(oldAdmin, winner.NodeID)
|
em.handleHeartbeatTransition(oldAdmin, winner.NodeID)
|
||||||
|
|
||||||
// Trigger callbacks
|
// Trigger callbacks
|
||||||
if em.onAdminChanged != nil {
|
if em.onAdminChanged != nil {
|
||||||
em.onAdminChanged(oldAdmin, winner.NodeID)
|
em.onAdminChanged(oldAdmin, winner.NodeID)
|
||||||
@@ -546,7 +554,7 @@ func (em *ElectionManager) completeElection(term int) {
|
|||||||
if em.onElectionComplete != nil {
|
if em.onElectionComplete != nil {
|
||||||
em.onElectionComplete(winner.NodeID)
|
em.onElectionComplete(winner.NodeID)
|
||||||
}
|
}
|
||||||
|
|
||||||
em.mu.Lock()
|
em.mu.Lock()
|
||||||
em.state = StateIdle // Reset state for next election
|
em.state = StateIdle // Reset state for next election
|
||||||
}
|
}
|
||||||
@@ -556,16 +564,16 @@ func (em *ElectionManager) findElectionWinner() *AdminCandidate {
|
|||||||
if len(em.candidates) == 0 {
|
if len(em.candidates) == 0 {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Count votes for each candidate
|
// Count votes for each candidate
|
||||||
voteCounts := make(map[string]int)
|
voteCounts := make(map[string]int)
|
||||||
totalVotes := 0
|
totalVotes := 0
|
||||||
|
|
||||||
// Initialize vote counts for all candidates
|
// Initialize vote counts for all candidates
|
||||||
for candidateID := range em.candidates {
|
for candidateID := range em.candidates {
|
||||||
voteCounts[candidateID] = 0
|
voteCounts[candidateID] = 0
|
||||||
}
|
}
|
||||||
|
|
||||||
// Tally actual votes
|
// Tally actual votes
|
||||||
for _, candidateID := range em.votes {
|
for _, candidateID := range em.votes {
|
||||||
if _, exists := em.candidates[candidateID]; exists {
|
if _, exists := em.candidates[candidateID]; exists {
|
||||||
@@ -573,12 +581,12 @@ func (em *ElectionManager) findElectionWinner() *AdminCandidate {
|
|||||||
totalVotes++
|
totalVotes++
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// If no votes cast, fall back to highest scoring candidate
|
// If no votes cast, fall back to highest scoring candidate
|
||||||
if totalVotes == 0 {
|
if totalVotes == 0 {
|
||||||
var winner *AdminCandidate
|
var winner *AdminCandidate
|
||||||
highestScore := -1.0
|
highestScore := -1.0
|
||||||
|
|
||||||
for _, candidate := range em.candidates {
|
for _, candidate := range em.candidates {
|
||||||
if candidate.Score > highestScore {
|
if candidate.Score > highestScore {
|
||||||
highestScore = candidate.Score
|
highestScore = candidate.Score
|
||||||
@@ -587,12 +595,12 @@ func (em *ElectionManager) findElectionWinner() *AdminCandidate {
|
|||||||
}
|
}
|
||||||
return winner
|
return winner
|
||||||
}
|
}
|
||||||
|
|
||||||
// Find candidate with most votes
|
// Find candidate with most votes
|
||||||
var winner *AdminCandidate
|
var winner *AdminCandidate
|
||||||
maxVotes := -1
|
maxVotes := -1
|
||||||
highestScore := -1.0
|
highestScore := -1.0
|
||||||
|
|
||||||
for candidateID, voteCount := range voteCounts {
|
for candidateID, voteCount := range voteCounts {
|
||||||
candidate := em.candidates[candidateID]
|
candidate := em.candidates[candidateID]
|
||||||
if voteCount > maxVotes || (voteCount == maxVotes && candidate.Score > highestScore) {
|
if voteCount > maxVotes || (voteCount == maxVotes && candidate.Score > highestScore) {
|
||||||
@@ -601,10 +609,10 @@ func (em *ElectionManager) findElectionWinner() *AdminCandidate {
|
|||||||
winner = candidate
|
winner = candidate
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Printf("🗳️ Election results: %d total votes, winner: %s with %d votes (score: %.2f)",
|
log.Printf("🗳️ Election results: %d total votes, winner: %s with %d votes (score: %.2f)",
|
||||||
totalVotes, winner.NodeID, maxVotes, winner.Score)
|
totalVotes, winner.NodeID, maxVotes, winner.Score)
|
||||||
|
|
||||||
return winner
|
return winner
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -615,12 +623,12 @@ func (em *ElectionManager) handleElectionMessage(data []byte) {
|
|||||||
log.Printf("❌ Failed to unmarshal election message: %v", err)
|
log.Printf("❌ Failed to unmarshal election message: %v", err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Ignore messages from ourselves
|
// Ignore messages from ourselves
|
||||||
if msg.NodeID == em.nodeID {
|
if msg.NodeID == em.nodeID {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
switch msg.Type {
|
switch msg.Type {
|
||||||
case "admin_discovery_request":
|
case "admin_discovery_request":
|
||||||
em.handleAdminDiscoveryRequest(msg)
|
em.handleAdminDiscoveryRequest(msg)
|
||||||
@@ -643,7 +651,7 @@ func (em *ElectionManager) handleAdminDiscoveryRequest(msg ElectionMessage) {
|
|||||||
currentAdmin := em.currentAdmin
|
currentAdmin := em.currentAdmin
|
||||||
state := em.state
|
state := em.state
|
||||||
em.mu.RUnlock()
|
em.mu.RUnlock()
|
||||||
|
|
||||||
// Only respond if we know who the current admin is and we're idle
|
// Only respond if we know who the current admin is and we're idle
|
||||||
if currentAdmin != "" && state == StateIdle {
|
if currentAdmin != "" && state == StateIdle {
|
||||||
responseMsg := ElectionMessage{
|
responseMsg := ElectionMessage{
|
||||||
@@ -654,7 +662,7 @@ func (em *ElectionManager) handleAdminDiscoveryRequest(msg ElectionMessage) {
|
|||||||
"current_admin": currentAdmin,
|
"current_admin": currentAdmin,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := em.publishElectionMessage(responseMsg); err != nil {
|
if err := em.publishElectionMessage(responseMsg); err != nil {
|
||||||
log.Printf("❌ Failed to send admin discovery response: %v", err)
|
log.Printf("❌ Failed to send admin discovery response: %v", err)
|
||||||
}
|
}
|
||||||
@@ -679,7 +687,7 @@ func (em *ElectionManager) handleAdminDiscoveryResponse(msg ElectionMessage) {
|
|||||||
func (em *ElectionManager) handleElectionStarted(msg ElectionMessage) {
|
func (em *ElectionManager) handleElectionStarted(msg ElectionMessage) {
|
||||||
em.mu.Lock()
|
em.mu.Lock()
|
||||||
defer em.mu.Unlock()
|
defer em.mu.Unlock()
|
||||||
|
|
||||||
// If we receive an election start with a higher term, join the election
|
// If we receive an election start with a higher term, join the election
|
||||||
if msg.Term > em.currentTerm {
|
if msg.Term > em.currentTerm {
|
||||||
log.Printf("🔄 Joining election with term %d", msg.Term)
|
log.Printf("🔄 Joining election with term %d", msg.Term)
|
||||||
@@ -687,7 +695,7 @@ func (em *ElectionManager) handleElectionStarted(msg ElectionMessage) {
|
|||||||
em.state = StateElecting
|
em.state = StateElecting
|
||||||
em.candidates = make(map[string]*AdminCandidate)
|
em.candidates = make(map[string]*AdminCandidate)
|
||||||
em.votes = make(map[string]string)
|
em.votes = make(map[string]string)
|
||||||
|
|
||||||
// Announce candidacy if eligible
|
// Announce candidacy if eligible
|
||||||
if em.canBeAdmin() {
|
if em.canBeAdmin() {
|
||||||
go em.announceCandidacy(msg.Term)
|
go em.announceCandidacy(msg.Term)
|
||||||
@@ -699,25 +707,25 @@ func (em *ElectionManager) handleElectionStarted(msg ElectionMessage) {
|
|||||||
func (em *ElectionManager) handleCandidacyAnnouncement(msg ElectionMessage) {
|
func (em *ElectionManager) handleCandidacyAnnouncement(msg ElectionMessage) {
|
||||||
em.mu.Lock()
|
em.mu.Lock()
|
||||||
defer em.mu.Unlock()
|
defer em.mu.Unlock()
|
||||||
|
|
||||||
// Only process if it's for the current term
|
// Only process if it's for the current term
|
||||||
if msg.Term != em.currentTerm {
|
if msg.Term != em.currentTerm {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Convert data to candidate struct
|
// Convert data to candidate struct
|
||||||
candidateData, err := json.Marshal(msg.Data)
|
candidateData, err := json.Marshal(msg.Data)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Printf("❌ Failed to marshal candidate data: %v", err)
|
log.Printf("❌ Failed to marshal candidate data: %v", err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
var candidate AdminCandidate
|
var candidate AdminCandidate
|
||||||
if err := json.Unmarshal(candidateData, &candidate); err != nil {
|
if err := json.Unmarshal(candidateData, &candidate); err != nil {
|
||||||
log.Printf("❌ Failed to unmarshal candidate: %v", err)
|
log.Printf("❌ Failed to unmarshal candidate: %v", err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Printf("📝 Received candidacy from %s (score: %.2f)", candidate.NodeID, candidate.Score)
|
log.Printf("📝 Received candidacy from %s (score: %.2f)", candidate.NodeID, candidate.Score)
|
||||||
em.candidates[candidate.NodeID] = &candidate
|
em.candidates[candidate.NodeID] = &candidate
|
||||||
}
|
}
|
||||||
@@ -726,31 +734,31 @@ func (em *ElectionManager) handleCandidacyAnnouncement(msg ElectionMessage) {
|
|||||||
func (em *ElectionManager) handleElectionVote(msg ElectionMessage) {
|
func (em *ElectionManager) handleElectionVote(msg ElectionMessage) {
|
||||||
em.mu.Lock()
|
em.mu.Lock()
|
||||||
defer em.mu.Unlock()
|
defer em.mu.Unlock()
|
||||||
|
|
||||||
// Extract vote data
|
// Extract vote data
|
||||||
voteData, ok := msg.Data.(map[string]interface{})
|
voteData, ok := msg.Data.(map[string]interface{})
|
||||||
if !ok {
|
if !ok {
|
||||||
log.Printf("❌ Invalid vote data format from %s", msg.NodeID)
|
log.Printf("❌ Invalid vote data format from %s", msg.NodeID)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
candidateID, ok := voteData["candidate"].(string)
|
candidateID, ok := voteData["candidate"].(string)
|
||||||
if !ok {
|
if !ok {
|
||||||
log.Printf("❌ Invalid candidate ID in vote from %s", msg.NodeID)
|
log.Printf("❌ Invalid candidate ID in vote from %s", msg.NodeID)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Validate candidate exists
|
// Validate candidate exists
|
||||||
if _, exists := em.candidates[candidateID]; !exists {
|
if _, exists := em.candidates[candidateID]; !exists {
|
||||||
log.Printf("❌ Vote for unknown candidate %s from %s", candidateID, msg.NodeID)
|
log.Printf("❌ Vote for unknown candidate %s from %s", candidateID, msg.NodeID)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Prevent duplicate voting
|
// Prevent duplicate voting
|
||||||
if existingVote, exists := em.votes[msg.NodeID]; exists {
|
if existingVote, exists := em.votes[msg.NodeID]; exists {
|
||||||
log.Printf("⚠️ Node %s already voted for %s, updating to %s", msg.NodeID, existingVote, candidateID)
|
log.Printf("⚠️ Node %s already voted for %s, updating to %s", msg.NodeID, existingVote, candidateID)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Record the vote
|
// Record the vote
|
||||||
em.votes[msg.NodeID] = candidateID
|
em.votes[msg.NodeID] = candidateID
|
||||||
log.Printf("🗳️ Recorded vote from %s for candidate %s", msg.NodeID, candidateID)
|
log.Printf("🗳️ Recorded vote from %s for candidate %s", msg.NodeID, candidateID)
|
||||||
@@ -763,24 +771,24 @@ func (em *ElectionManager) handleElectionWinner(msg ElectionMessage) {
|
|||||||
log.Printf("❌ Failed to marshal winner data: %v", err)
|
log.Printf("❌ Failed to marshal winner data: %v", err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
var winner AdminCandidate
|
var winner AdminCandidate
|
||||||
if err := json.Unmarshal(candidateData, &winner); err != nil {
|
if err := json.Unmarshal(candidateData, &winner); err != nil {
|
||||||
log.Printf("❌ Failed to unmarshal winner: %v", err)
|
log.Printf("❌ Failed to unmarshal winner: %v", err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
em.mu.Lock()
|
em.mu.Lock()
|
||||||
oldAdmin := em.currentAdmin
|
oldAdmin := em.currentAdmin
|
||||||
em.currentAdmin = winner.NodeID
|
em.currentAdmin = winner.NodeID
|
||||||
em.state = StateIdle
|
em.state = StateIdle
|
||||||
em.mu.Unlock()
|
em.mu.Unlock()
|
||||||
|
|
||||||
log.Printf("👑 New admin elected: %s", winner.NodeID)
|
log.Printf("👑 New admin elected: %s", winner.NodeID)
|
||||||
|
|
||||||
// Handle heartbeat lifecycle based on admin change
|
// Handle heartbeat lifecycle based on admin change
|
||||||
em.handleHeartbeatTransition(oldAdmin, winner.NodeID)
|
em.handleHeartbeatTransition(oldAdmin, winner.NodeID)
|
||||||
|
|
||||||
// Trigger callback
|
// Trigger callback
|
||||||
if em.onAdminChanged != nil {
|
if em.onAdminChanged != nil {
|
||||||
em.onAdminChanged(oldAdmin, winner.NodeID)
|
em.onAdminChanged(oldAdmin, winner.NodeID)
|
||||||
@@ -796,7 +804,7 @@ func (em *ElectionManager) handleHeartbeatTransition(oldAdmin, newAdmin string)
|
|||||||
log.Printf("⚠️ Error stopping heartbeat: %v", err)
|
log.Printf("⚠️ Error stopping heartbeat: %v", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// If we gained admin role, start heartbeat
|
// If we gained admin role, start heartbeat
|
||||||
if newAdmin == em.nodeID && oldAdmin != em.nodeID {
|
if newAdmin == em.nodeID && oldAdmin != em.nodeID {
|
||||||
log.Printf("🔄 Gained admin role, starting heartbeat")
|
log.Printf("🔄 Gained admin role, starting heartbeat")
|
||||||
@@ -816,15 +824,15 @@ func (em *ElectionManager) handleAdminHeartbeat(data []byte) {
|
|||||||
NodeID string `json:"node_id"`
|
NodeID string `json:"node_id"`
|
||||||
Timestamp time.Time `json:"timestamp"`
|
Timestamp time.Time `json:"timestamp"`
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := json.Unmarshal(data, &heartbeat); err != nil {
|
if err := json.Unmarshal(data, &heartbeat); err != nil {
|
||||||
log.Printf("❌ Failed to unmarshal heartbeat: %v", err)
|
log.Printf("❌ Failed to unmarshal heartbeat: %v", err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
em.mu.Lock()
|
em.mu.Lock()
|
||||||
defer em.mu.Unlock()
|
defer em.mu.Unlock()
|
||||||
|
|
||||||
// Update admin and heartbeat timestamp
|
// Update admin and heartbeat timestamp
|
||||||
if em.currentAdmin == "" || em.currentAdmin == heartbeat.NodeID {
|
if em.currentAdmin == "" || em.currentAdmin == heartbeat.NodeID {
|
||||||
em.currentAdmin = heartbeat.NodeID
|
em.currentAdmin = heartbeat.NodeID
|
||||||
@@ -838,11 +846,8 @@ func (em *ElectionManager) publishElectionMessage(msg ElectionMessage) error {
	if err != nil {
		return fmt.Errorf("failed to marshal election message: %w", err)
	}

	return em.pubsub.PublishRaw(electionTopic, data)
}

// SendAdminHeartbeat sends admin heartbeat (only if this node is admin)
@@ -850,7 +855,7 @@ func (em *ElectionManager) SendAdminHeartbeat() error {
	if !em.IsCurrentAdmin() {
		return fmt.Errorf("not current admin")
	}

	heartbeat := struct {
		NodeID    string    `json:"node_id"`
		Timestamp time.Time `json:"timestamp"`
@@ -858,16 +863,13 @@ func (em *ElectionManager) SendAdminHeartbeat() error {
		NodeID:    em.nodeID,
		Timestamp: time.Now(),
	}

	data, err := json.Marshal(heartbeat)
	if err != nil {
		return fmt.Errorf("failed to marshal heartbeat: %w", err)
	}

	return em.pubsub.PublishRaw(adminHeartbeatTopic, data)
}

// min returns the minimum of two float64 values
@@ -894,26 +896,26 @@ func NewHeartbeatManager(electionMgr *ElectionManager) *HeartbeatManager {
|
|||||||
func (hm *HeartbeatManager) StartHeartbeat() error {
|
func (hm *HeartbeatManager) StartHeartbeat() error {
|
||||||
hm.mu.Lock()
|
hm.mu.Lock()
|
||||||
defer hm.mu.Unlock()
|
defer hm.mu.Unlock()
|
||||||
|
|
||||||
if hm.isRunning {
|
if hm.isRunning {
|
||||||
hm.logger("Heartbeat already running")
|
hm.logger("Heartbeat already running")
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
if !hm.electionMgr.IsCurrentAdmin() {
|
if !hm.electionMgr.IsCurrentAdmin() {
|
||||||
return fmt.Errorf("not admin, cannot start heartbeat")
|
return fmt.Errorf("not admin, cannot start heartbeat")
|
||||||
}
|
}
|
||||||
|
|
||||||
hm.logger("Starting admin heartbeat transmission")
|
hm.logger("Starting admin heartbeat transmission")
|
||||||
|
|
||||||
hm.stopCh = make(chan struct{})
|
hm.stopCh = make(chan struct{})
|
||||||
interval := hm.electionMgr.config.Security.ElectionConfig.HeartbeatTimeout / 2
|
interval := hm.electionMgr.config.Security.ElectionConfig.HeartbeatTimeout / 2
|
||||||
hm.ticker = time.NewTicker(interval)
|
hm.ticker = time.NewTicker(interval)
|
||||||
hm.isRunning = true
|
hm.isRunning = true
|
||||||
|
|
||||||
// Start heartbeat goroutine
|
// Start heartbeat goroutine
|
||||||
go hm.heartbeatLoop()
|
go hm.heartbeatLoop()
|
||||||
|
|
||||||
hm.logger("Admin heartbeat started (interval: %v)", interval)
|
hm.logger("Admin heartbeat started (interval: %v)", interval)
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@@ -922,22 +924,22 @@ func (hm *HeartbeatManager) StartHeartbeat() error {
|
|||||||
func (hm *HeartbeatManager) StopHeartbeat() error {
|
func (hm *HeartbeatManager) StopHeartbeat() error {
|
||||||
hm.mu.Lock()
|
hm.mu.Lock()
|
||||||
defer hm.mu.Unlock()
|
defer hm.mu.Unlock()
|
||||||
|
|
||||||
if !hm.isRunning {
|
if !hm.isRunning {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
hm.logger("Stopping admin heartbeat transmission")
|
hm.logger("Stopping admin heartbeat transmission")
|
||||||
|
|
||||||
// Signal stop
|
// Signal stop
|
||||||
close(hm.stopCh)
|
close(hm.stopCh)
|
||||||
|
|
||||||
// Stop ticker
|
// Stop ticker
|
||||||
if hm.ticker != nil {
|
if hm.ticker != nil {
|
||||||
hm.ticker.Stop()
|
hm.ticker.Stop()
|
||||||
hm.ticker = nil
|
hm.ticker = nil
|
||||||
}
|
}
|
||||||
|
|
||||||
hm.isRunning = false
|
hm.isRunning = false
|
||||||
hm.logger("Admin heartbeat stopped")
|
hm.logger("Admin heartbeat stopped")
|
||||||
return nil
|
return nil
|
||||||
@@ -958,7 +960,7 @@ func (hm *HeartbeatManager) heartbeatLoop() {
|
|||||||
hm.mu.Unlock()
|
hm.mu.Unlock()
|
||||||
hm.logger("Heartbeat loop terminated")
|
hm.logger("Heartbeat loop terminated")
|
||||||
}()
|
}()
|
||||||
|
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
case <-hm.ticker.C:
|
case <-hm.ticker.C:
|
||||||
@@ -971,11 +973,11 @@ func (hm *HeartbeatManager) heartbeatLoop() {
|
|||||||
hm.logger("No longer admin, stopping heartbeat")
|
hm.logger("No longer admin, stopping heartbeat")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
case <-hm.stopCh:
|
case <-hm.stopCh:
|
||||||
hm.logger("Heartbeat stop signal received")
|
hm.logger("Heartbeat stop signal received")
|
||||||
return
|
return
|
||||||
|
|
||||||
case <-hm.electionMgr.ctx.Done():
|
case <-hm.electionMgr.ctx.Done():
|
||||||
hm.logger("Election manager context cancelled")
|
hm.logger("Election manager context cancelled")
|
||||||
return
|
return
|
||||||
@@ -987,19 +989,19 @@ func (hm *HeartbeatManager) heartbeatLoop() {

func (hm *HeartbeatManager) GetHeartbeatStatus() map[string]interface{} {
    hm.mu.Lock()
    defer hm.mu.Unlock()

    status := map[string]interface{}{
        "running":   hm.isRunning,
        "is_admin":  hm.electionMgr.IsCurrentAdmin(),
        "last_sent": time.Now(), // TODO: Track actual last sent time
    }

    if hm.isRunning && hm.ticker != nil {
        // Calculate next heartbeat time (approximate)
        interval := hm.electionMgr.config.Security.ElectionConfig.HeartbeatTimeout / 2
        status["interval"] = interval.String()
        status["next_heartbeat"] = time.Now().Add(interval)
    }

    return status
}
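Because GetHeartbeatStatus returns a plain map, it can be surfaced on a diagnostics endpoint with no extra translation. A minimal sketch, assuming net/http and encoding/json are imported in the same package and that the route path is free to choose:

// Sketch only (not part of this commit): expose heartbeat status as JSON.
// The /debug/heartbeat path is an assumption; the status keys ("running",
// "is_admin", "last_sent", ...) come from GetHeartbeatStatus above.
func registerHeartbeatStatus(mux *http.ServeMux, hm *HeartbeatManager) {
    mux.HandleFunc("/debug/heartbeat", func(w http.ResponseWriter, r *http.Request) {
        w.Header().Set("Content-Type", "application/json")
        _ = json.NewEncoder(w).Encode(hm.GetHeartbeatStatus())
    })
}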
@@ -2,451 +2,185 @@ package election

import (
    "context"
    "encoding/json"
    "testing"
    "time"

    "chorus/pkg/config"
    pubsubpkg "chorus/pubsub"
    libp2p "github.com/libp2p/go-libp2p"
)

// newTestElectionManager wires a real libp2p host and PubSub instance so the
// election manager exercises the same code paths used in production.
func newTestElectionManager(t *testing.T) *ElectionManager {
    t.Helper()

    ctx, cancel := context.WithCancel(context.Background())

    host, err := libp2p.New(libp2p.ListenAddrStrings("/ip4/127.0.0.1/tcp/0"))
    if err != nil {
        cancel()
        t.Fatalf("failed to create libp2p host: %v", err)
    }

    ps, err := pubsubpkg.NewPubSub(ctx, host, "", "")
    if err != nil {
        host.Close()
        cancel()
        t.Fatalf("failed to create pubsub: %v", err)
    }

    cfg := &config.Config{
        Agent: config.AgentConfig{
            ID:             host.ID().String(),
            Role:           "context_admin",
            Capabilities:   []string{"admin_election", "context_curation"},
            Models:         []string{"meta/llama-3.1-8b-instruct"},
            Specialization: "coordination",
        },
        Security: config.SecurityConfig{},
    }

    em := NewElectionManager(ctx, cfg, host, ps, host.ID().String())

    t.Cleanup(func() {
        em.Stop()
        ps.Close()
        host.Close()
        cancel()
    })

    return em
}

func TestNewElectionManagerInitialState(t *testing.T) {
    em := newTestElectionManager(t)

    if em.state != StateIdle {
        t.Fatalf("expected initial state %q, got %q", StateIdle, em.state)
    }

    if em.currentTerm != 0 {
        t.Fatalf("expected initial term 0, got %d", em.currentTerm)
    }

    if em.nodeID == "" {
        t.Fatal("expected nodeID to be populated")
    }
}

func TestElectionManagerCanBeAdmin(t *testing.T) {
    em := newTestElectionManager(t)

    if !em.canBeAdmin() {
        t.Fatal("expected node to qualify for admin election")
    }

    em.config.Agent.Capabilities = []string{"runtime_support"}
    if em.canBeAdmin() {
        t.Fatal("expected node without admin capabilities to be ineligible")
    }
}

func TestFindElectionWinnerPrefersVotesThenScore(t *testing.T) {
    em := newTestElectionManager(t)

    em.mu.Lock()
    em.candidates = map[string]*AdminCandidate{
        "candidate-1": {
            NodeID: "candidate-1",
            PeerID: em.host.ID(),
            Score:  0.65,
        },
        "candidate-2": {
            NodeID: "candidate-2",
            PeerID: em.host.ID(),
            Score:  0.80,
        },
    }
    em.votes = map[string]string{
        "voter-a": "candidate-1",
        "voter-b": "candidate-2",
        "voter-c": "candidate-2",
    }
    em.mu.Unlock()

    winner := em.findElectionWinner()

    if winner == nil {
        t.Fatal("expected a winner to be selected")
    }

    if winner.NodeID != "candidate-2" {
        t.Fatalf("expected candidate-2 to win, got %s", winner.NodeID)
    }
}

func TestHandleElectionMessageAddsCandidate(t *testing.T) {
    em := newTestElectionManager(t)

    em.mu.Lock()
    em.currentTerm = 3
    em.state = StateElecting
    em.mu.Unlock()

    candidate := &AdminCandidate{
        NodeID:       "peer-2",
        PeerID:       em.host.ID(),
        Capabilities: []string{"admin_election"},
        Uptime:       time.Second,
        Score:        0.75,
    }

    payload, err := json.Marshal(candidate)
    if err != nil {
        t.Fatalf("failed to marshal candidate: %v", err)
    }

    var data map[string]interface{}
    if err := json.Unmarshal(payload, &data); err != nil {
        t.Fatalf("failed to unmarshal candidate payload: %v", err)
    }

    msg := ElectionMessage{
        Type:      "candidacy_announcement",
        NodeID:    "peer-2",
        Timestamp: time.Now(),
        Term:      3,
        Data:      data,
    }

    serialized, err := json.Marshal(msg)
    if err != nil {
        t.Fatalf("failed to marshal election message: %v", err)
    }

    em.handleElectionMessage(serialized)

    em.mu.RLock()
    _, exists := em.candidates["peer-2"]
    em.mu.RUnlock()

    if !exists {
        t.Fatal("expected candidacy announcement to register candidate")
    }
}

func TestSendAdminHeartbeatRequiresLeadership(t *testing.T) {
    em := newTestElectionManager(t)

    if err := em.SendAdminHeartbeat(); err == nil {
        t.Fatal("expected error when non-admin sends heartbeat")
    }

    if err := em.Start(); err != nil {
        t.Fatalf("failed to start election manager: %v", err)
    }

    em.mu.Lock()
    em.currentAdmin = em.nodeID
    em.mu.Unlock()

    if err := em.SendAdminHeartbeat(); err != nil {
        t.Fatalf("expected heartbeat to succeed for current admin, got error: %v", err)
    }
}
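The newTestElectionManager helper keeps further cases cheap to add. A hypothetical table-driven sketch (not part of this commit) that reuses only identifiers appearing in the tests above:

// Hypothetical follow-on test: canBeAdmin across capability sets.
func TestElectionManagerCanBeAdminTable(t *testing.T) {
    cases := []struct {
        name         string
        capabilities []string
        want         bool
    }{
        {"admin capable", []string{"admin_election", "context_curation"}, true},
        {"worker only", []string{"runtime_support"}, false},
    }

    for _, tc := range cases {
        t.Run(tc.name, func(t *testing.T) {
            em := newTestElectionManager(t)
            em.config.Agent.Capabilities = tc.capabilities
            if got := em.canBeAdmin(); got != tc.want {
                t.Fatalf("canBeAdmin() = %v, want %v", got, tc.want)
            }
        })
    }
}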
@@ -2,27 +2,26 @@ package metrics

import (
    "context"
    "log"
    "net/http"
    "sync"
    "time"

    "github.com/prometheus/client_golang/prometheus"
    "github.com/prometheus/client_golang/prometheus/promauto"
    "github.com/prometheus/client_golang/prometheus/promhttp"
)

// CHORUSMetrics provides comprehensive Prometheus metrics for the CHORUS system
type CHORUSMetrics struct {
    registry   *prometheus.Registry
    httpServer *http.Server

    // System metrics
    systemInfo *prometheus.GaugeVec
    uptime     prometheus.Gauge
    buildInfo  *prometheus.GaugeVec

    // P2P metrics
    p2pConnectedPeers prometheus.Gauge
    p2pMessagesSent   *prometheus.CounterVec
@@ -30,95 +29,98 @@ type CHORUSMetrics struct {
    p2pMessageLatency     *prometheus.HistogramVec
    p2pConnectionDuration *prometheus.HistogramVec
    p2pPeerScore          *prometheus.GaugeVec

    // DHT metrics
    dhtPutOperations     *prometheus.CounterVec
    dhtGetOperations     *prometheus.CounterVec
    dhtOperationLatency  *prometheus.HistogramVec
    dhtProviderRecords   prometheus.Gauge
    dhtReplicationFactor *prometheus.GaugeVec
    dhtContentKeys       prometheus.Gauge
    dhtCacheHits         *prometheus.CounterVec
    dhtCacheMisses       *prometheus.CounterVec

    // PubSub metrics
    pubsubTopics         prometheus.Gauge
    pubsubSubscribers    *prometheus.GaugeVec
    pubsubMessages       *prometheus.CounterVec
    pubsubMessageLatency *prometheus.HistogramVec
    pubsubMessageSize    *prometheus.HistogramVec

    // Election metrics
    electionTerm       prometheus.Gauge
    electionState      *prometheus.GaugeVec
    heartbeatsSent     prometheus.Counter
    heartbeatsReceived prometheus.Counter
    leadershipChanges  prometheus.Counter
    leaderUptime       prometheus.Gauge
    electionLatency    prometheus.Histogram

    // Health metrics
    healthChecksPassed   *prometheus.CounterVec
    healthChecksFailed   *prometheus.CounterVec
    healthCheckDuration  *prometheus.HistogramVec
    systemHealthScore    prometheus.Gauge
    componentHealthScore *prometheus.GaugeVec

    // Task metrics
    tasksActive       prometheus.Gauge
    tasksQueued       prometheus.Gauge
    tasksCompleted    *prometheus.CounterVec
    taskDuration      *prometheus.HistogramVec
    taskQueueWaitTime prometheus.Histogram

    // SLURP metrics (context generation)
    slurpGenerated        *prometheus.CounterVec
    slurpGenerationTime   prometheus.Histogram
    slurpQueueLength      prometheus.Gauge
    slurpActiveJobs       prometheus.Gauge
    slurpLeadershipEvents prometheus.Counter

    // SHHH sentinel metrics
    shhhFindings *prometheus.CounterVec

    // UCXI metrics (protocol resolution)
    ucxiRequests          *prometheus.CounterVec
    ucxiResolutionLatency prometheus.Histogram
    ucxiCacheHits         prometheus.Counter
    ucxiCacheMisses       prometheus.Counter
    ucxiContentSize       prometheus.Histogram

    // Resource metrics
    cpuUsage        prometheus.Gauge
    memoryUsage     prometheus.Gauge
    diskUsage       *prometheus.GaugeVec
    networkBytesIn  prometheus.Counter
    networkBytesOut prometheus.Counter
    goroutines      prometheus.Gauge

    // Error metrics
    errors *prometheus.CounterVec
    panics prometheus.Counter

    startTime time.Time
    mu        sync.RWMutex
}

// MetricsConfig configures the metrics system
type MetricsConfig struct {
    // HTTP server config
    ListenAddr  string
    MetricsPath string

    // Histogram buckets
    LatencyBuckets []float64
    SizeBuckets    []float64

    // Labels
    NodeID      string
    Version     string
    Environment string
    Cluster     string

    // Collection intervals
    SystemMetricsInterval   time.Duration
    ResourceMetricsInterval time.Duration
}
@@ -143,20 +145,20 @@ func NewCHORUSMetrics(config *MetricsConfig) *CHORUSMetrics {
    if config == nil {
        config = DefaultMetricsConfig()
    }

    registry := prometheus.NewRegistry()

    metrics := &CHORUSMetrics{
        registry:  registry,
        startTime: time.Now(),
    }

    // Initialize all metrics
    metrics.initializeMetrics(config)

    // Register with custom registry
    metrics.registerMetrics()

    return metrics
}
@@ -170,14 +172,14 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) {
        },
        []string{"node_id", "version", "go_version", "cluster", "environment"},
    )

    m.uptime = promauto.NewGauge(
        prometheus.GaugeOpts{
            Name: "chorus_uptime_seconds",
            Help: "System uptime in seconds",
        },
    )

    // P2P metrics
    m.p2pConnectedPeers = promauto.NewGauge(
        prometheus.GaugeOpts{
@@ -185,7 +187,7 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) {
            Help: "Number of connected P2P peers",
        },
    )

    m.p2pMessagesSent = promauto.NewCounterVec(
        prometheus.CounterOpts{
            Name: "chorus_p2p_messages_sent_total",
@@ -193,7 +195,7 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) {
        },
        []string{"message_type", "peer_id"},
    )

    m.p2pMessagesReceived = promauto.NewCounterVec(
        prometheus.CounterOpts{
            Name: "chorus_p2p_messages_received_total",
@@ -201,7 +203,7 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) {
        },
        []string{"message_type", "peer_id"},
    )

    m.p2pMessageLatency = promauto.NewHistogramVec(
        prometheus.HistogramOpts{
            Name: "chorus_p2p_message_latency_seconds",
@@ -210,7 +212,7 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) {
        },
        []string{"message_type"},
    )

    // DHT metrics
    m.dhtPutOperations = promauto.NewCounterVec(
        prometheus.CounterOpts{
@@ -219,7 +221,7 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) {
        },
        []string{"status"},
    )

    m.dhtGetOperations = promauto.NewCounterVec(
        prometheus.CounterOpts{
            Name: "chorus_dht_get_operations_total",
@@ -227,7 +229,7 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) {
        },
        []string{"status"},
    )

    m.dhtOperationLatency = promauto.NewHistogramVec(
        prometheus.HistogramOpts{
            Name: "chorus_dht_operation_latency_seconds",
@@ -236,21 +238,21 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) {
        },
        []string{"operation", "status"},
    )

    m.dhtProviderRecords = promauto.NewGauge(
        prometheus.GaugeOpts{
            Name: "chorus_dht_provider_records",
            Help: "Number of DHT provider records",
        },
    )

    m.dhtContentKeys = promauto.NewGauge(
        prometheus.GaugeOpts{
            Name: "chorus_dht_content_keys",
            Help: "Number of DHT content keys",
        },
    )

    m.dhtReplicationFactor = promauto.NewGaugeVec(
        prometheus.GaugeOpts{
            Name: "chorus_dht_replication_factor",
@@ -258,7 +260,7 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) {
        },
        []string{"key_hash"},
    )

    // PubSub metrics
    m.pubsubTopics = promauto.NewGauge(
        prometheus.GaugeOpts{
@@ -266,7 +268,7 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) {
            Help: "Number of active PubSub topics",
        },
    )

    m.pubsubMessages = promauto.NewCounterVec(
        prometheus.CounterOpts{
            Name: "chorus_pubsub_messages_total",
@@ -274,7 +276,7 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) {
        },
        []string{"topic", "direction", "message_type"},
    )

    m.pubsubMessageLatency = promauto.NewHistogramVec(
        prometheus.HistogramOpts{
            Name: "chorus_pubsub_message_latency_seconds",
@@ -283,7 +285,7 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) {
        },
        []string{"topic"},
    )

    // Election metrics
    m.electionTerm = promauto.NewGauge(
        prometheus.GaugeOpts{
@@ -291,7 +293,7 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) {
            Help: "Current election term",
        },
    )

    m.electionState = promauto.NewGaugeVec(
        prometheus.GaugeOpts{
            Name: "chorus_election_state",
@@ -299,28 +301,28 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) {
        },
        []string{"state"},
    )

    m.heartbeatsSent = promauto.NewCounter(
        prometheus.CounterOpts{
            Name: "chorus_heartbeats_sent_total",
            Help: "Total number of heartbeats sent",
        },
    )

    m.heartbeatsReceived = promauto.NewCounter(
        prometheus.CounterOpts{
            Name: "chorus_heartbeats_received_total",
            Help: "Total number of heartbeats received",
        },
    )

    m.leadershipChanges = promauto.NewCounter(
        prometheus.CounterOpts{
            Name: "chorus_leadership_changes_total",
            Help: "Total number of leadership changes",
        },
    )

    // Health metrics
    m.healthChecksPassed = promauto.NewCounterVec(
        prometheus.CounterOpts{
@@ -329,7 +331,7 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) {
        },
        []string{"check_name"},
    )

    m.healthChecksFailed = promauto.NewCounterVec(
        prometheus.CounterOpts{
            Name: "chorus_health_checks_failed_total",
@@ -337,14 +339,14 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) {
        },
        []string{"check_name", "reason"},
    )

    m.systemHealthScore = promauto.NewGauge(
        prometheus.GaugeOpts{
            Name: "chorus_system_health_score",
            Help: "Overall system health score (0-1)",
        },
    )

    m.componentHealthScore = promauto.NewGaugeVec(
        prometheus.GaugeOpts{
            Name: "chorus_component_health_score",
@@ -352,7 +354,7 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) {
        },
        []string{"component"},
    )

    // Task metrics
    m.tasksActive = promauto.NewGauge(
        prometheus.GaugeOpts{
@@ -360,14 +362,14 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) {
            Help: "Number of active tasks",
        },
    )

    m.tasksQueued = promauto.NewGauge(
        prometheus.GaugeOpts{
            Name: "chorus_tasks_queued",
            Help: "Number of queued tasks",
        },
    )

    m.tasksCompleted = promauto.NewCounterVec(
        prometheus.CounterOpts{
            Name: "chorus_tasks_completed_total",
@@ -375,7 +377,7 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) {
        },
        []string{"status", "task_type"},
    )

    m.taskDuration = promauto.NewHistogramVec(
        prometheus.HistogramOpts{
            Name: "chorus_task_duration_seconds",
@@ -384,7 +386,7 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) {
        },
        []string{"task_type", "status"},
    )

    // SLURP metrics
    m.slurpGenerated = promauto.NewCounterVec(
        prometheus.CounterOpts{
@@ -393,7 +395,7 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) {
        },
        []string{"role", "status"},
    )

    m.slurpGenerationTime = promauto.NewHistogram(
        prometheus.HistogramOpts{
            Name: "chorus_slurp_generation_time_seconds",
@@ -401,14 +403,23 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) {
            Buckets: []float64{0.1, 0.5, 1.0, 2.0, 5.0, 10.0, 30.0, 60.0, 120.0},
        },
    )

    m.slurpQueueLength = promauto.NewGauge(
        prometheus.GaugeOpts{
            Name: "chorus_slurp_queue_length",
            Help: "Length of SLURP generation queue",
        },
    )

    // SHHH metrics
    m.shhhFindings = promauto.NewCounterVec(
        prometheus.CounterOpts{
            Name: "chorus_shhh_findings_total",
            Help: "Total number of SHHH redaction findings",
        },
        []string{"rule", "severity"},
    )

    // UCXI metrics
    m.ucxiRequests = promauto.NewCounterVec(
        prometheus.CounterOpts{
@@ -417,7 +428,7 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) {
        },
        []string{"method", "status"},
    )

    m.ucxiResolutionLatency = promauto.NewHistogram(
        prometheus.HistogramOpts{
            Name: "chorus_ucxi_resolution_latency_seconds",
@@ -425,7 +436,7 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) {
            Buckets: config.LatencyBuckets,
        },
    )

    // Resource metrics
    m.cpuUsage = promauto.NewGauge(
        prometheus.GaugeOpts{
@@ -433,14 +444,14 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) {
            Help: "CPU usage ratio (0-1)",
        },
    )

    m.memoryUsage = promauto.NewGauge(
        prometheus.GaugeOpts{
            Name: "chorus_memory_usage_bytes",
            Help: "Memory usage in bytes",
        },
    )

    m.diskUsage = promauto.NewGaugeVec(
        prometheus.GaugeOpts{
            Name: "chorus_disk_usage_ratio",
@@ -448,14 +459,14 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) {
        },
        []string{"mount_point"},
    )

    m.goroutines = promauto.NewGauge(
        prometheus.GaugeOpts{
            Name: "chorus_goroutines",
            Help: "Number of goroutines",
        },
    )

    // Error metrics
    m.errors = promauto.NewCounterVec(
        prometheus.CounterOpts{
@@ -464,7 +475,7 @@ func (m *CHORUSMetrics) initializeMetrics(config *MetricsConfig) {
        },
        []string{"component", "error_type"},
    )

    m.panics = promauto.NewCounter(
        prometheus.CounterOpts{
            Name: "chorus_panics_total",
@@ -482,31 +493,31 @@ func (m *CHORUSMetrics) registerMetrics() {
// StartServer starts the Prometheus metrics HTTP server
func (m *CHORUSMetrics) StartServer(config *MetricsConfig) error {
    mux := http.NewServeMux()

    // Use custom registry
    handler := promhttp.HandlerFor(m.registry, promhttp.HandlerOpts{
        EnableOpenMetrics: true,
    })
    mux.Handle(config.MetricsPath, handler)

    // Health endpoint
    mux.HandleFunc("/health", func(w http.ResponseWriter, r *http.Request) {
        w.WriteHeader(http.StatusOK)
        w.Write([]byte("OK"))
    })

    m.httpServer = &http.Server{
        Addr:    config.ListenAddr,
        Handler: mux,
    }

    go func() {
        log.Printf("Starting metrics server on %s%s", config.ListenAddr, config.MetricsPath)
        if err := m.httpServer.ListenAndServe(); err != nil && err != http.ErrServerClosed {
            log.Printf("Metrics server error: %v", err)
        }
    }()

    return nil
}
@@ -656,6 +667,15 @@ func (m *CHORUSMetrics) SetSLURPQueueLength(length int) {
    m.slurpQueueLength.Set(float64(length))
}

// SHHH Metrics Methods

func (m *CHORUSMetrics) IncrementSHHHFindings(rule, severity string, count int) {
    if m == nil || m.shhhFindings == nil || count <= 0 {
        return
    }
    m.shhhFindings.WithLabelValues(rule, severity).Add(float64(count))
}
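The counter above pairs naturally with the SHHH finding observer introduced later in this commit. A sketch under stated assumptions: the FindingObserver signature and IncrementSHHHFindings are from this commit, but the Finding field names (Rule, Severity, Count) are assumptions not confirmed by the diff, and the chorus/pkg/shhh import is implied.

// Sketch only: bridge SHHH findings into the chorus_shhh_findings_total counter.
// Finding.Rule, Finding.Severity, and Finding.Count are assumed field names.
func shhhMetricsObserver(m *CHORUSMetrics) shhh.FindingObserver {
    return func(ctx context.Context, findings []shhh.Finding) {
        for _, f := range findings {
            m.IncrementSHHHFindings(f.Rule, string(f.Severity), f.Count)
        }
    }
}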
// UCXI Metrics Methods

func (m *CHORUSMetrics) IncrementUCXIRequests(method, status string) {
@@ -708,21 +728,21 @@ func (m *CHORUSMetrics) UpdateUptime() {
func (m *CHORUSMetrics) CollectMetrics(config *MetricsConfig) {
    systemTicker := time.NewTicker(config.SystemMetricsInterval)
    resourceTicker := time.NewTicker(config.ResourceMetricsInterval)

    go func() {
        defer systemTicker.Stop()
        defer resourceTicker.Stop()

        for {
            select {
            case <-systemTicker.C:
                m.UpdateUptime()
                // Collect other system metrics

            case <-resourceTicker.C:
                // Collect resource metrics (would integrate with actual system monitoring)
                // m.collectResourceMetrics()
            }
        }
    }()
}
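Taken together, the constructors in this file bootstrap in three calls. A minimal sketch using only identifiers that appear above; the listen address override is illustrative:

// Sketch only: bring up metrics collection and the metrics endpoint.
func bootstrapMetrics() *CHORUSMetrics {
    cfg := DefaultMetricsConfig()
    cfg.ListenAddr = ":9090" // illustrative; any free port works
    m := NewCHORUSMetrics(cfg)
    if err := m.StartServer(cfg); err != nil {
        log.Fatalf("metrics server failed to start: %v", err)
    }
    m.CollectMetrics(cfg)
    return m
}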
pkg/shhh/doc.go (new file, 11 lines)
@@ -0,0 +1,11 @@
// Package shhh provides the CHORUS secrets sentinel responsible for detecting
// and redacting sensitive values before they leave the runtime. The sentinel
// focuses on predictable failure modes (log emission, telemetry fan-out,
// request forwarding) and offers a composable API for registering additional
// redaction rules, emitting audit events, and tracking operational metrics.
//
// The initial implementation focuses on high-signal secrets (API keys,
// bearer/OAuth tokens, private keys) so the runtime can start integrating
// SHHH into COOEE and WHOOSH logging immediately while the broader roadmap
// items (automated redaction replay, policy driven rules) continue landing.
package shhh
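A minimal usage sketch for the sentinel, assuming only the Config fields and the NewSentinel/RedactText signatures defined in pkg/shhh later in this commit; the sample input, label key, and imports (context, log, chorus/pkg/shhh) are illustrative:

// Sketch only: redact a log line before it is emitted.
func redactExample(ctx context.Context) {
    sentinel, err := shhh.NewSentinel(shhh.Config{RedactionPlaceholder: "[REDACTED]"})
    if err != nil {
        log.Fatalf("shhh init failed: %v", err)
    }
    clean, findings := sentinel.RedactText(ctx, "authorization: bearer abc123supersecret", map[string]string{
        "source": "hypercore",
    })
    log.Printf("shhh redacted %d finding(s): %s", len(findings), clean)
}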
pkg/shhh/rule.go (new file, 130 lines)
@@ -0,0 +1,130 @@
package shhh

import (
    "crypto/sha256"
    "encoding/base64"
    "regexp"
    "sort"
    "strings"
)

type compiledRule struct {
    name        string
    regex       *regexp.Regexp
    replacement string
    severity    Severity
    tags        []string
}

type matchRecord struct {
    value string
}

func (r *compiledRule) apply(in string) (string, []matchRecord) {
    indices := r.regex.FindAllStringSubmatchIndex(in, -1)
    if len(indices) == 0 {
        return in, nil
    }

    var builder strings.Builder
    builder.Grow(len(in))

    matches := make([]matchRecord, 0, len(indices))
    last := 0
    for _, loc := range indices {
        start, end := loc[0], loc[1]
        builder.WriteString(in[last:start])
        replaced := r.regex.ExpandString(nil, r.replacement, in, loc)
        builder.Write(replaced)
        matches = append(matches, matchRecord{value: in[start:end]})
        last = end
    }
    builder.WriteString(in[last:])

    return builder.String(), matches
}

func buildDefaultRuleConfigs(placeholder string) []RuleConfig {
    if placeholder == "" {
        placeholder = "[REDACTED]"
    }
    return []RuleConfig{
        {
            Name:                "bearer-token",
            Pattern:             `(?i)(authorization\s*:\s*bearer\s+)([A-Za-z0-9\-._~+/]+=*)`,
            ReplacementTemplate: "$1" + placeholder,
            Severity:            SeverityMedium,
            Tags:                []string{"token", "http"},
        },
        {
            Name:                "api-key",
            Pattern:             `(?i)((?:api[_-]?key|token|secret|password)\s*[:=]\s*["']?)([A-Za-z0-9\-._~+/]{8,})(["']?)`,
            ReplacementTemplate: "$1" + placeholder + "$3",
            Severity:            SeverityHigh,
            Tags:                []string{"credentials"},
        },
        {
            Name:                "openai-secret",
            Pattern:             `(sk-[A-Za-z0-9]{20,})`,
            ReplacementTemplate: placeholder,
            Severity:            SeverityHigh,
            Tags:                []string{"llm", "api"},
        },
        {
            Name:                "oauth-refresh-token",
            Pattern:             `(?i)(refresh_token"?\s*[:=]\s*["']?)([A-Za-z0-9\-._~+/]{8,})(["']?)`,
            ReplacementTemplate: "$1" + placeholder + "$3",
            Severity:            SeverityMedium,
            Tags:                []string{"oauth"},
        },
        {
            Name:                "private-key-block",
            Pattern:             `(?s)(-----BEGIN [^-]+ PRIVATE KEY-----)[^-]+(-----END [^-]+ PRIVATE KEY-----)`,
            ReplacementTemplate: "$1\n" + placeholder + "\n$2",
            Severity:            SeverityHigh,
            Tags:                []string{"pem", "key"},
        },
    }
}

func compileRules(cfg Config, placeholder string) ([]*compiledRule, error) {
    configs := make([]RuleConfig, 0)
    if !cfg.DisableDefaultRules {
        configs = append(configs, buildDefaultRuleConfigs(placeholder)...)
    }
    configs = append(configs, cfg.CustomRules...)

    rules := make([]*compiledRule, 0, len(configs))
    for _, rc := range configs {
        if rc.Name == "" || rc.Pattern == "" {
            continue
        }
        replacement := rc.ReplacementTemplate
        if replacement == "" {
            replacement = placeholder
        }
        re, err := regexp.Compile(rc.Pattern)
        if err != nil {
            return nil, err
        }
        compiled := &compiledRule{
            name:        rc.Name,
            replacement: replacement,
            regex:       re,
            severity:    rc.Severity,
            tags:        append([]string(nil), rc.Tags...),
        }
        rules = append(rules, compiled)
    }

    sort.SliceStable(rules, func(i, j int) bool {
        return rules[i].name < rules[j].name
    })

    return rules, nil
}

func hashSecret(value string) string {
    sum := sha256.Sum256([]byte(value))
    return base64.RawStdEncoding.EncodeToString(sum[:])
}
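Custom rules ride alongside the defaults through Config.CustomRules. A sketch under stated assumptions: the RuleConfig fields and SeverityHigh come from this file, while the AWS access-key pattern is an illustrative assumption rather than a shipped default:

// Sketch only: add an extra redaction rule next to the built-in set.
func newSentinelWithAWSRule() (*shhh.Sentinel, error) {
    return shhh.NewSentinel(shhh.Config{
        CustomRules: []shhh.RuleConfig{
            {
                Name:                "aws-access-key",
                Pattern:             `(AKIA[0-9A-Z]{16})`,
                ReplacementTemplate: "[REDACTED]",
                Severity:            shhh.SeverityHigh,
                Tags:                []string{"cloud", "aws"},
            },
        },
    })
}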
407
pkg/shhh/sentinel.go
Normal file
407
pkg/shhh/sentinel.go
Normal file
@@ -0,0 +1,407 @@
package shhh

import (
	"context"
	"errors"
	"fmt"
	"sort"
	"sync"
)

// Option configures the sentinel during construction.
type Option func(*Sentinel)

// FindingObserver receives aggregated findings for each redaction operation.
type FindingObserver func(context.Context, []Finding)

// WithAuditSink attaches an audit sink for per-redaction events.
func WithAuditSink(sink AuditSink) Option {
	return func(s *Sentinel) {
		s.audit = sink
	}
}

// WithStats allows callers to supply a shared stats collector.
func WithStats(stats *Stats) Option {
	return func(s *Sentinel) {
		s.stats = stats
	}
}

// WithFindingObserver registers an observer that is invoked whenever redaction
// produces findings.
func WithFindingObserver(observer FindingObserver) Option {
	return func(s *Sentinel) {
		if observer == nil {
			return
		}
		s.observers = append(s.observers, observer)
	}
}

// Sentinel performs secret detection/redaction across text payloads.
type Sentinel struct {
	mu          sync.RWMutex
	enabled     bool
	placeholder string
	rules       []*compiledRule
	audit       AuditSink
	stats       *Stats
	observers   []FindingObserver
}

// NewSentinel creates a new secrets sentinel using the provided configuration.
func NewSentinel(cfg Config, opts ...Option) (*Sentinel, error) {
	placeholder := cfg.RedactionPlaceholder
	if placeholder == "" {
		placeholder = "[REDACTED]"
	}

	s := &Sentinel{
		enabled:     !cfg.Disabled,
		placeholder: placeholder,
		stats:       NewStats(),
	}
	for _, opt := range opts {
		opt(s)
	}
	if s.stats == nil {
		s.stats = NewStats()
	}

	rules, err := compileRules(cfg, placeholder)
	if err != nil {
		return nil, fmt.Errorf("compile SHHH rules: %w", err)
	}
	if len(rules) == 0 {
		return nil, errors.New("no SHHH rules configured")
	}
	s.rules = rules

	return s, nil
}

// Enabled reports whether the sentinel is actively redacting.
func (s *Sentinel) Enabled() bool {
	s.mu.RLock()
	defer s.mu.RUnlock()
	return s.enabled
}

// Toggle enables or disables the sentinel at runtime.
func (s *Sentinel) Toggle(enabled bool) {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.enabled = enabled
}

// SetAuditSink updates the audit sink at runtime.
func (s *Sentinel) SetAuditSink(sink AuditSink) {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.audit = sink
}

// AddFindingObserver registers an observer after construction.
func (s *Sentinel) AddFindingObserver(observer FindingObserver) {
	if observer == nil {
		return
	}
	s.mu.Lock()
	defer s.mu.Unlock()
	s.observers = append(s.observers, observer)
}

// StatsSnapshot returns a snapshot of the current counters.
func (s *Sentinel) StatsSnapshot() StatsSnapshot {
	s.mu.RLock()
	stats := s.stats
	s.mu.RUnlock()
	if stats == nil {
		return StatsSnapshot{}
	}
	return stats.Snapshot()
}

// RedactText scans the provided text and redacts any findings.
func (s *Sentinel) RedactText(ctx context.Context, text string, labels map[string]string) (string, []Finding) {
	s.mu.RLock()
	enabled := s.enabled
	rules := s.rules
	stats := s.stats
	audit := s.audit
	s.mu.RUnlock()

	if !enabled || len(rules) == 0 {
		return text, nil
	}
	if stats != nil {
		stats.IncScan()
	}

	aggregates := make(map[string]*findingAggregate)
	current := text
	path := derivePath(labels)

	for _, rule := range rules {
		redacted, matches := rule.apply(current)
		if len(matches) == 0 {
			continue
		}
		current = redacted
		if stats != nil {
			stats.AddFindings(rule.name, len(matches))
		}
		recordAggregate(aggregates, rule, path, len(matches))

		if audit != nil {
			metadata := cloneLabels(labels)
			for _, match := range matches {
				event := AuditEvent{
					Rule:     rule.name,
					Severity: rule.severity,
					Tags:     append([]string(nil), rule.tags...),
					Path:     path,
					Hash:     hashSecret(match.value),
					Metadata: metadata,
				}
				audit.RecordRedaction(ctx, event)
			}
		}
	}

	findings := flattenAggregates(aggregates)
	s.notifyObservers(ctx, findings)
	return current, findings
}

// RedactMap walks the map and redacts in-place. It returns the collected findings.
func (s *Sentinel) RedactMap(ctx context.Context, payload map[string]any) []Finding {
	return s.RedactMapWithLabels(ctx, payload, nil)
}

// RedactMapWithLabels allows callers to specify base labels that will be merged
// into metadata for nested structures.
func (s *Sentinel) RedactMapWithLabels(ctx context.Context, payload map[string]any, baseLabels map[string]string) []Finding {
	if payload == nil {
		return nil
	}

	aggregates := make(map[string]*findingAggregate)
	s.redactValue(ctx, payload, "", baseLabels, aggregates)
	findings := flattenAggregates(aggregates)
	s.notifyObservers(ctx, findings)
	return findings
}

func (s *Sentinel) redactValue(ctx context.Context, value any, path string, baseLabels map[string]string, agg map[string]*findingAggregate) {
	switch v := value.(type) {
	case map[string]interface{}:
		for key, val := range v {
			childPath := joinPath(path, key)
			switch typed := val.(type) {
			case string:
				labels := mergeLabels(baseLabels, childPath)
				redacted, findings := s.RedactText(ctx, typed, labels)
				if redacted != typed {
					v[key] = redacted
				}
				mergeAggregates(agg, findings)
			case fmt.Stringer:
				labels := mergeLabels(baseLabels, childPath)
				text := typed.String()
				redacted, findings := s.RedactText(ctx, text, labels)
				if redacted != text {
					v[key] = redacted
				}
				mergeAggregates(agg, findings)
			default:
				s.redactValue(ctx, typed, childPath, baseLabels, agg)
			}
		}
	case []interface{}:
		for idx, item := range v {
			childPath := indexPath(path, idx)
			switch typed := item.(type) {
			case string:
				labels := mergeLabels(baseLabels, childPath)
				redacted, findings := s.RedactText(ctx, typed, labels)
				if redacted != typed {
					v[idx] = redacted
				}
				mergeAggregates(agg, findings)
			case fmt.Stringer:
				labels := mergeLabels(baseLabels, childPath)
				text := typed.String()
				redacted, findings := s.RedactText(ctx, text, labels)
				if redacted != text {
					v[idx] = redacted
				}
				mergeAggregates(agg, findings)
			default:
				s.redactValue(ctx, typed, childPath, baseLabels, agg)
			}
		}
	case []string:
		for idx, item := range v {
			childPath := indexPath(path, idx)
			labels := mergeLabels(baseLabels, childPath)
			redacted, findings := s.RedactText(ctx, item, labels)
			if redacted != item {
				v[idx] = redacted
			}
			mergeAggregates(agg, findings)
		}
	}
}

func (s *Sentinel) notifyObservers(ctx context.Context, findings []Finding) {
	if len(findings) == 0 {
		return
	}
	findingsCopy := append([]Finding(nil), findings...)
	s.mu.RLock()
	observers := append([]FindingObserver(nil), s.observers...)
	s.mu.RUnlock()
	for _, observer := range observers {
		observer(ctx, findingsCopy)
	}
}

func mergeAggregates(dest map[string]*findingAggregate, findings []Finding) {
	for i := range findings {
		f := findings[i]
		agg := dest[f.Rule]
		if agg == nil {
			agg = &findingAggregate{
				rule:      f.Rule,
				severity:  f.Severity,
				tags:      append([]string(nil), f.Tags...),
				locations: make(map[string]int),
			}
			dest[f.Rule] = agg
		}
		agg.count += f.Count
		for _, loc := range f.Locations {
			agg.locations[loc.Path] += loc.Count
		}
	}
}

func recordAggregate(dest map[string]*findingAggregate, rule *compiledRule, path string, count int) {
	agg := dest[rule.name]
	if agg == nil {
		agg = &findingAggregate{
			rule:      rule.name,
			severity:  rule.severity,
			tags:      append([]string(nil), rule.tags...),
			locations: make(map[string]int),
		}
		dest[rule.name] = agg
	}
	agg.count += count
	if path != "" {
		agg.locations[path] += count
	}
}

func flattenAggregates(agg map[string]*findingAggregate) []Finding {
	if len(agg) == 0 {
		return nil
	}
	keys := make([]string, 0, len(agg))
	for key := range agg {
		keys = append(keys, key)
	}
	sort.Strings(keys)

	findings := make([]Finding, 0, len(agg))
	for _, key := range keys {
		entry := agg[key]
		locations := make([]Location, 0, len(entry.locations))
		if len(entry.locations) > 0 {
			paths := make([]string, 0, len(entry.locations))
			for path := range entry.locations {
				paths = append(paths, path)
			}
			sort.Strings(paths)
			for _, path := range paths {
				locations = append(locations, Location{Path: path, Count: entry.locations[path]})
			}
		}
		findings = append(findings, Finding{
			Rule:      entry.rule,
			Severity:  entry.severity,
			Tags:      append([]string(nil), entry.tags...),
			Count:     entry.count,
			Locations: locations,
		})
	}
	return findings
}

func derivePath(labels map[string]string) string {
	if labels == nil {
		return ""
	}
	if path := labels["path"]; path != "" {
		return path
	}
	if path := labels["source"]; path != "" {
		return path
	}
	if path := labels["field"]; path != "" {
		return path
	}
	return ""
}

func cloneLabels(labels map[string]string) map[string]string {
	if len(labels) == 0 {
		return nil
	}
	clone := make(map[string]string, len(labels))
	for k, v := range labels {
		clone[k] = v
	}
	return clone
}

func joinPath(prefix, key string) string {
	if prefix == "" {
		return key
	}
	if key == "" {
		return prefix
	}
	return prefix + "." + key
}

func indexPath(prefix string, idx int) string {
	if prefix == "" {
		return fmt.Sprintf("[%d]", idx)
	}
	return fmt.Sprintf("%s[%d]", prefix, idx)
}

func mergeLabels(base map[string]string, path string) map[string]string {
	if base == nil && path == "" {
		return nil
	}
	labels := cloneLabels(base)
	if labels == nil {
		labels = make(map[string]string, 1)
	}
	if path != "" {
		labels["path"] = path
	}
	return labels
}

type findingAggregate struct {
	rule      string
	severity  Severity
	tags      []string
	count     int
	locations map[string]int
}
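For reviewers, a minimal usage sketch of the sentinel API above; the import path follows the `chorus/pkg/shhh` module used elsewhere in this commit, while the sample inputs and labels are illustrative only.

package main

import (
	"context"
	"fmt"

	"chorus/pkg/shhh"
)

func main() {
	// Default rule set with the default "[REDACTED]" placeholder.
	sentinel, err := shhh.NewSentinel(shhh.Config{})
	if err != nil {
		panic(err)
	}

	// Scalar redaction; labels feed the audit path via derivePath.
	clean, findings := sentinel.RedactText(context.Background(),
		"Authorization: Bearer eyJhbGciOiJIUzI1NiJ9.example",
		map[string]string{"source": "http.request.headers.authorization"})
	fmt.Println(clean, len(findings))

	// In-place redaction of nested payloads (maps, slices, fmt.Stringer values).
	payload := map[string]any{"config": map[string]any{"api_key": "API_KEY=1234567890ABCDEFG"}}
	_ = sentinel.RedactMap(context.Background(), payload)

	// Aggregate counters for observability.
	fmt.Printf("%+v\n", sentinel.StatsSnapshot())
}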
95 pkg/shhh/sentinel_test.go Normal file
@@ -0,0 +1,95 @@
package shhh

import (
	"context"
	"testing"

	"github.com/stretchr/testify/require"
)

type recordingSink struct {
	events []AuditEvent
}

func (r *recordingSink) RecordRedaction(_ context.Context, event AuditEvent) {
	r.events = append(r.events, event)
}

func TestRedactText_DefaultRules(t *testing.T) {
	sentinel, err := NewSentinel(Config{})
	require.NoError(t, err)

	input := "Authorization: Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.secret"
	redacted, findings := sentinel.RedactText(context.Background(), input, map[string]string{"source": "http.request.headers.authorization"})

	require.Equal(t, "Authorization: Bearer [REDACTED]", redacted)
	require.Len(t, findings, 1)
	require.Equal(t, "bearer-token", findings[0].Rule)
	require.Equal(t, 1, findings[0].Count)
	require.NotEmpty(t, findings[0].Locations)

	snapshot := sentinel.StatsSnapshot()
	require.Equal(t, uint64(1), snapshot.TotalScans)
	require.Equal(t, uint64(1), snapshot.TotalFindings)
	require.Equal(t, uint64(1), snapshot.PerRuleFindings["bearer-token"])
}

func TestRedactMap_NestedStructures(t *testing.T) {
	sentinel, err := NewSentinel(Config{})
	require.NoError(t, err)

	payload := map[string]any{
		"config": map[string]any{
			"api_key": "API_KEY=1234567890ABCDEFG",
		},
		"tokens": []any{
			"sk-test1234567890ABCDEF",
			map[string]any{"refresh": "refresh_token=abcdef12345"},
		},
	}

	findings := sentinel.RedactMap(context.Background(), payload)
	require.NotEmpty(t, findings)

	config := payload["config"].(map[string]any)
	require.Equal(t, "API_KEY=[REDACTED]", config["api_key"])

	tokens := payload["tokens"].([]any)
	require.Equal(t, "[REDACTED]", tokens[0])

	inner := tokens[1].(map[string]any)
	require.Equal(t, "refresh_token=[REDACTED]", inner["refresh"])

	total := 0
	for _, finding := range findings {
		total += finding.Count
	}
	require.Equal(t, 3, total)
}

func TestAuditSinkReceivesEvents(t *testing.T) {
	sink := &recordingSink{}
	cfg := Config{
		DisableDefaultRules: true,
		CustomRules: []RuleConfig{
			{
				Name:                "custom-secret",
				Pattern:             `(secret\s*=\s*)([A-Za-z0-9]{6,})`,
				ReplacementTemplate: "$1[REDACTED]",
				Severity:            SeverityHigh,
			},
		},
	}

	sentinel, err := NewSentinel(cfg, WithAuditSink(sink))
	require.NoError(t, err)

	_, findings := sentinel.RedactText(context.Background(), "secret=mysecretvalue", map[string]string{"source": "test"})
	require.Len(t, findings, 1)
	require.Equal(t, 1, findings[0].Count)

	require.Len(t, sink.events, 1)
	require.Equal(t, "custom-secret", sink.events[0].Rule)
	require.NotEmpty(t, sink.events[0].Hash)
	require.Equal(t, "test", sink.events[0].Path)
}
60 pkg/shhh/stats.go Normal file
@@ -0,0 +1,60 @@
package shhh

import (
	"sync"
	"sync/atomic"
)

// Stats tracks aggregate counts for the sentinel.
type Stats struct {
	totalScans    atomic.Uint64
	totalFindings atomic.Uint64
	perRule       sync.Map // string -> *atomic.Uint64
}

// NewStats constructs a Stats collector.
func NewStats() *Stats {
	return &Stats{}
}

// IncScan increments the total scan counter.
func (s *Stats) IncScan() {
	if s == nil {
		return
	}
	s.totalScans.Add(1)
}

// AddFindings records findings for a rule.
func (s *Stats) AddFindings(rule string, count int) {
	if s == nil || count <= 0 {
		return
	}
	s.totalFindings.Add(uint64(count))
	counterAny, _ := s.perRule.LoadOrStore(rule, new(atomic.Uint64))
	counter := counterAny.(*atomic.Uint64)
	counter.Add(uint64(count))
}

// Snapshot returns a point-in-time view of the counters.
func (s *Stats) Snapshot() StatsSnapshot {
	if s == nil {
		return StatsSnapshot{}
	}
	snapshot := StatsSnapshot{
		TotalScans:      s.totalScans.Load(),
		TotalFindings:   s.totalFindings.Load(),
		PerRuleFindings: make(map[string]uint64),
	}
	s.perRule.Range(func(key, value any) bool {
		name, ok := key.(string)
		if !ok {
			return true
		}
		if counter, ok := value.(*atomic.Uint64); ok {
			snapshot.PerRuleFindings[name] = counter.Load()
		}
		return true
	})
	return snapshot
}
73 pkg/shhh/types.go Normal file
@@ -0,0 +1,73 @@
package shhh

import "context"

// Severity represents the criticality associated with a redaction finding.
type Severity string

const (
	// SeverityLow indicates low-impact findings (e.g. non-production credentials).
	SeverityLow Severity = "low"
	// SeverityMedium indicates medium impact findings (e.g. access tokens).
	SeverityMedium Severity = "medium"
	// SeverityHigh indicates high-impact findings (e.g. private keys).
	SeverityHigh Severity = "high"
)

// RuleConfig defines a redaction rule that SHHH should enforce.
type RuleConfig struct {
	Name                string   `json:"name"`
	Pattern             string   `json:"pattern"`
	ReplacementTemplate string   `json:"replacement_template"`
	Severity            Severity `json:"severity"`
	Tags                []string `json:"tags"`
}

// Config controls sentinel behaviour.
type Config struct {
	// Disabled toggles redaction off entirely.
	Disabled bool `json:"disabled"`
	// RedactionPlaceholder overrides the default placeholder value.
	RedactionPlaceholder string `json:"redaction_placeholder"`
	// DisableDefaultRules disables the built-in curated rule set.
	DisableDefaultRules bool `json:"disable_default_rules"`
	// CustomRules allows callers to append bespoke redaction patterns.
	CustomRules []RuleConfig `json:"custom_rules"`
}

// Finding represents a single rule firing during redaction.
type Finding struct {
	Rule      string     `json:"rule"`
	Severity  Severity   `json:"severity"`
	Tags      []string   `json:"tags,omitempty"`
	Count     int        `json:"count"`
	Locations []Location `json:"locations,omitempty"`
}

// Location describes where a secret was found.
type Location struct {
	Path  string `json:"path"`
	Count int    `json:"count"`
}

// StatsSnapshot exposes aggregate counters for observability.
type StatsSnapshot struct {
	TotalScans      uint64            `json:"total_scans"`
	TotalFindings   uint64            `json:"total_findings"`
	PerRuleFindings map[string]uint64 `json:"per_rule_findings"`
}

// AuditEvent captures a single redaction occurrence for downstream sinks.
type AuditEvent struct {
	Rule     string            `json:"rule"`
	Severity Severity          `json:"severity"`
	Tags     []string          `json:"tags,omitempty"`
	Path     string            `json:"path,omitempty"`
	Hash     string            `json:"hash"`
	Metadata map[string]string `json:"metadata,omitempty"`
}

// AuditSink receives redaction events for long term storage / replay.
type AuditSink interface {
	RecordRedaction(ctx context.Context, event AuditEvent)
}
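A sketch of how a caller might combine a bespoke rule with an audit sink against these types; the sink, rule name, and pattern below are assumptions for illustration, not part of this commit.

package main

import (
	"context"
	"log"

	"chorus/pkg/shhh"
)

// logSink is a hypothetical AuditSink that just logs redaction events.
type logSink struct{}

func (logSink) RecordRedaction(_ context.Context, ev shhh.AuditEvent) {
	log.Printf("shhh redaction rule=%s severity=%s path=%s hash=%s", ev.Rule, ev.Severity, ev.Path, ev.Hash)
}

func main() {
	cfg := shhh.Config{
		CustomRules: []shhh.RuleConfig{{
			Name:                "internal-token",              // assumed rule name
			Pattern:             `(X-Internal-Token:\s*)(\S+)`, // assumed pattern
			ReplacementTemplate: "$1[REDACTED]",
			Severity:            shhh.SeverityMedium,
		}},
	}
	sentinel, err := shhh.NewSentinel(cfg, shhh.WithAuditSink(logSink{}))
	if err != nil {
		log.Fatal(err)
	}
	_, _ = sentinel.RedactText(context.Background(),
		"X-Internal-Token: abc123secret", map[string]string{"source": "example"})
}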
@@ -13,11 +13,11 @@ import (

// DecisionPublisher handles publishing task completion decisions to encrypted DHT storage
type DecisionPublisher struct {
	ctx        context.Context
	config     *config.Config
	dhtStorage storage.UCXLStorage
	nodeID     string
	agentName  string
}

// NewDecisionPublisher creates a new decision publisher
@@ -39,28 +39,28 @@ func NewDecisionPublisher(

// TaskDecision represents a decision made by an agent upon task completion
type TaskDecision struct {
	Agent         string                 `json:"agent"`
	Role          string                 `json:"role"`
	Project       string                 `json:"project"`
	Task          string                 `json:"task"`
	Decision      string                 `json:"decision"`
	Context       map[string]interface{} `json:"context"`
	Timestamp     time.Time              `json:"timestamp"`
	Success       bool                   `json:"success"`
	ErrorMessage  string                 `json:"error_message,omitempty"`
	FilesModified []string               `json:"files_modified,omitempty"`
	LinesChanged  int                    `json:"lines_changed,omitempty"`
	TestResults   *TestResults           `json:"test_results,omitempty"`
	Dependencies  []string               `json:"dependencies,omitempty"`
	NextSteps     []string               `json:"next_steps,omitempty"`
}

// TestResults captures test execution results
type TestResults struct {
	Passed      int      `json:"passed"`
	Failed      int      `json:"failed"`
	Skipped     int      `json:"skipped"`
	Coverage    float64  `json:"coverage,omitempty"`
	FailedTests []string `json:"failed_tests,omitempty"`
}

@@ -74,7 +74,11 @@ func (dp *DecisionPublisher) PublishTaskDecision(decision *TaskDecision) error {
		decision.Role = dp.config.Agent.Role
	}
	if decision.Project == "" {
-		decision.Project = "default-project" // TODO: Add project field to config
+		if project := dp.config.Agent.Project; project != "" {
+			decision.Project = project
+		} else {
+			decision.Project = "chorus"
+		}
	}
	if decision.Timestamp.IsZero() {
		decision.Timestamp = time.Now()
@@ -173,16 +177,16 @@ func (dp *DecisionPublisher) PublishArchitecturalDecision(
	nextSteps []string,
) error {
	taskDecision := &TaskDecision{
		Task:      taskName,
		Decision:  decision,
		Success:   true,
		NextSteps: nextSteps,
		Context: map[string]interface{}{
			"decision_type": "architecture",
			"rationale":     rationale,
			"alternatives":  alternatives,
			"implications":  implications,
			"node_id":       dp.nodeID,
		},
	}

@@ -291,7 +295,7 @@ func (dp *DecisionPublisher) SubscribeToDecisions(
) error {
	// This is a placeholder for future pubsub implementation
	// For now, we'll implement a simple polling mechanism

	go func() {
		ticker := time.NewTicker(30 * time.Second)
		defer ticker.Stop()
@@ -341,10 +345,10 @@ func (dp *DecisionPublisher) PublishSystemStatus(
		Decision: status,
		Success:  dp.allHealthChecksPass(healthChecks),
		Context: map[string]interface{}{
			"decision_type": "system",
			"metrics":       metrics,
			"health_checks": healthChecks,
			"node_id":       dp.nodeID,
		},
	}

@@ -364,13 +368,17 @@ func (dp *DecisionPublisher) allHealthChecksPass(healthChecks map[string]bool) b
// GetPublisherMetrics returns metrics about the decision publisher
func (dp *DecisionPublisher) GetPublisherMetrics() map[string]interface{} {
	dhtMetrics := dp.dhtStorage.GetMetrics()
+	project := dp.config.Agent.Project
+	if project == "" {
+		project = "chorus"
+	}

	return map[string]interface{}{
		"node_id":      dp.nodeID,
		"agent_name":   dp.agentName,
		"current_role": dp.config.Agent.Role,
-		"project":      "default-project", // TODO: Add project field to config
+		"project":      project,
		"dht_metrics":  dhtMetrics,
		"last_publish": time.Now(), // This would be tracked in a real implementation
	}
}
443 pubsub/pubsub.go
@@ -8,9 +8,10 @@ import (
	"sync"
	"time"

+	"chorus/pkg/shhh"
+	pubsub "github.com/libp2p/go-libp2p-pubsub"
	"github.com/libp2p/go-libp2p/core/host"
	"github.com/libp2p/go-libp2p/core/peer"
-	pubsub "github.com/libp2p/go-libp2p-pubsub"
)

// PubSub handles publish/subscribe messaging for Bzzz coordination and HMMM meta-discussion
@@ -19,36 +20,42 @@ type PubSub struct {
	host   host.Host
	ctx    context.Context
	cancel context.CancelFunc

	// Topic subscriptions
	chorusTopic  *pubsub.Topic
	hmmmTopic    *pubsub.Topic
	contextTopic *pubsub.Topic

	// Message subscriptions
	chorusSub  *pubsub.Subscription
	hmmmSub    *pubsub.Subscription
	contextSub *pubsub.Subscription

	// Dynamic topic management
	dynamicTopics    map[string]*pubsub.Topic
	dynamicTopicsMux sync.RWMutex
	dynamicSubs      map[string]*pubsub.Subscription
	dynamicSubsMux   sync.RWMutex
+	dynamicHandlers    map[string]func([]byte, peer.ID)
+	dynamicHandlersMux sync.RWMutex

	// Configuration
	chorusTopicName  string
	hmmmTopicName    string
	contextTopicName string

	// External message handler for HMMM messages
	HmmmMessageHandler func(msg Message, from peer.ID)

	// External message handler for Context Feedback messages
	ContextFeedbackHandler func(msg Message, from peer.ID)

	// Hypercore-style logging
	hypercoreLog HypercoreLogger

+	// SHHH sentinel
+	redactor    *shhh.Sentinel
+	redactorMux sync.RWMutex
}

// HypercoreLogger interface for dependency injection
@@ -62,45 +69,45 @@ type MessageType string

const (
	// Bzzz coordination messages
	TaskAnnouncement  MessageType = "task_announcement"
	TaskClaim         MessageType = "task_claim"
	TaskProgress      MessageType = "task_progress"
	TaskComplete      MessageType = "task_complete"
	CapabilityBcast   MessageType = "capability_broadcast"   // Only broadcast when capabilities change
	AvailabilityBcast MessageType = "availability_broadcast" // Regular availability status

	// HMMM meta-discussion messages
	MetaDiscussion       MessageType = "meta_discussion"       // Generic type for all discussion
	TaskHelpRequest      MessageType = "task_help_request"     // Request for assistance
	TaskHelpResponse     MessageType = "task_help_response"    // Response to a help request
	CoordinationRequest  MessageType = "coordination_request"  // Request for coordination
	CoordinationComplete MessageType = "coordination_complete" // Coordination session completed
	DependencyAlert      MessageType = "dependency_alert"      // Dependency detected
	EscalationTrigger    MessageType = "escalation_trigger"    // Human escalation needed

	// Role-based collaboration messages
	RoleAnnouncement   MessageType = "role_announcement"   // Agent announces its role and capabilities
	ExpertiseRequest   MessageType = "expertise_request"   // Request for specific expertise
	ExpertiseResponse  MessageType = "expertise_response"  // Response offering expertise
	StatusUpdate       MessageType = "status_update"       // Regular status updates from agents
	WorkAllocation     MessageType = "work_allocation"     // Allocation of work to specific roles
	RoleCollaboration  MessageType = "role_collaboration"  // Cross-role collaboration message
	MentorshipRequest  MessageType = "mentorship_request"  // Junior role requesting mentorship
	MentorshipResponse MessageType = "mentorship_response" // Senior role providing mentorship
	ProjectUpdate      MessageType = "project_update"      // Project-level status updates
	DeliverableReady   MessageType = "deliverable_ready"   // Notification that deliverable is complete

	// RL Context Curator feedback messages
	FeedbackEvent    MessageType = "feedback_event"    // Context feedback for RL learning
	ContextRequest   MessageType = "context_request"   // Request context from HCFS
	ContextResponse  MessageType = "context_response"  // Response with context data
	ContextUsage     MessageType = "context_usage"     // Report context usage patterns
	ContextRelevance MessageType = "context_relevance" // Report context relevance scoring

	// SLURP event integration messages
	SlurpEventGenerated MessageType = "slurp_event_generated" // HMMM consensus generated SLURP event
	SlurpEventAck       MessageType = "slurp_event_ack"       // Acknowledgment of SLURP event receipt
	SlurpContextUpdate  MessageType = "slurp_context_update"  // Context update from SLURP system
)

// Message represents a Bzzz/Antennae message
@@ -110,14 +117,14 @@ type Message struct {
	Timestamp time.Time              `json:"timestamp"`
	Data      map[string]interface{} `json:"data"`
	HopCount  int                    `json:"hop_count,omitempty"` // For Antennae hop limiting

	// Role-based collaboration fields
	FromRole          string   `json:"from_role,omitempty"`          // Role of sender
	ToRoles           []string `json:"to_roles,omitempty"`           // Target roles
	RequiredExpertise []string `json:"required_expertise,omitempty"` // Required expertise areas
	ProjectID         string   `json:"project_id,omitempty"`         // Associated project
	Priority          string   `json:"priority,omitempty"`           // Message priority (low, medium, high, urgent)
	ThreadID          string   `json:"thread_id,omitempty"`          // Conversation thread ID
}

// NewPubSub creates a new PubSub instance for Bzzz coordination and HMMM meta-discussion
@@ -150,16 +157,17 @@ func NewPubSubWithLogger(ctx context.Context, h host.Host, chorusTopic, hmmmTopi
	}

	p := &PubSub{
		ps:               ps,
		host:             h,
		ctx:              pubsubCtx,
		cancel:           cancel,
		chorusTopicName:  chorusTopic,
		hmmmTopicName:    hmmmTopic,
		contextTopicName: contextTopic,
		dynamicTopics:    make(map[string]*pubsub.Topic),
		dynamicSubs:      make(map[string]*pubsub.Subscription),
+		dynamicHandlers:  make(map[string]func([]byte, peer.ID)),
		hypercoreLog:     logger,
	}

	// Join static topics
@@ -177,6 +185,13 @@ func NewPubSubWithLogger(ctx context.Context, h host.Host, chorusTopic, hmmmTopi
	return p, nil
}

+// SetRedactor wires the SHHH sentinel so outbound messages are sanitized before publication.
+func (p *PubSub) SetRedactor(redactor *shhh.Sentinel) {
+	p.redactorMux.Lock()
+	defer p.redactorMux.Unlock()
+	p.redactor = redactor
+}
+
// SetHmmmMessageHandler sets the handler for incoming HMMM messages.
func (p *PubSub) SetHmmmMessageHandler(handler func(msg Message, from peer.ID)) {
	p.HmmmMessageHandler = handler
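Wiring sketch for the new hook: only `shhh.NewSentinel` and `SetRedactor` come from this diff; the surrounding helper function and its config argument are assumptions for illustration.

import (
	"fmt"

	"chorus/pkg/shhh"
	"chorus/pubsub"
)

// attachRedactor builds a SHHH sentinel from the supplied config and wires it
// into an existing PubSub so outbound payloads are sanitized before publish.
func attachRedactor(ps *pubsub.PubSub, cfg shhh.Config) error {
	sentinel, err := shhh.NewSentinel(cfg)
	if err != nil {
		return fmt.Errorf("init SHHH sentinel: %w", err)
	}
	ps.SetRedactor(sentinel)
	return nil
}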
@@ -231,15 +246,21 @@ func (p *PubSub) joinStaticTopics() error {
	return nil
}

-// JoinDynamicTopic joins a new topic for a specific task
-func (p *PubSub) JoinDynamicTopic(topicName string) error {
-	p.dynamicTopicsMux.Lock()
-	defer p.dynamicTopicsMux.Unlock()
-	p.dynamicSubsMux.Lock()
-	defer p.dynamicSubsMux.Unlock()
-
-	if _, exists := p.dynamicTopics[topicName]; exists {
-		return nil // Already joined
-	}
+// subscribeDynamicTopic joins a topic and optionally assigns a raw handler.
+func (p *PubSub) subscribeDynamicTopic(topicName string, handler func([]byte, peer.ID)) error {
+	if topicName == "" {
+		return fmt.Errorf("topic name cannot be empty")
+	}
+
+	p.dynamicTopicsMux.RLock()
+	_, exists := p.dynamicTopics[topicName]
+	p.dynamicTopicsMux.RUnlock()
+
+	if exists {
+		p.dynamicHandlersMux.Lock()
+		p.dynamicHandlers[topicName] = handler
+		p.dynamicHandlersMux.Unlock()
+		return nil
+	}

	topic, err := p.ps.Join(topicName)
@@ -253,38 +274,68 @@ func (p *PubSub) JoinDynamicTopic(topicName string) error {
		return fmt.Errorf("failed to subscribe to dynamic topic %s: %w", topicName, err)
	}

+	p.dynamicTopicsMux.Lock()
+	if _, already := p.dynamicTopics[topicName]; already {
+		p.dynamicTopicsMux.Unlock()
+		sub.Cancel()
+		topic.Close()
+		p.dynamicHandlersMux.Lock()
+		p.dynamicHandlers[topicName] = handler
+		p.dynamicHandlersMux.Unlock()
+		return nil
+	}
	p.dynamicTopics[topicName] = topic
-	p.dynamicSubs[topicName] = sub
+	p.dynamicTopicsMux.Unlock()

-	// Start a handler for this new subscription
-	go p.handleDynamicMessages(sub)
+	p.dynamicSubsMux.Lock()
+	p.dynamicSubs[topicName] = sub
+	p.dynamicSubsMux.Unlock()
+
+	p.dynamicHandlersMux.Lock()
+	p.dynamicHandlers[topicName] = handler
+	p.dynamicHandlersMux.Unlock()
+
+	go p.handleDynamicMessages(topicName, sub)

	fmt.Printf("✅ Joined dynamic topic: %s\n", topicName)
	return nil
}

+// JoinDynamicTopic joins a new topic for a specific task
+func (p *PubSub) JoinDynamicTopic(topicName string) error {
+	return p.subscribeDynamicTopic(topicName, nil)
+}
+
+// SubscribeRawTopic joins a topic and delivers raw payloads to the provided handler.
+func (p *PubSub) SubscribeRawTopic(topicName string, handler func([]byte, peer.ID)) error {
+	if handler == nil {
+		return fmt.Errorf("handler cannot be nil")
+	}
+	return p.subscribeDynamicTopic(topicName, handler)
+}

// JoinRoleBasedTopics joins topics based on role and expertise
func (p *PubSub) JoinRoleBasedTopics(role string, expertise []string, reportsTo []string) error {
	var topicsToJoin []string

	// Join role-specific topic
	if role != "" {
		roleTopic := fmt.Sprintf("CHORUS/roles/%s/v1", strings.ToLower(strings.ReplaceAll(role, " ", "_")))
		topicsToJoin = append(topicsToJoin, roleTopic)
	}

	// Join expertise-specific topics
	for _, exp := range expertise {
		expertiseTopic := fmt.Sprintf("CHORUS/expertise/%s/v1", strings.ToLower(strings.ReplaceAll(exp, " ", "_")))
		topicsToJoin = append(topicsToJoin, expertiseTopic)
	}

	// Join reporting hierarchy topics
	for _, supervisor := range reportsTo {
		supervisorTopic := fmt.Sprintf("CHORUS/hierarchy/%s/v1", strings.ToLower(strings.ReplaceAll(supervisor, " ", "_")))
		topicsToJoin = append(topicsToJoin, supervisorTopic)
	}

	// Join all identified topics
	for _, topicName := range topicsToJoin {
		if err := p.JoinDynamicTopic(topicName); err != nil {
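A usage sketch for the `SubscribeRawTopic` hook added above; the per-issue topic naming scheme and handler body are illustrative assumptions, not part of the commit.

import (
	"fmt"

	"chorus/pubsub"
	"github.com/libp2p/go-libp2p/core/peer"
)

// subscribeIssueRoom joins a per-issue HMMM room and handles the raw payload
// directly, bypassing the Message envelope used by the default handler path.
func subscribeIssueRoom(ps *pubsub.PubSub, issueID int) error {
	topic := fmt.Sprintf("CHORUS/hmmm/issues/%d/v1", issueID)
	return ps.SubscribeRawTopic(topic, func(payload []byte, from peer.ID) {
		fmt.Printf("raw HMMM payload on %s from %s: %d bytes\n", topic, from.ShortString(), len(payload))
	})
}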
@@ -292,7 +343,7 @@ func (p *PubSub) JoinRoleBasedTopics(role string, expertise []string, reportsTo
			continue
		}
	}

	fmt.Printf("🎯 Joined %d role-based topics for role: %s\n", len(topicsToJoin), role)
	return nil
}
@@ -302,7 +353,7 @@ func (p *PubSub) JoinProjectTopic(projectID string) error {
	if projectID == "" {
		return fmt.Errorf("project ID cannot be empty")
	}

	topicName := fmt.Sprintf("CHORUS/projects/%s/coordination/v1", projectID)
	return p.JoinDynamicTopic(topicName)
}
@@ -324,6 +375,10 @@ func (p *PubSub) LeaveDynamicTopic(topicName string) {
		delete(p.dynamicTopics, topicName)
	}

+	p.dynamicHandlersMux.Lock()
+	delete(p.dynamicHandlers, topicName)
+	p.dynamicHandlersMux.Unlock()
+
	fmt.Printf("🗑️ Left dynamic topic: %s\n", topicName)
}

@@ -337,11 +392,12 @@ func (p *PubSub) PublishToDynamicTopic(topicName string, msgType MessageType, da
		return fmt.Errorf("not subscribed to dynamic topic: %s", topicName)
	}

+	payload := p.sanitizePayload(topicName, msgType, data)
	msg := Message{
		Type:      msgType,
		From:      p.host.ID().String(),
		Timestamp: time.Now(),
-		Data:      data,
+		Data:      payload,
	}

	msgBytes, err := json.Marshal(msg)
@@ -356,34 +412,35 @@ func (p *PubSub) PublishToDynamicTopic(topicName string, msgType MessageType, da
// wrapping it in the CHORUS Message envelope. Intended for HMMM per-issue rooms
// or other modules that maintain their own schemas.
func (p *PubSub) PublishRaw(topicName string, payload []byte) error {
	// Dynamic topic
	p.dynamicTopicsMux.RLock()
	if topic, exists := p.dynamicTopics[topicName]; exists {
		p.dynamicTopicsMux.RUnlock()
		return topic.Publish(p.ctx, payload)
	}
	p.dynamicTopicsMux.RUnlock()

	// Static topics by name
	switch topicName {
	case p.chorusTopicName:
		return p.chorusTopic.Publish(p.ctx, payload)
	case p.hmmmTopicName:
		return p.hmmmTopic.Publish(p.ctx, payload)
	case p.contextTopicName:
		return p.contextTopic.Publish(p.ctx, payload)
	default:
		return fmt.Errorf("not subscribed to topic: %s", topicName)
	}
}

// PublishBzzzMessage publishes a message to the Bzzz coordination topic
func (p *PubSub) PublishBzzzMessage(msgType MessageType, data map[string]interface{}) error {
+	payload := p.sanitizePayload(p.chorusTopicName, msgType, data)
	msg := Message{
		Type:      msgType,
		From:      p.host.ID().String(),
		Timestamp: time.Now(),
-		Data:      data,
+		Data:      payload,
	}

	msgBytes, err := json.Marshal(msg)
@@ -396,11 +453,12 @@ func (p *PubSub) PublishBzzzMessage(msgType MessageType, data map[string]interfa

// PublishHmmmMessage publishes a message to the HMMM meta-discussion topic
func (p *PubSub) PublishHmmmMessage(msgType MessageType, data map[string]interface{}) error {
+	payload := p.sanitizePayload(p.hmmmTopicName, msgType, data)
	msg := Message{
		Type:      msgType,
		From:      p.host.ID().String(),
		Timestamp: time.Now(),
-		Data:      data,
+		Data:      payload,
	}

	msgBytes, err := json.Marshal(msg)
@@ -425,11 +483,12 @@ func (p *PubSub) SetAntennaeMessageHandler(handler func(msg Message, from peer.I

// PublishContextFeedbackMessage publishes a message to the Context Feedback topic
func (p *PubSub) PublishContextFeedbackMessage(msgType MessageType, data map[string]interface{}) error {
+	payload := p.sanitizePayload(p.contextTopicName, msgType, data)
	msg := Message{
		Type:      msgType,
		From:      p.host.ID().String(),
		Timestamp: time.Now(),
-		Data:      data,
+		Data:      payload,
	}

	msgBytes, err := json.Marshal(msg)
@@ -442,11 +501,16 @@ func (p *PubSub) PublishContextFeedbackMessage(msgType MessageType, data map[str

// PublishRoleBasedMessage publishes a role-based collaboration message
func (p *PubSub) PublishRoleBasedMessage(msgType MessageType, data map[string]interface{}, opts MessageOptions) error {
+	topicName := p.chorusTopicName
+	if isRoleMessage(msgType) {
+		topicName = p.hmmmTopicName
+	}
+	payload := p.sanitizePayload(topicName, msgType, data)
	msg := Message{
		Type:              msgType,
		From:              p.host.ID().String(),
		Timestamp:         time.Now(),
-		Data:              data,
+		Data:              payload,
		FromRole:          opts.FromRole,
		ToRoles:           opts.ToRoles,
		RequiredExpertise: opts.RequiredExpertise,
@@ -462,10 +526,8 @@ func (p *PubSub) PublishRoleBasedMessage(msgType MessageType, data map[string]in

	// Determine which topic to use based on message type
	var topic *pubsub.Topic
-	switch msgType {
-	case RoleAnnouncement, ExpertiseRequest, ExpertiseResponse, StatusUpdate,
-		WorkAllocation, RoleCollaboration, MentorshipRequest, MentorshipResponse,
-		ProjectUpdate, DeliverableReady:
+	switch {
+	case isRoleMessage(msgType):
		topic = p.hmmmTopic // Use HMMM topic for role-based messages
	default:
		topic = p.chorusTopic // Default to Bzzz topic
@@ -492,14 +554,14 @@ func (p *PubSub) PublishSlurpContextUpdate(data map[string]interface{}) error {
// PublishSlurpIntegrationEvent publishes a generic SLURP integration event
func (p *PubSub) PublishSlurpIntegrationEvent(eventType string, discussionID string, slurpEvent map[string]interface{}) error {
	data := map[string]interface{}{
		"event_type":    eventType,
		"discussion_id": discussionID,
		"slurp_event":   slurpEvent,
		"timestamp":     time.Now(),
		"source":        "hmmm-slurp-integration",
		"peer_id":       p.host.ID().String(),
	}

	return p.PublishSlurpEventGenerated(data)
}

@@ -604,15 +666,23 @@ func (p *PubSub) handleContextFeedbackMessages() {
	}
}

+// getDynamicHandler returns the raw handler for a topic if registered.
+func (p *PubSub) getDynamicHandler(topicName string) func([]byte, peer.ID) {
+	p.dynamicHandlersMux.RLock()
+	handler := p.dynamicHandlers[topicName]
+	p.dynamicHandlersMux.RUnlock()
+	return handler
+}
+
// handleDynamicMessages processes messages from a dynamic topic subscription
-func (p *PubSub) handleDynamicMessages(sub *pubsub.Subscription) {
+func (p *PubSub) handleDynamicMessages(topicName string, sub *pubsub.Subscription) {
	for {
		msg, err := sub.Next(p.ctx)
		if err != nil {
			if p.ctx.Err() != nil || err.Error() == "subscription cancelled" {
				return // Subscription was cancelled, exit handler
			}
-			fmt.Printf("❌ Error receiving dynamic message: %v\n", err)
+			fmt.Printf("❌ Error receiving dynamic message on %s: %v\n", topicName, err)
			continue
		}

@@ -620,13 +690,18 @@ func (p *PubSub) handleDynamicMessages(sub *pubsub.Subscription) {
			continue
		}

+		if handler := p.getDynamicHandler(topicName); handler != nil {
+			handler(msg.Data, msg.ReceivedFrom)
+			continue
+		}
+
		var dynamicMsg Message
		if err := json.Unmarshal(msg.Data, &dynamicMsg); err != nil {
-			fmt.Printf("❌ Failed to unmarshal dynamic message: %v\n", err)
+			fmt.Printf("❌ Failed to unmarshal dynamic message on %s: %v\n", topicName, err)
			continue
		}

-		// Use the main HMMM handler for all dynamic messages
+		// Use the main HMMM handler for all dynamic messages without custom handlers
		if p.HmmmMessageHandler != nil {
			p.HmmmMessageHandler(dynamicMsg, msg.ReceivedFrom)
		}
@@ -636,7 +711,7 @@ func (p *PubSub) handleDynamicMessages(sub *pubsub.Subscription) {
// processBzzzMessage handles different types of Bzzz coordination messages
func (p *PubSub) processBzzzMessage(msg Message, from peer.ID) {
	fmt.Printf("🐝 Bzzz [%s] from %s: %v\n", msg.Type, from.ShortString(), msg.Data)

	// Log to hypercore if logger is available
	if p.hypercoreLog != nil {
		logData := map[string]interface{}{
@@ -647,7 +722,7 @@ func (p *PubSub) processBzzzMessage(msg Message, from peer.ID) {
|
|||||||
"data": msg.Data,
|
"data": msg.Data,
|
||||||
"topic": "CHORUS",
|
"topic": "CHORUS",
|
||||||
}
|
}
|
||||||
|
|
||||||
// Map pubsub message types to hypercore log types
|
// Map pubsub message types to hypercore log types
|
||||||
var logType string
|
var logType string
|
||||||
switch msg.Type {
|
switch msg.Type {
|
||||||
@@ -666,7 +741,7 @@ func (p *PubSub) processBzzzMessage(msg Message, from peer.ID) {
|
|||||||
default:
|
default:
|
||||||
logType = "network_event"
|
logType = "network_event"
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := p.hypercoreLog.AppendString(logType, logData); err != nil {
|
if err := p.hypercoreLog.AppendString(logType, logData); err != nil {
|
||||||
fmt.Printf("❌ Failed to log Bzzz message to hypercore: %v\n", err)
|
fmt.Printf("❌ Failed to log Bzzz message to hypercore: %v\n", err)
|
||||||
}
|
}
|
||||||
@@ -675,9 +750,9 @@ func (p *PubSub) processBzzzMessage(msg Message, from peer.ID) {

// processHmmmMessage provides default handling for HMMM messages if no external handler is set
func (p *PubSub) processHmmmMessage(msg Message, from peer.ID) {
    fmt.Printf("🎯 Default HMMM Handler [%s] from %s: %v\n",
        msg.Type, from.ShortString(), msg.Data)

    // Log to hypercore if logger is available
    if p.hypercoreLog != nil {
        logData := map[string]interface{}{
@@ -694,7 +769,7 @@ func (p *PubSub) processHmmmMessage(msg Message, from peer.ID) {
            "priority":  msg.Priority,
            "thread_id": msg.ThreadID,
        }

        // Map pubsub message types to hypercore log types
        var logType string
        switch msg.Type {
@@ -717,7 +792,7 @@ func (p *PubSub) processHmmmMessage(msg Message, from peer.ID) {
        default:
            logType = "collaboration"
        }

        if err := p.hypercoreLog.AppendString(logType, logData); err != nil {
            fmt.Printf("❌ Failed to log HMMM message to hypercore: %v\n", err)
        }
@@ -726,25 +801,25 @@ func (p *PubSub) processHmmmMessage(msg Message, from peer.ID) {

// processContextFeedbackMessage provides default handling for context feedback messages if no external handler is set
func (p *PubSub) processContextFeedbackMessage(msg Message, from peer.ID) {
    fmt.Printf("🧠 Context Feedback [%s] from %s: %v\n",
        msg.Type, from.ShortString(), msg.Data)

    // Log to hypercore if logger is available
    if p.hypercoreLog != nil {
        logData := map[string]interface{}{
            "message_type": string(msg.Type),
            "from_peer":    from.String(),
            "from_short":   from.ShortString(),
            "timestamp":    msg.Timestamp,
            "data":         msg.Data,
            "topic":        "context_feedback",
            "from_role":    msg.FromRole,
            "to_roles":     msg.ToRoles,
            "project_id":   msg.ProjectID,
            "priority":     msg.Priority,
            "thread_id":    msg.ThreadID,
        }

        // Map context feedback message types to hypercore log types
        var logType string
        switch msg.Type {
@@ -757,17 +832,79 @@ func (p *PubSub) processContextFeedbackMessage(msg Message, from peer.ID) {
        default:
            logType = "context_feedback"
        }

        if err := p.hypercoreLog.AppendString(logType, logData); err != nil {
            fmt.Printf("❌ Failed to log Context Feedback message to hypercore: %v\n", err)
        }
    }
}

+func (p *PubSub) sanitizePayload(topic string, msgType MessageType, data map[string]interface{}) map[string]interface{} {
+    if data == nil {
+        return nil
+    }
+    cloned := clonePayloadMap(data)
+    p.redactorMux.RLock()
+    redactor := p.redactor
+    p.redactorMux.RUnlock()
+    if redactor != nil {
+        labels := map[string]string{
+            "source":       "pubsub",
+            "topic":        topic,
+            "message_type": string(msgType),
+        }
+        redactor.RedactMapWithLabels(context.Background(), cloned, labels)
+    }
+    return cloned
+}
+
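sanitizePayload assumes only that whatever sits in p.redactor exposes RedactMapWithLabels(ctx, payload, labels) and rewrites the cloned map in place; because the map is a deep clone, the caller's original payload is never mutated. A stand-in redactor for local experiments could look like the sketch below (the type name and regex are illustrative, not the production redactor wired into p.redactor):

// Illustrative stand-in redactor for experiments; not the production implementation.
type testRedactor struct {
    secret *regexp.Regexp // e.g. regexp.MustCompile(`(?i)(api[_-]?key|token)=\S+`)
}

func (r *testRedactor) RedactMapWithLabels(ctx context.Context, payload map[string]interface{}, labels map[string]string) {
    for k, v := range payload {
        if s, ok := v.(string); ok && r.secret.MatchString(s) {
            payload[k] = "[REDACTED]"
        }
    }
}
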
+func isRoleMessage(msgType MessageType) bool {
+    switch msgType {
+    case RoleAnnouncement, ExpertiseRequest, ExpertiseResponse, StatusUpdate,
+        WorkAllocation, RoleCollaboration, MentorshipRequest, MentorshipResponse,
+        ProjectUpdate, DeliverableReady:
+        return true
+    default:
+        return false
+    }
+}
+
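isRoleMessage is not called in the hunks shown here; presumably the publish paths use it to tag role-oriented traffic before sanitization. A hypothetical call site (the helper name and label scheme below are illustrative only):

// Hypothetical helper showing how isRoleMessage could feed sanitizePayload's labels.
func (p *PubSub) sanitizeForPublish(topicName string, msg Message) map[string]interface{} {
    label := topicName
    if isRoleMessage(msg.Type) {
        label = "role:" + topicName
    }
    return p.sanitizePayload(label, msg.Type, msg.Data)
}
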
+func clonePayloadMap(in map[string]interface{}) map[string]interface{} {
+    if in == nil {
+        return nil
+    }
+    out := make(map[string]interface{}, len(in))
+    for k, v := range in {
+        out[k] = clonePayloadValue(v)
+    }
+    return out
+}
+
+func clonePayloadValue(v interface{}) interface{} {
+    switch tv := v.(type) {
+    case map[string]interface{}:
+        return clonePayloadMap(tv)
+    case []interface{}:
+        return clonePayloadSlice(tv)
+    case []string:
+        return append([]string(nil), tv...)
+    default:
+        return tv
+    }
+}
+
+func clonePayloadSlice(in []interface{}) []interface{} {
+    out := make([]interface{}, len(in))
+    for i, val := range in {
+        out[i] = clonePayloadValue(val)
+    }
+    return out
+}
+
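The clone helpers exist so redaction can rewrite nested maps and slices without aliasing the caller's payload. A quick check of that property, sketched as a standard-library test (the test name is ours, not part of this diff):

// Sketch: mutating the clone must leave the original payload untouched.
func TestClonePayloadMapIsDeep(t *testing.T) {
    original := map[string]interface{}{
        "token":  "secret-value",
        "nested": map[string]interface{}{"key": "value"},
    }
    cloned := clonePayloadMap(original)
    cloned["token"] = "[REDACTED]"
    cloned["nested"].(map[string]interface{})["key"] = "[REDACTED]"

    if original["token"] != "secret-value" {
        t.Fatalf("top-level value was aliased: %v", original["token"])
    }
    if original["nested"].(map[string]interface{})["key"] != "value" {
        t.Fatal("nested value was aliased")
    }
}
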
// Close shuts down the PubSub instance
func (p *PubSub) Close() error {
    p.cancel()

    if p.chorusSub != nil {
        p.chorusSub.Cancel()
    }
@@ -777,7 +914,7 @@ func (p *PubSub) Close() error {
    if p.contextSub != nil {
        p.contextSub.Cancel()
    }

    if p.chorusTopic != nil {
        p.chorusTopic.Close()
    }
@@ -787,7 +924,13 @@ func (p *PubSub) Close() error {
    if p.contextTopic != nil {
        p.contextTopic.Close()
    }

+    p.dynamicSubsMux.Lock()
+    for _, sub := range p.dynamicSubs {
+        sub.Cancel()
+    }
+    p.dynamicSubsMux.Unlock()
+
    p.dynamicTopicsMux.Lock()
    for _, topic := range p.dynamicTopics {
        topic.Close()