Wire SLURP persistence and add restart coverage

anthonyrawlins
2025-09-27 15:26:25 +10:00
parent 17673c38a6
commit 0b670a535d
5 changed files with 1061 additions and 369 deletions


@@ -145,7 +145,7 @@ services:
       start_period: 10s

   whoosh:
-    image: anthonyrawlins/whoosh:scaling-v1.0.0
+    image: anthonyrawlins/whoosh:latest
     ports:
       - target: 8080
         published: 8800
@@ -200,6 +200,9 @@ services:
       WHOOSH_BACKBEAT_AGENT_ID: "whoosh"
       WHOOSH_BACKBEAT_NATS_URL: "nats://backbeat-nats:4222"
+      # Docker integration configuration (disabled for agent assignment architecture)
+      WHOOSH_DOCKER_ENABLED: "false"
     secrets:
       - whoosh_db_password
       - gitea_token
@@ -207,8 +210,8 @@ services:
       - jwt_secret
       - service_tokens
       - redis_password
-    volumes:
-      - /var/run/docker.sock:/var/run/docker.sock
+    # volumes:
+    #   - /var/run/docker.sock:/var/run/docker.sock # Disabled for agent assignment architecture
     deploy:
       replicas: 2
       restart_policy:


@@ -0,0 +1,14 @@
# SEC-SLURP 1.1 Persistence Wiring Report
## Summary of Changes
- Added LevelDB-backed persistence scaffolding in `pkg/slurp/slurp.go`, capturing the storage path, the local storage handle, and the roadmap-tagged metrics helpers required for SEC-SLURP 1.1 (a minimal sketch of the write-back/hydration shape follows this list).
- Upgraded SLURP's lifecycle: initialization bootstraps cached context data from disk, cache misses hydrate from persistence, successful `UpsertContext` calls write back to LevelDB, and shutdown closes the store with error telemetry.
- Introduced `pkg/slurp/slurp_persistence_test.go` to confirm contexts survive process restarts and can be resolved after clearing in-memory caches.
- Instrumented cache/persistence metrics so hit/miss ratios and storage failures are tracked for observability.
- Attempted `GOWORK=off go test ./pkg/slurp`; execution was blocked by legacy references to `config.Authority*` symbols in `pkg/slurp/context`, so the new test did not run.
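
A minimal sketch of the write-back/hydration shape described above, assuming `github.com/syndtr/goleveldb` as the LevelDB driver and JSON-encoded context nodes; the actual `pkg/slurp/slurp.go` wiring may differ in key layout and types.

```go
package main

import (
	"encoding/json"
	"fmt"

	"github.com/syndtr/goleveldb/leveldb"
)

// contextNode is a stand-in for the real slurpContext.ContextNode.
type contextNode struct {
	Path    string `json:"path"`
	Summary string `json:"summary"`
}

func main() {
	// Open the store at the configured storage path (captured at init).
	db, err := leveldb.OpenFile("/tmp/slurp-contexts", nil)
	if err != nil {
		panic(err)
	}
	defer db.Close()

	// UpsertContext path: a successful upsert writes the node back to LevelDB.
	node := contextNode{Path: "docs/example.go", Summary: "Persistent context summary"}
	buf, _ := json.Marshal(node)
	if err := db.Put([]byte(node.Path), buf, nil); err != nil {
		panic(err) // a storage failure here is what the failure metric counts
	}

	// Cache-miss path: hydrate from disk when the in-memory map has no entry.
	if raw, err := db.Get([]byte("docs/example.go"), nil); err == nil {
		var restored contextNode
		_ = json.Unmarshal(raw, &restored)
		fmt.Println("hydrated:", restored.Summary)
	}
}
```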
## Recommended Next Steps
- Address the `config.Authority*` symbol drift (or scope down the impacted packages) so the SLURP test suite can compile cleanly, then rerun `GOWORK=off go test ./pkg/slurp` to validate persistence changes.
- Feed the durable store into the resolver and temporal graph implementations to finish the remaining Phase 1 SLURP roadmap items.
- Expand Prometheus metrics and logging to track cache hit/miss ratios plus persistence errors for SEC-SLURP observability goals (a sketch of the counters follows this list).
- Review unrelated changes on `feature/phase-4-real-providers` (e.g., docker-compose edits) and either align them with this roadmap work or revert to keep the branch focused.
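
A sketch of the counters the observability bullet envisions, using the stock `prometheus/client_golang` API; metric names are illustrative, not taken from the codebase.

```go
package metrics

import "github.com/prometheus/client_golang/prometheus"

var (
	cacheHits = prometheus.NewCounter(prometheus.CounterOpts{
		Name: "slurp_context_cache_hits_total",
		Help: "Context resolutions served from the in-memory cache.",
	})
	cacheMisses = prometheus.NewCounter(prometheus.CounterOpts{
		Name: "slurp_context_cache_misses_total",
		Help: "Context resolutions that fell through to the LevelDB store.",
	})
	persistenceErrors = prometheus.NewCounter(prometheus.CounterOpts{
		Name: "slurp_persistence_errors_total",
		Help: "Failed LevelDB reads or writes.",
	})
)

func init() {
	prometheus.MustRegister(cacheHits, cacheMisses, persistenceErrors)
}
```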


@@ -8,12 +8,11 @@ import (
 	"sync"
 	"time"

-	"chorus/pkg/election"
 	"chorus/pkg/dht"
-	"chorus/pkg/ucxl"
+	"chorus/pkg/election"
+	slurpContext "chorus/pkg/slurp/context"
 	"chorus/pkg/slurp/intelligence"
 	"chorus/pkg/slurp/storage"
-	slurpContext "chorus/pkg/slurp/context"
 )
// ContextManager handles leader-only context generation duties
@@ -25,34 +24,34 @@ type ContextManager interface {
	// RequestContextGeneration queues a context generation request
	// Only the leader processes these requests to prevent conflicts
	RequestContextGeneration(req *ContextGenerationRequest) error

	// RequestFromLeader allows non-leader nodes to request context from leader
	RequestFromLeader(req *ContextGenerationRequest) (*ContextGenerationResult, error)

	// GetGenerationStatus returns status of context generation operations
	GetGenerationStatus() (*GenerationStatus, error)

	// GetQueueStatus returns status of the generation queue
	GetQueueStatus() (*QueueStatus, error)

	// CancelGeneration cancels pending or active generation task
	CancelGeneration(taskID string) error

	// PrioritizeGeneration changes priority of queued generation task
	PrioritizeGeneration(taskID string, priority Priority) error

	// IsLeader returns whether this node is the current leader
	IsLeader() bool

	// WaitForLeadership blocks until this node becomes leader
	WaitForLeadership(ctx context.Context) error

	// GetLeaderInfo returns information about current leader
	GetLeaderInfo() (*LeaderInfo, error)

	// TransferLeadership initiates graceful leadership transfer
	TransferLeadership(ctx context.Context, targetNodeID string) error

	// GetManagerStats returns manager performance statistics
	GetManagerStats() (*ManagerStatistics, error)
}
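
For illustration, a caller would branch on leadership before choosing an entry point; this helper is hypothetical and not part of the diff:

```go
// generateFor enqueues locally on the leader and proxies to the leader from
// any other node. Result delivery on the leader path is asynchronous
// (tracked via GetGenerationStatus), so this sketch returns nil there.
func generateFor(mgr ContextManager, req *ContextGenerationRequest) (*ContextGenerationResult, error) {
	if mgr.IsLeader() {
		if err := mgr.RequestContextGeneration(req); err != nil {
			return nil, err
		}
		return nil, nil
	}
	return mgr.RequestFromLeader(req)
}
```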
@@ -64,25 +63,25 @@ type ContextManager interface {
type GenerationCoordinator interface {
	// CoordinateGeneration coordinates generation of context across cluster
	CoordinateGeneration(ctx context.Context, req *ContextGenerationRequest) (*CoordinationResult, error)

	// DistributeGeneration distributes generation task to appropriate node
	DistributeGeneration(ctx context.Context, task *GenerationTask) error

	// CollectGenerationResults collects results from distributed generation
	CollectGenerationResults(ctx context.Context, taskID string) (*GenerationResults, error)

	// CheckGenerationStatus checks status of distributed generation
	CheckGenerationStatus(ctx context.Context, taskID string) (*TaskStatus, error)

	// RebalanceLoad rebalances generation load across cluster nodes
	RebalanceLoad(ctx context.Context) (*RebalanceResult, error)

	// GetClusterCapacity returns current cluster generation capacity
	GetClusterCapacity() (*ClusterCapacity, error)

	// SetGenerationPolicy configures generation coordination policy
	SetGenerationPolicy(policy *GenerationPolicy) error

	// GetCoordinationStats returns coordination performance statistics
	GetCoordinationStats() (*CoordinationStatistics, error)
}
@@ -95,31 +94,31 @@ type GenerationCoordinator interface {
type QueueManager interface {
	// EnqueueRequest adds request to generation queue
	EnqueueRequest(req *ContextGenerationRequest) error

	// DequeueRequest gets next request from queue
	DequeueRequest() (*ContextGenerationRequest, error)

	// PeekQueue shows next request without removing it
	PeekQueue() (*ContextGenerationRequest, error)

	// UpdateRequestPriority changes priority of queued request
	UpdateRequestPriority(requestID string, priority Priority) error

	// CancelRequest removes request from queue
	CancelRequest(requestID string) error

	// GetQueueLength returns current queue length
	GetQueueLength() int

	// GetQueuedRequests returns all queued requests
	GetQueuedRequests() ([]*ContextGenerationRequest, error)

	// ClearQueue removes all requests from queue
	ClearQueue() error

	// SetQueuePolicy configures queue management policy
	SetQueuePolicy(policy *QueuePolicy) error

	// GetQueueStats returns queue performance statistics
	GetQueueStats() (*QueueStatistics, error)
}
@@ -131,25 +130,25 @@ type QueueManager interface {
type FailoverManager interface {
	// PrepareFailover prepares current state for potential failover
	PrepareFailover(ctx context.Context) (*FailoverState, error)

	// ExecuteFailover executes failover to become new leader
	ExecuteFailover(ctx context.Context, previousState *FailoverState) error

	// TransferState transfers leadership state to another node
	TransferState(ctx context.Context, targetNodeID string) error

	// ReceiveState receives leadership state from previous leader
	ReceiveState(ctx context.Context, state *FailoverState) error

	// ValidateState validates received failover state
	ValidateState(state *FailoverState) (*StateValidation, error)

	// RecoverFromFailover recovers operations after failover
	RecoverFromFailover(ctx context.Context) (*RecoveryResult, error)

	// GetFailoverHistory returns history of failover events
	GetFailoverHistory() ([]*FailoverEvent, error)

	// GetFailoverStats returns failover statistics
	GetFailoverStats() (*FailoverStatistics, error)
}
@@ -161,25 +160,25 @@ type FailoverManager interface {
type ClusterCoordinator interface {
	// SynchronizeCluster synchronizes context state across cluster
	SynchronizeCluster(ctx context.Context) (*SyncResult, error)

	// GetClusterState returns current cluster state
	GetClusterState() (*ClusterState, error)

	// GetNodeHealth returns health status of cluster nodes
	GetNodeHealth() (map[string]*NodeHealth, error)

	// EvictNode removes unresponsive node from cluster operations
	EvictNode(ctx context.Context, nodeID string) error

	// AddNode adds new node to cluster operations
	AddNode(ctx context.Context, nodeID string, nodeInfo *NodeInfo) error

	// BroadcastMessage broadcasts message to all cluster nodes
	BroadcastMessage(ctx context.Context, message *ClusterMessage) error

	// GetClusterMetrics returns cluster performance metrics
	GetClusterMetrics() (*ClusterMetrics, error)

	// ConfigureCluster configures cluster coordination parameters
	ConfigureCluster(config *ClusterConfig) error
}
@@ -191,25 +190,25 @@ type ClusterCoordinator interface {
type HealthMonitor interface {
	// CheckHealth performs comprehensive health check
	CheckHealth(ctx context.Context) (*HealthStatus, error)

	// CheckNodeHealth checks health of specific node
	CheckNodeHealth(ctx context.Context, nodeID string) (*NodeHealth, error)

	// CheckQueueHealth checks health of generation queue
	CheckQueueHealth() (*QueueHealth, error)

	// CheckLeaderHealth checks health of leader node
	CheckLeaderHealth() (*LeaderHealth, error)

	// GetHealthMetrics returns health monitoring metrics
	GetHealthMetrics() (*HealthMetrics, error)

	// SetHealthPolicy configures health monitoring policy
	SetHealthPolicy(policy *HealthPolicy) error

	// GetHealthHistory returns history of health events
	GetHealthHistory(timeRange time.Duration) ([]*HealthEvent, error)

	// SubscribeToHealthEvents subscribes to health event notifications
	SubscribeToHealthEvents(handler HealthEventHandler) error
}
@@ -218,19 +217,19 @@ type HealthMonitor interface {
type ResourceManager interface {
	// AllocateResources allocates resources for context generation
	AllocateResources(req *ResourceRequest) (*ResourceAllocation, error)

	// ReleaseResources releases allocated resources
	ReleaseResources(allocationID string) error

	// GetAvailableResources returns currently available resources
	GetAvailableResources() (*AvailableResources, error)

	// SetResourceLimits configures resource usage limits
	SetResourceLimits(limits *ResourceLimits) error

	// GetResourceUsage returns current resource usage statistics
	GetResourceUsage() (*ResourceUsage, error)

	// RebalanceResources rebalances resources across operations
	RebalanceResources(ctx context.Context) (*ResourceRebalanceResult, error)
}
@@ -244,12 +243,13 @@ type LeaderContextManager struct {
	intelligence    intelligence.IntelligenceEngine
	storage         storage.ContextStore
	contextResolver slurpContext.ContextResolver
+	contextUpserter slurp.ContextPersister

	// Context generation state
	generationQueue chan *ContextGenerationRequest
	activeJobs      map[string]*ContextGenerationJob
	completedJobs   map[string]*ContextGenerationJob

	// Coordination components
	coordinator  GenerationCoordinator
	queueManager QueueManager
@@ -257,16 +257,23 @@ type LeaderContextManager struct {
	clusterCoord    ClusterCoordinator
	healthMonitor   HealthMonitor
	resourceManager ResourceManager

	// Configuration
	config *ManagerConfig

	// Statistics
	stats *ManagerStatistics

	// Shutdown coordination
	shutdownChan chan struct{}
	shutdownOnce sync.Once
}

+// SetContextPersister registers the SLURP persistence hook (Roadmap: SEC-SLURP 1.1).
+func (cm *LeaderContextManager) SetContextPersister(persister slurp.ContextPersister) {
+	cm.mu.Lock()
+	defer cm.mu.Unlock()
+	cm.contextUpserter = persister
+}
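
The diff references `slurp.ContextPersister` without showing its definition. From the call sites here and in the new test (`UpsertContext` takes a context node and returns a value plus an error), the interface is presumably close to the following sketch; the resolved-context return type is a guess:

```go
// Assumed shape of the persistence hook; only the method name and arity
// are confirmed by the call sites in this commit.
type ContextPersister interface {
	UpsertContext(ctx context.Context, node *slurpContext.ContextNode) (*slurpContext.ResolvedContext, error)
}
```

Wiring would then be a one-liner at startup, e.g. `manager.SetContextPersister(slurpInstance)`, with the SLURP instance satisfying the interface.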
// NewContextManager creates a new leader context manager
@@ -279,18 +286,18 @@ func NewContextManager(
) *LeaderContextManager {
	cm := &LeaderContextManager{
		election:        election,
		dht:             dht,
		intelligence:    intelligence,
		storage:         storage,
		contextResolver: resolver,
		generationQueue: make(chan *ContextGenerationRequest, 1000),
		activeJobs:      make(map[string]*ContextGenerationJob),
		completedJobs:   make(map[string]*ContextGenerationJob),
		shutdownChan:    make(chan struct{}),
		config:          DefaultManagerConfig(),
		stats:           &ManagerStatistics{},
	}

	// Initialize coordination components
	cm.coordinator = NewGenerationCoordinator(cm)
	cm.queueManager = NewQueueManager(cm)
@@ -298,13 +305,13 @@ func NewContextManager(
	cm.clusterCoord = NewClusterCoordinator(cm)
	cm.healthMonitor = NewHealthMonitor(cm)
	cm.resourceManager = NewResourceManager(cm)

	// Start background processes
	go cm.watchLeadershipChanges()
	go cm.processContextGeneration()
	go cm.monitorHealth()
	go cm.syncCluster()

	return cm
}
@@ -313,17 +320,17 @@ func (cm *LeaderContextManager) RequestContextGeneration(req *ContextGenerationR
	if !cm.IsLeader() {
		return ErrNotLeader
	}

	// Validate request
	if err := cm.validateRequest(req); err != nil {
		return err
	}

	// Check for duplicates
	if cm.isDuplicate(req) {
		return ErrDuplicateRequest
	}

	// Enqueue request
	select {
	case cm.generationQueue <- req:
@@ -346,7 +353,7 @@ func (cm *LeaderContextManager) IsLeader() bool {
func (cm *LeaderContextManager) GetGenerationStatus() (*GenerationStatus, error) {
	cm.mu.RLock()
	defer cm.mu.RUnlock()

	status := &GenerationStatus{
		ActiveTasks: len(cm.activeJobs),
		QueuedTasks: len(cm.generationQueue),
@@ -354,14 +361,14 @@ func (cm *LeaderContextManager) GetGenerationStatus() (*GenerationStatus, error)
		IsLeader:   cm.isLeader,
		LastUpdate: time.Now(),
	}

	// Calculate estimated completion time
	if status.ActiveTasks > 0 || status.QueuedTasks > 0 {
		avgJobTime := cm.calculateAverageJobTime()
		totalRemaining := time.Duration(status.ActiveTasks+status.QueuedTasks) * avgJobTime
		status.EstimatedCompletion = time.Now().Add(totalRemaining)
	}

	return status, nil
}
@@ -374,12 +381,12 @@ func (cm *LeaderContextManager) watchLeadershipChanges() {
		default:
			// Check leadership status
			newIsLeader := cm.election.IsLeader()

			cm.mu.Lock()
			oldIsLeader := cm.isLeader
			cm.isLeader = newIsLeader
			cm.mu.Unlock()

			// Handle leadership change
			if oldIsLeader != newIsLeader {
				if newIsLeader {
@@ -388,7 +395,7 @@ func (cm *LeaderContextManager) watchLeadershipChanges() {
					cm.onLoseLeadership()
				}
			}

			// Sleep before next check
			time.Sleep(cm.config.LeadershipCheckInterval)
		}
@@ -420,31 +427,31 @@ func (cm *LeaderContextManager) handleGenerationRequest(req *ContextGenerationRe
		Status:    JobStatusRunning,
		StartedAt: time.Now(),
	}

	cm.mu.Lock()
	cm.activeJobs[job.ID] = job
	cm.mu.Unlock()

	defer func() {
		cm.mu.Lock()
		delete(cm.activeJobs, job.ID)
		cm.completedJobs[job.ID] = job
		cm.mu.Unlock()

		// Clean up old completed jobs
		cm.cleanupCompletedJobs()
	}()

	// Generate context using intelligence engine
	contextNode, err := cm.intelligence.AnalyzeFile(
		context.Background(),
		req.FilePath,
		req.Role,
	)

	completedAt := time.Now()
	job.CompletedAt = &completedAt

	if err != nil {
		job.Status = JobStatusFailed
		job.Error = err
@@ -453,11 +460,16 @@ func (cm *LeaderContextManager) handleGenerationRequest(req *ContextGenerationRe
		job.Status = JobStatusCompleted
		job.Result = contextNode
		cm.stats.CompletedJobs++

-		// Store generated context
-		if err := cm.storage.StoreContext(context.Background(), contextNode, []string{req.Role}); err != nil {
-			// Log storage error but don't fail the job
-			// TODO: Add proper logging
+		// Store generated context (SEC-SLURP 1.1 persistence bridge)
+		if cm.contextUpserter != nil {
+			if _, persistErr := cm.contextUpserter.UpsertContext(context.Background(), contextNode); persistErr != nil {
+				// TODO(SEC-SLURP 1.1): surface persistence errors via structured logging/telemetry
+			}
+		} else if cm.storage != nil {
+			if err := cm.storage.StoreContext(context.Background(), contextNode, []string{req.Role}); err != nil {
+				// TODO: Add proper logging when falling back to legacy storage path
+			}
		}
	}
}
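
The TODOs above defer error reporting; one way to resolve them with the standard library's `log/slog`, shown only as an illustration (the request field names are not from the diff):

```go
if _, persistErr := cm.contextUpserter.UpsertContext(context.Background(), contextNode); persistErr != nil {
	// Surface the failure with structured fields instead of swallowing it.
	slog.Error("slurp persistence failed",
		"ucxl", req.UCXLAddress,
		"path", req.FilePath,
		"err", persistErr)
}
```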
@@ -494,21 +506,21 @@ func (cm *LeaderContextManager) calculateAverageJobTime() time.Duration {
	if len(cm.completedJobs) == 0 {
		return time.Minute // Default estimate
	}

	var totalTime time.Duration
	count := 0

	for _, job := range cm.completedJobs {
		if job.CompletedAt != nil {
			totalTime += job.CompletedAt.Sub(job.StartedAt)
			count++
		}
	}

	if count == 0 {
		return time.Minute
	}

	return totalTime / time.Duration(count)
}
@@ -520,10 +532,10 @@ func (cm *LeaderContextManager) calculateAverageWaitTime() time.Duration {
	if queueLength == 0 {
		return 0
	}

	avgJobTime := cm.calculateAverageJobTime()
	concurrency := cm.config.MaxConcurrentJobs

	// Estimate wait time based on queue position and processing capacity
	estimatedWait := time.Duration(queueLength/concurrency) * avgJobTime
	return estimatedWait
@@ -533,22 +545,22 @@ func (cm *LeaderContextManager) GetQueueStatus() (*QueueStatus, error) {
	cm.mu.RLock()
	defer cm.mu.RUnlock()

	status := &QueueStatus{
		QueueLength:          len(cm.generationQueue),
		MaxQueueSize:         cm.config.QueueSize,
		QueuedRequests:       []*ContextGenerationRequest{},
		PriorityDistribution: make(map[Priority]int),
		AverageWaitTime:      cm.calculateAverageWaitTime(),
	}

	// Get oldest request time if any
	if len(cm.generationQueue) > 0 {
		// Peek at queue without draining
		oldest := time.Now()
		status.OldestRequest = &oldest
	}

	return status, nil
}
@@ -556,21 +568,21 @@ func (cm *LeaderContextManager) CancelGeneration(taskID string) error {
	cm.mu.Lock()
	defer cm.mu.Unlock()

	// Check if task is active
	if job, exists := cm.activeJobs[taskID]; exists {
		job.Status = JobStatusCancelled
		job.Error = fmt.Errorf("task cancelled by user")
		completedAt := time.Now()
		job.CompletedAt = &completedAt

		delete(cm.activeJobs, taskID)
		cm.completedJobs[taskID] = job
		cm.stats.CancelledJobs++

		return nil
	}

	// TODO: Remove from queue if pending
	return fmt.Errorf("task %s not found", taskID)
}
@@ -585,11 +597,11 @@ func (cm *LeaderContextManager) PrioritizeGeneration(taskID string, priority Pri
func (cm *LeaderContextManager) GetManagerStats() (*ManagerStatistics, error) {
	cm.mu.RLock()
	defer cm.mu.RUnlock()

	stats := *cm.stats // Copy current stats
	stats.AverageJobTime = cm.calculateAverageJobTime()
	stats.HighestQueueLength = len(cm.generationQueue)

	return &stats, nil
}
@@ -597,7 +609,7 @@ func (cm *LeaderContextManager) onBecomeLeader() {
	// Initialize leader-specific state
	cm.stats.LeadershipChanges++
	cm.stats.LastBecameLeader = time.Now()

	// Recover any pending state from previous leader
	if err := cm.failoverManager.RecoverFromFailover(context.Background()); err != nil {
		// Log error but continue - we're the leader now
@@ -611,7 +623,7 @@ func (cm *LeaderContextManager) onLoseLeadership() {
		// TODO: Send state to new leader
		_ = state
	}

	cm.stats.LastLostLeadership = time.Now()
}
@@ -623,7 +635,7 @@ func (cm *LeaderContextManager) monitorHealth() {
	ticker := time.NewTicker(cm.config.HealthCheckInterval)
	defer ticker.Stop()

	for {
		select {
		case <-ticker.C:
@@ -640,7 +652,7 @@ func (cm *LeaderContextManager) syncCluster() {
	ticker := time.NewTicker(cm.config.ClusterSyncInterval)
	defer ticker.Stop()

	for {
		select {
		case <-ticker.C:
@@ -659,18 +671,18 @@ func (cm *LeaderContextManager) cleanupCompletedJobs() {
	cm.mu.Lock()
	defer cm.mu.Unlock()

	if len(cm.completedJobs) <= cm.config.MaxCompletedJobs {
		return
	}

	// Remove oldest completed jobs based on completion time
	type jobWithTime struct {
		id   string
		job  *ContextGenerationJob
		time time.Time
	}

	var jobs []jobWithTime
	for id, job := range cm.completedJobs {
		completedAt := time.Now()
@@ -679,12 +691,12 @@ func (cm *LeaderContextManager) cleanupCompletedJobs() {
		}
		jobs = append(jobs, jobWithTime{id: id, job: job, time: completedAt})
	}

	// Sort by completion time (oldest first)
	sort.Slice(jobs, func(i, j int) bool {
		return jobs[i].time.Before(jobs[j].time)
	})

	// Remove oldest jobs to get back to limit
	toRemove := len(jobs) - cm.config.MaxCompletedJobs
	for i := 0; i < toRemove; i++ {
@@ -701,13 +713,13 @@ func generateJobID() string {
// Error definitions
var (
	ErrNotLeader          = &LeaderError{Code: "NOT_LEADER", Message: "Node is not the leader"}
	ErrQueueFull          = &LeaderError{Code: "QUEUE_FULL", Message: "Generation queue is full"}
	ErrDuplicateRequest   = &LeaderError{Code: "DUPLICATE_REQUEST", Message: "Duplicate generation request"}
	ErrInvalidRequest     = &LeaderError{Code: "INVALID_REQUEST", Message: "Invalid generation request"}
	ErrMissingUCXLAddress = &LeaderError{Code: "MISSING_UCXL_ADDRESS", Message: "Missing UCXL address"}
	ErrMissingFilePath    = &LeaderError{Code: "MISSING_FILE_PATH", Message: "Missing file path"}
	ErrMissingRole        = &LeaderError{Code: "MISSING_ROLE", Message: "Missing role"}
)

// LeaderError represents errors specific to leader operations
@@ -731,4 +743,4 @@ func DefaultManagerConfig() *ManagerConfig {
		MaxConcurrentJobs: 10,
		JobTimeout:        10 * time.Minute,
	}
}

File diff suppressed because it is too large


@@ -0,0 +1,69 @@
package slurp

import (
	"context"
	"testing"
	"time"

	"chorus/pkg/config"
	slurpContext "chorus/pkg/slurp/context"
	"chorus/pkg/ucxl"

	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

// TestSLURPPersistenceLoadsContexts verifies LevelDB fallback (Roadmap: SEC-SLURP 1.1).
func TestSLURPPersistenceLoadsContexts(t *testing.T) {
	configDir := t.TempDir()
	cfg := &config.Config{
		Slurp: config.SlurpConfig{Enabled: true},
		UCXL: config.UCXLConfig{
			Storage: config.StorageConfig{Directory: configDir},
		},
	}

	primary, err := NewSLURP(cfg, nil, nil, nil)
	require.NoError(t, err)
	require.NoError(t, primary.Initialize(context.Background()))
	t.Cleanup(func() {
		_ = primary.Close()
	})

	address, err := ucxl.Parse("ucxl://agent:resolver@chorus:task/current/docs/example.go")
	require.NoError(t, err)

	node := &slurpContext.ContextNode{
		Path:          "docs/example.go",
		UCXLAddress:   *address,
		Summary:       "Persistent context summary",
		Purpose:       "Verify persistence pipeline",
		Technologies:  []string{"Go"},
		Tags:          []string{"persistence", "slurp"},
		GeneratedAt:   time.Now().UTC(),
		RAGConfidence: 0.92,
	}

	_, err = primary.UpsertContext(context.Background(), node)
	require.NoError(t, err)
	require.NoError(t, primary.Close())

	restore, err := NewSLURP(cfg, nil, nil, nil)
	require.NoError(t, err)
	require.NoError(t, restore.Initialize(context.Background()))
	t.Cleanup(func() {
		_ = restore.Close()
	})

	// Clear in-memory caches to force disk hydration path.
	restore.contextsMu.Lock()
	restore.contextStore = make(map[string]*slurpContext.ContextNode)
	restore.resolvedCache = make(map[string]*slurpContext.ResolvedContext)
	restore.contextsMu.Unlock()

	resolved, err := restore.Resolve(context.Background(), address.String())
	require.NoError(t, err)
	require.NotNil(t, resolved)
	assert.Equal(t, node.Summary, resolved.Summary)
	assert.Equal(t, node.Purpose, resolved.Purpose)
	assert.Contains(t, resolved.Technologies, "Go")
}