Wire SLURP persistence and add restart coverage
@@ -145,7 +145,7 @@ services:
         start_period: 10s

   whoosh:
-    image: anthonyrawlins/whoosh:scaling-v1.0.0
+    image: anthonyrawlins/whoosh:latest
     ports:
       - target: 8080
         published: 8800
@@ -200,6 +200,9 @@ services:
       WHOOSH_BACKBEAT_AGENT_ID: "whoosh"
       WHOOSH_BACKBEAT_NATS_URL: "nats://backbeat-nats:4222"
+
+      # Docker integration configuration (disabled for agent assignment architecture)
+      WHOOSH_DOCKER_ENABLED: "false"

     secrets:
       - whoosh_db_password
       - gitea_token
@@ -207,8 +210,8 @@ services:
       - jwt_secret
       - service_tokens
       - redis_password
-    volumes:
-      - /var/run/docker.sock:/var/run/docker.sock
+    # volumes:
+    #   - /var/run/docker.sock:/var/run/docker.sock  # Disabled for agent assignment architecture
     deploy:
       replicas: 2
       restart_policy:
docs/progress/report-SEC-SLURP-1.1.md (new file, 14 lines)
@@ -0,0 +1,14 @@
# SEC-SLURP 1.1 Persistence Wiring Report

## Summary of Changes
- Added LevelDB-backed persistence scaffolding in `pkg/slurp/slurp.go`, capturing the storage path, local storage handle, and the roadmap-tagged metrics helpers required for SEC-SLURP 1.1.
- Upgraded SLURP's lifecycle so initialization bootstraps cached context data from disk, cache misses hydrate from persistence, successful `UpsertContext` calls write back to LevelDB, and shutdown closes the store with error telemetry (see the sketch below).
- Introduced `pkg/slurp/slurp_persistence_test.go` to confirm contexts survive process restarts and can be resolved after clearing in-memory caches.
- Instrumented cache/persistence metrics so hit/miss ratios and storage failures are tracked for observability.
- Attempted `GOWORK=off go test ./pkg/slurp`; execution was blocked by legacy references to `config.Authority*` symbols in `pkg/slurp/context`, so the new test did not run.
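
A minimal sketch of the read-through/write-back flow described above, assuming a goleveldb handle; the type, field, and key names (`persistentCache`, `cache`, `db`, the `context:` prefix) are illustrative placeholders, not the exact identifiers in `pkg/slurp/slurp.go`:

```go
package slurp

import (
	"encoding/json"
	"fmt"
	"sync"

	"github.com/syndtr/goleveldb/leveldb"
)

// contextNode stands in for the real slurpContext.ContextNode payload.
type contextNode struct {
	Summary string `json:"summary"`
	Purpose string `json:"purpose"`
}

// persistentCache pairs an in-memory map with a LevelDB store.
type persistentCache struct {
	mu    sync.RWMutex
	cache map[string]*contextNode
	db    *leveldb.DB // opened from the configured storage path at init
}

// Resolve serves from memory first and hydrates from LevelDB on a miss.
func (p *persistentCache) Resolve(address string) (*contextNode, error) {
	p.mu.RLock()
	node, ok := p.cache[address]
	p.mu.RUnlock()
	if ok {
		return node, nil // cache hit
	}
	raw, err := p.db.Get([]byte("context:"+address), nil)
	if err != nil {
		return nil, fmt.Errorf("context %s not persisted: %w", address, err)
	}
	node = &contextNode{}
	if err := json.Unmarshal(raw, node); err != nil {
		return nil, err
	}
	p.mu.Lock()
	p.cache[address] = node // hydrate the cache from disk
	p.mu.Unlock()
	return node, nil
}

// Upsert writes through: update the cache, then persist to LevelDB.
func (p *persistentCache) Upsert(address string, node *contextNode) error {
	raw, err := json.Marshal(node)
	if err != nil {
		return err
	}
	p.mu.Lock()
	p.cache[address] = node
	p.mu.Unlock()
	return p.db.Put([]byte("context:"+address), raw, nil)
}
```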
## Recommended Next Steps
- Address the `config.Authority*` symbol drift (or scope down the impacted packages) so the SLURP test suite can compile cleanly, then rerun `GOWORK=off go test ./pkg/slurp` to validate persistence changes.
- Feed the durable store into the resolver and temporal graph implementations to finish the remaining Phase 1 SLURP roadmap items.
- Expand Prometheus metrics and logging to track cache hit/miss ratios plus persistence errors for SEC-SLURP observability goals (a possible counter layout is sketched below).
- Review unrelated changes on `feature/phase-4-real-providers` (e.g., docker-compose edits) and either align them with this roadmap work or revert to keep the branch focused.
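
For the metrics item, one possible counter layout, sketched with `prometheus/client_golang`; the metric names are illustrative placeholders, not yet wired into the codebase:

```go
package slurp

import (
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
)

// Placeholder SEC-SLURP observability counters; the names would need to
// follow the project's metric-naming conventions before being wired in.
var (
	contextCacheHits = promauto.NewCounter(prometheus.CounterOpts{
		Name: "slurp_context_cache_hits_total",
		Help: "Context resolutions served from the in-memory cache.",
	})
	contextCacheMisses = promauto.NewCounter(prometheus.CounterOpts{
		Name: "slurp_context_cache_misses_total",
		Help: "Context resolutions that fell through to LevelDB.",
	})
	persistenceErrors = promauto.NewCounterVec(prometheus.CounterOpts{
		Name: "slurp_persistence_errors_total",
		Help: "LevelDB read/write failures, labeled by operation.",
	}, []string{"op"})
)

// Example call sites: contextCacheHits.Inc() on a cache hit,
// persistenceErrors.WithLabelValues("put").Inc() on a failed write.
```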
@@ -8,12 +8,11 @@ import (
	"sync"
	"time"

-	"chorus/pkg/election"
	"chorus/pkg/dht"
	"chorus/pkg/ucxl"
+	"chorus/pkg/election"
+	slurpContext "chorus/pkg/slurp/context"
	"chorus/pkg/slurp/intelligence"
	"chorus/pkg/slurp/storage"
-	slurpContext "chorus/pkg/slurp/context"
)

// ContextManager handles leader-only context generation duties
@@ -25,34 +24,34 @@ type ContextManager interface {
	// RequestContextGeneration queues a context generation request
	// Only the leader processes these requests to prevent conflicts
	RequestContextGeneration(req *ContextGenerationRequest) error

	// RequestFromLeader allows non-leader nodes to request context from leader
	RequestFromLeader(req *ContextGenerationRequest) (*ContextGenerationResult, error)

	// GetGenerationStatus returns status of context generation operations
	GetGenerationStatus() (*GenerationStatus, error)

	// GetQueueStatus returns status of the generation queue
	GetQueueStatus() (*QueueStatus, error)

	// CancelGeneration cancels pending or active generation task
	CancelGeneration(taskID string) error

	// PrioritizeGeneration changes priority of queued generation task
	PrioritizeGeneration(taskID string, priority Priority) error

	// IsLeader returns whether this node is the current leader
	IsLeader() bool

	// WaitForLeadership blocks until this node becomes leader
	WaitForLeadership(ctx context.Context) error

	// GetLeaderInfo returns information about current leader
	GetLeaderInfo() (*LeaderInfo, error)

	// TransferLeadership initiates graceful leadership transfer
	TransferLeadership(ctx context.Context, targetNodeID string) error

	// GetManagerStats returns manager performance statistics
	GetManagerStats() (*ManagerStatistics, error)
}
@@ -64,25 +63,25 @@ type ContextManager interface {
type GenerationCoordinator interface {
	// CoordinateGeneration coordinates generation of context across cluster
	CoordinateGeneration(ctx context.Context, req *ContextGenerationRequest) (*CoordinationResult, error)

	// DistributeGeneration distributes generation task to appropriate node
	DistributeGeneration(ctx context.Context, task *GenerationTask) error

	// CollectGenerationResults collects results from distributed generation
	CollectGenerationResults(ctx context.Context, taskID string) (*GenerationResults, error)

	// CheckGenerationStatus checks status of distributed generation
	CheckGenerationStatus(ctx context.Context, taskID string) (*TaskStatus, error)

	// RebalanceLoad rebalances generation load across cluster nodes
	RebalanceLoad(ctx context.Context) (*RebalanceResult, error)

	// GetClusterCapacity returns current cluster generation capacity
	GetClusterCapacity() (*ClusterCapacity, error)

	// SetGenerationPolicy configures generation coordination policy
	SetGenerationPolicy(policy *GenerationPolicy) error

	// GetCoordinationStats returns coordination performance statistics
	GetCoordinationStats() (*CoordinationStatistics, error)
}
@@ -95,31 +94,31 @@ type GenerationCoordinator interface {
type QueueManager interface {
	// EnqueueRequest adds request to generation queue
	EnqueueRequest(req *ContextGenerationRequest) error

	// DequeueRequest gets next request from queue
	DequeueRequest() (*ContextGenerationRequest, error)

	// PeekQueue shows next request without removing it
	PeekQueue() (*ContextGenerationRequest, error)

	// UpdateRequestPriority changes priority of queued request
	UpdateRequestPriority(requestID string, priority Priority) error

	// CancelRequest removes request from queue
	CancelRequest(requestID string) error

	// GetQueueLength returns current queue length
	GetQueueLength() int

	// GetQueuedRequests returns all queued requests
	GetQueuedRequests() ([]*ContextGenerationRequest, error)

	// ClearQueue removes all requests from queue
	ClearQueue() error

	// SetQueuePolicy configures queue management policy
	SetQueuePolicy(policy *QueuePolicy) error

	// GetQueueStats returns queue performance statistics
	GetQueueStats() (*QueueStatistics, error)
}
@@ -131,25 +130,25 @@ type QueueManager interface {
type FailoverManager interface {
	// PrepareFailover prepares current state for potential failover
	PrepareFailover(ctx context.Context) (*FailoverState, error)

	// ExecuteFailover executes failover to become new leader
	ExecuteFailover(ctx context.Context, previousState *FailoverState) error

	// TransferState transfers leadership state to another node
	TransferState(ctx context.Context, targetNodeID string) error

	// ReceiveState receives leadership state from previous leader
	ReceiveState(ctx context.Context, state *FailoverState) error

	// ValidateState validates received failover state
	ValidateState(state *FailoverState) (*StateValidation, error)

	// RecoverFromFailover recovers operations after failover
	RecoverFromFailover(ctx context.Context) (*RecoveryResult, error)

	// GetFailoverHistory returns history of failover events
	GetFailoverHistory() ([]*FailoverEvent, error)

	// GetFailoverStats returns failover statistics
	GetFailoverStats() (*FailoverStatistics, error)
}
@@ -161,25 +160,25 @@ type FailoverManager interface {
type ClusterCoordinator interface {
	// SynchronizeCluster synchronizes context state across cluster
	SynchronizeCluster(ctx context.Context) (*SyncResult, error)

	// GetClusterState returns current cluster state
	GetClusterState() (*ClusterState, error)

	// GetNodeHealth returns health status of cluster nodes
	GetNodeHealth() (map[string]*NodeHealth, error)

	// EvictNode removes unresponsive node from cluster operations
	EvictNode(ctx context.Context, nodeID string) error

	// AddNode adds new node to cluster operations
	AddNode(ctx context.Context, nodeID string, nodeInfo *NodeInfo) error

	// BroadcastMessage broadcasts message to all cluster nodes
	BroadcastMessage(ctx context.Context, message *ClusterMessage) error

	// GetClusterMetrics returns cluster performance metrics
	GetClusterMetrics() (*ClusterMetrics, error)

	// ConfigureCluster configures cluster coordination parameters
	ConfigureCluster(config *ClusterConfig) error
}
@@ -191,25 +190,25 @@ type ClusterCoordinator interface {
type HealthMonitor interface {
	// CheckHealth performs comprehensive health check
	CheckHealth(ctx context.Context) (*HealthStatus, error)

	// CheckNodeHealth checks health of specific node
	CheckNodeHealth(ctx context.Context, nodeID string) (*NodeHealth, error)

	// CheckQueueHealth checks health of generation queue
	CheckQueueHealth() (*QueueHealth, error)

	// CheckLeaderHealth checks health of leader node
	CheckLeaderHealth() (*LeaderHealth, error)

	// GetHealthMetrics returns health monitoring metrics
	GetHealthMetrics() (*HealthMetrics, error)

	// SetHealthPolicy configures health monitoring policy
	SetHealthPolicy(policy *HealthPolicy) error

	// GetHealthHistory returns history of health events
	GetHealthHistory(timeRange time.Duration) ([]*HealthEvent, error)

	// SubscribeToHealthEvents subscribes to health event notifications
	SubscribeToHealthEvents(handler HealthEventHandler) error
}
@@ -218,19 +217,19 @@ type HealthMonitor interface {
type ResourceManager interface {
	// AllocateResources allocates resources for context generation
	AllocateResources(req *ResourceRequest) (*ResourceAllocation, error)

	// ReleaseResources releases allocated resources
	ReleaseResources(allocationID string) error

	// GetAvailableResources returns currently available resources
	GetAvailableResources() (*AvailableResources, error)

	// SetResourceLimits configures resource usage limits
	SetResourceLimits(limits *ResourceLimits) error

	// GetResourceUsage returns current resource usage statistics
	GetResourceUsage() (*ResourceUsage, error)

	// RebalanceResources rebalances resources across operations
	RebalanceResources(ctx context.Context) (*ResourceRebalanceResult, error)
}
@@ -244,12 +243,13 @@ type LeaderContextManager struct {
	intelligence    intelligence.IntelligenceEngine
	storage         storage.ContextStore
	contextResolver slurpContext.ContextResolver
+	contextUpserter slurp.ContextPersister

	// Context generation state
	generationQueue chan *ContextGenerationRequest
	activeJobs      map[string]*ContextGenerationJob
	completedJobs   map[string]*ContextGenerationJob

	// Coordination components
	coordinator  GenerationCoordinator
	queueManager QueueManager
@@ -257,16 +257,23 @@ type LeaderContextManager struct {
	clusterCoord    ClusterCoordinator
	healthMonitor   HealthMonitor
	resourceManager ResourceManager

	// Configuration
	config *ManagerConfig

	// Statistics
	stats *ManagerStatistics

	// Shutdown coordination
	shutdownChan chan struct{}
	shutdownOnce sync.Once
}

+// SetContextPersister registers the SLURP persistence hook (Roadmap: SEC-SLURP 1.1).
+func (cm *LeaderContextManager) SetContextPersister(persister slurp.ContextPersister) {
+	cm.mu.Lock()
+	defer cm.mu.Unlock()
+	cm.contextUpserter = persister
+}

// NewContextManager creates a new leader context manager
@@ -279,18 +286,18 @@ func NewContextManager(
) *LeaderContextManager {
	cm := &LeaderContextManager{
		election:        election,
		dht:             dht,
		intelligence:    intelligence,
		storage:         storage,
		contextResolver: resolver,
		generationQueue: make(chan *ContextGenerationRequest, 1000),
		activeJobs:      make(map[string]*ContextGenerationJob),
		completedJobs:   make(map[string]*ContextGenerationJob),
		shutdownChan:    make(chan struct{}),
		config:          DefaultManagerConfig(),
		stats:           &ManagerStatistics{},
	}

	// Initialize coordination components
	cm.coordinator = NewGenerationCoordinator(cm)
	cm.queueManager = NewQueueManager(cm)
@@ -298,13 +305,13 @@ func NewContextManager(
	cm.clusterCoord = NewClusterCoordinator(cm)
	cm.healthMonitor = NewHealthMonitor(cm)
	cm.resourceManager = NewResourceManager(cm)

	// Start background processes
	go cm.watchLeadershipChanges()
	go cm.processContextGeneration()
	go cm.monitorHealth()
	go cm.syncCluster()

	return cm
}
@@ -313,17 +320,17 @@ func (cm *LeaderContextManager) RequestContextGeneration(req *ContextGenerationR
	if !cm.IsLeader() {
		return ErrNotLeader
	}

	// Validate request
	if err := cm.validateRequest(req); err != nil {
		return err
	}

	// Check for duplicates
	if cm.isDuplicate(req) {
		return ErrDuplicateRequest
	}

	// Enqueue request
	select {
	case cm.generationQueue <- req:
@@ -346,7 +353,7 @@ func (cm *LeaderContextManager) IsLeader() bool {
func (cm *LeaderContextManager) GetGenerationStatus() (*GenerationStatus, error) {
	cm.mu.RLock()
	defer cm.mu.RUnlock()

	status := &GenerationStatus{
		ActiveTasks: len(cm.activeJobs),
		QueuedTasks: len(cm.generationQueue),
@@ -354,14 +361,14 @@ func (cm *LeaderContextManager) GetGenerationStatus() (*GenerationStatus, error)
		IsLeader:   cm.isLeader,
		LastUpdate: time.Now(),
	}

	// Calculate estimated completion time
	if status.ActiveTasks > 0 || status.QueuedTasks > 0 {
		avgJobTime := cm.calculateAverageJobTime()
		totalRemaining := time.Duration(status.ActiveTasks+status.QueuedTasks) * avgJobTime
		status.EstimatedCompletion = time.Now().Add(totalRemaining)
	}

	return status, nil
}
@@ -374,12 +381,12 @@ func (cm *LeaderContextManager) watchLeadershipChanges() {
		default:
			// Check leadership status
			newIsLeader := cm.election.IsLeader()

			cm.mu.Lock()
			oldIsLeader := cm.isLeader
			cm.isLeader = newIsLeader
			cm.mu.Unlock()

			// Handle leadership change
			if oldIsLeader != newIsLeader {
				if newIsLeader {
@@ -388,7 +395,7 @@ func (cm *LeaderContextManager) watchLeadershipChanges() {
					cm.onLoseLeadership()
				}
			}

			// Sleep before next check
			time.Sleep(cm.config.LeadershipCheckInterval)
		}
@@ -420,31 +427,31 @@ func (cm *LeaderContextManager) handleGenerationRequest(req *ContextGenerationRe
		Status:    JobStatusRunning,
		StartedAt: time.Now(),
	}

	cm.mu.Lock()
	cm.activeJobs[job.ID] = job
	cm.mu.Unlock()

	defer func() {
		cm.mu.Lock()
		delete(cm.activeJobs, job.ID)
		cm.completedJobs[job.ID] = job
		cm.mu.Unlock()

		// Clean up old completed jobs
		cm.cleanupCompletedJobs()
	}()

	// Generate context using intelligence engine
	contextNode, err := cm.intelligence.AnalyzeFile(
		context.Background(),
		req.FilePath,
		req.Role,
	)

	completedAt := time.Now()
	job.CompletedAt = &completedAt

	if err != nil {
		job.Status = JobStatusFailed
		job.Error = err
@@ -453,11 +460,16 @@
		job.Status = JobStatusCompleted
		job.Result = contextNode
		cm.stats.CompletedJobs++

-		// Store generated context
-		if err := cm.storage.StoreContext(context.Background(), contextNode, []string{req.Role}); err != nil {
-			// Log storage error but don't fail the job
-			// TODO: Add proper logging
-		}
+		// Store generated context (SEC-SLURP 1.1 persistence bridge)
+		if cm.contextUpserter != nil {
+			if _, persistErr := cm.contextUpserter.UpsertContext(context.Background(), contextNode); persistErr != nil {
+				// TODO(SEC-SLURP 1.1): surface persistence errors via structured logging/telemetry
+			}
+		} else if cm.storage != nil {
+			if err := cm.storage.StoreContext(context.Background(), contextNode, []string{req.Role}); err != nil {
+				// TODO: Add proper logging when falling back to legacy storage path
+			}
+		}
	}
}
@@ -494,21 +506,21 @@ func (cm *LeaderContextManager) calculateAverageJobTime() time.Duration {
	if len(cm.completedJobs) == 0 {
		return time.Minute // Default estimate
	}

	var totalTime time.Duration
	count := 0

	for _, job := range cm.completedJobs {
		if job.CompletedAt != nil {
			totalTime += job.CompletedAt.Sub(job.StartedAt)
			count++
		}
	}

	if count == 0 {
		return time.Minute
	}

	return totalTime / time.Duration(count)
}
@@ -520,10 +532,10 @@ func (cm *LeaderContextManager) calculateAverageWaitTime() time.Duration {
	if queueLength == 0 {
		return 0
	}

	avgJobTime := cm.calculateAverageJobTime()
	concurrency := cm.config.MaxConcurrentJobs

	// Estimate wait time based on queue position and processing capacity
	estimatedWait := time.Duration(queueLength/concurrency) * avgJobTime
	return estimatedWait
@@ -533,22 +545,22 @@ func (cm *LeaderContextManager) calculateAverageWaitTime() time.Duration {
func (cm *LeaderContextManager) GetQueueStatus() (*QueueStatus, error) {
	cm.mu.RLock()
	defer cm.mu.RUnlock()

	status := &QueueStatus{
		QueueLength:          len(cm.generationQueue),
		MaxQueueSize:         cm.config.QueueSize,
		QueuedRequests:       []*ContextGenerationRequest{},
		PriorityDistribution: make(map[Priority]int),
		AverageWaitTime:      cm.calculateAverageWaitTime(),
	}

	// Get oldest request time if any
	if len(cm.generationQueue) > 0 {
		// Peek at queue without draining
		oldest := time.Now()
		status.OldestRequest = &oldest
	}

	return status, nil
}
@@ -556,21 +568,21 @@ func (cm *LeaderContextManager) GetQueueStatus() (*QueueStatus, error) {
func (cm *LeaderContextManager) CancelGeneration(taskID string) error {
	cm.mu.Lock()
	defer cm.mu.Unlock()

	// Check if task is active
	if job, exists := cm.activeJobs[taskID]; exists {
		job.Status = JobStatusCancelled
		job.Error = fmt.Errorf("task cancelled by user")
		completedAt := time.Now()
		job.CompletedAt = &completedAt

		delete(cm.activeJobs, taskID)
		cm.completedJobs[taskID] = job
		cm.stats.CancelledJobs++

		return nil
	}

	// TODO: Remove from queue if pending
	return fmt.Errorf("task %s not found", taskID)
}
@@ -585,11 +597,11 @@ func (cm *LeaderContextManager) PrioritizeGeneration(taskID string, priority Pri
func (cm *LeaderContextManager) GetManagerStats() (*ManagerStatistics, error) {
	cm.mu.RLock()
	defer cm.mu.RUnlock()

	stats := *cm.stats // Copy current stats
	stats.AverageJobTime = cm.calculateAverageJobTime()
	stats.HighestQueueLength = len(cm.generationQueue)

	return &stats, nil
}
@@ -597,7 +609,7 @@ func (cm *LeaderContextManager) onBecomeLeader() {
	// Initialize leader-specific state
	cm.stats.LeadershipChanges++
	cm.stats.LastBecameLeader = time.Now()

	// Recover any pending state from previous leader
	if err := cm.failoverManager.RecoverFromFailover(context.Background()); err != nil {
		// Log error but continue - we're the leader now
@@ -611,7 +623,7 @@ func (cm *LeaderContextManager) onLoseLeadership() {
		// TODO: Send state to new leader
		_ = state
	}

	cm.stats.LastLostLeadership = time.Now()
}
@@ -623,7 +635,7 @@ func (cm *LeaderContextManager) handleNonLeaderRequest(req *ContextGenerationReq
func (cm *LeaderContextManager) monitorHealth() {
	ticker := time.NewTicker(cm.config.HealthCheckInterval)
	defer ticker.Stop()

	for {
		select {
		case <-ticker.C:
@@ -640,7 +652,7 @@ func (cm *LeaderContextManager) monitorHealth() {
func (cm *LeaderContextManager) syncCluster() {
	ticker := time.NewTicker(cm.config.ClusterSyncInterval)
	defer ticker.Stop()

	for {
		select {
		case <-ticker.C:
@@ -659,18 +671,18 @@ func (cm *LeaderContextManager) syncCluster() {
func (cm *LeaderContextManager) cleanupCompletedJobs() {
	cm.mu.Lock()
	defer cm.mu.Unlock()

	if len(cm.completedJobs) <= cm.config.MaxCompletedJobs {
		return
	}

	// Remove oldest completed jobs based on completion time
	type jobWithTime struct {
		id   string
		job  *ContextGenerationJob
		time time.Time
	}

	var jobs []jobWithTime
	for id, job := range cm.completedJobs {
		completedAt := time.Now()
@@ -679,12 +691,12 @@ func (cm *LeaderContextManager) cleanupCompletedJobs() {
		}
		jobs = append(jobs, jobWithTime{id: id, job: job, time: completedAt})
	}

	// Sort by completion time (oldest first)
	sort.Slice(jobs, func(i, j int) bool {
		return jobs[i].time.Before(jobs[j].time)
	})

	// Remove oldest jobs to get back to limit
	toRemove := len(jobs) - cm.config.MaxCompletedJobs
	for i := 0; i < toRemove; i++ {
@@ -701,13 +713,13 @@ func generateJobID() string {

// Error definitions
var (
	ErrNotLeader          = &LeaderError{Code: "NOT_LEADER", Message: "Node is not the leader"}
	ErrQueueFull          = &LeaderError{Code: "QUEUE_FULL", Message: "Generation queue is full"}
	ErrDuplicateRequest   = &LeaderError{Code: "DUPLICATE_REQUEST", Message: "Duplicate generation request"}
	ErrInvalidRequest     = &LeaderError{Code: "INVALID_REQUEST", Message: "Invalid generation request"}
	ErrMissingUCXLAddress = &LeaderError{Code: "MISSING_UCXL_ADDRESS", Message: "Missing UCXL address"}
	ErrMissingFilePath    = &LeaderError{Code: "MISSING_FILE_PATH", Message: "Missing file path"}
	ErrMissingRole        = &LeaderError{Code: "MISSING_ROLE", Message: "Missing role"}
)

// LeaderError represents errors specific to leader operations
@@ -731,4 +743,4 @@ func DefaultManagerConfig() *ManagerConfig {
		MaxConcurrentJobs: 10,
		JobTimeout:        10 * time.Minute,
	}
}
pkg/slurp/slurp.go (1064 lines): file diff suppressed because it is too large
pkg/slurp/slurp_persistence_test.go (new file, 69 lines)
@@ -0,0 +1,69 @@
package slurp

import (
	"context"
	"testing"
	"time"

	"chorus/pkg/config"
	slurpContext "chorus/pkg/slurp/context"
	"chorus/pkg/ucxl"

	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

// TestSLURPPersistenceLoadsContexts verifies LevelDB fallback (Roadmap: SEC-SLURP 1.1).
func TestSLURPPersistenceLoadsContexts(t *testing.T) {
	configDir := t.TempDir()
	cfg := &config.Config{
		Slurp: config.SlurpConfig{Enabled: true},
		UCXL: config.UCXLConfig{
			Storage: config.StorageConfig{Directory: configDir},
		},
	}

	primary, err := NewSLURP(cfg, nil, nil, nil)
	require.NoError(t, err)
	require.NoError(t, primary.Initialize(context.Background()))
	t.Cleanup(func() {
		_ = primary.Close()
	})

	address, err := ucxl.Parse("ucxl://agent:resolver@chorus:task/current/docs/example.go")
	require.NoError(t, err)

	node := &slurpContext.ContextNode{
		Path:          "docs/example.go",
		UCXLAddress:   *address,
		Summary:       "Persistent context summary",
		Purpose:       "Verify persistence pipeline",
		Technologies:  []string{"Go"},
		Tags:          []string{"persistence", "slurp"},
		GeneratedAt:   time.Now().UTC(),
		RAGConfidence: 0.92,
	}

	_, err = primary.UpsertContext(context.Background(), node)
	require.NoError(t, err)
	require.NoError(t, primary.Close())

	restore, err := NewSLURP(cfg, nil, nil, nil)
	require.NoError(t, err)
	require.NoError(t, restore.Initialize(context.Background()))
	t.Cleanup(func() {
		_ = restore.Close()
	})

	// Clear in-memory caches to force disk hydration path.
	restore.contextsMu.Lock()
	restore.contextStore = make(map[string]*slurpContext.ContextNode)
	restore.resolvedCache = make(map[string]*slurpContext.ResolvedContext)
	restore.contextsMu.Unlock()

	resolved, err := restore.Resolve(context.Background(), address.String())
	require.NoError(t, err)
	require.NotNil(t, resolved)
	assert.Equal(t, node.Summary, resolved.Summary)
	assert.Equal(t, node.Purpose, resolved.Purpose)
	assert.Contains(t, resolved.Technologies, "Go")
}