Wire SLURP persistence and add restart coverage

anthonyrawlins
2025-09-27 15:26:25 +10:00
parent 17673c38a6
commit 0b670a535d
5 changed files with 1061 additions and 369 deletions

View File

@@ -145,7 +145,7 @@ services:
start_period: 10s
whoosh:
- image: anthonyrawlins/whoosh:scaling-v1.0.0
+ image: anthonyrawlins/whoosh:latest
ports:
- target: 8080
published: 8800
@@ -200,6 +200,9 @@ services:
WHOOSH_BACKBEAT_AGENT_ID: "whoosh"
WHOOSH_BACKBEAT_NATS_URL: "nats://backbeat-nats:4222"
+ # Docker integration configuration (disabled for agent assignment architecture)
+ WHOOSH_DOCKER_ENABLED: "false"
secrets:
- whoosh_db_password
- gitea_token
@@ -207,8 +210,8 @@ services:
- jwt_secret
- service_tokens
- redis_password
- volumes:
-   - /var/run/docker.sock:/var/run/docker.sock
+ # volumes:
+ #   - /var/run/docker.sock:/var/run/docker.sock # Disabled for agent assignment architecture
deploy:
replicas: 2
restart_policy:

View File

@@ -0,0 +1,14 @@
# SEC-SLURP 1.1 Persistence Wiring Report
## Summary of Changes
- Added LevelDB-backed persistence scaffolding in `pkg/slurp/slurp.go`, capturing the storage path, local storage handle, and the roadmap-tagged metrics helpers required for SEC-SLURP 1.1.
- Upgraded SLURP's lifecycle so initialization bootstraps cached context data from disk, cache misses hydrate from persistence, successful `UpsertContext` calls write back to LevelDB, and shutdown closes the store with error telemetry (see the sketch after this list).
- Introduced `pkg/slurp/slurp_persistence_test.go` to confirm contexts survive process restarts and can be resolved after clearing in-memory caches.
- Instrumented cache/persistence metrics so hit/miss ratios and storage failures are tracked for observability.
- Attempted `GOWORK=off go test ./pkg/slurp`; execution was blocked by legacy references to `config.Authority*` symbols in `pkg/slurp/context`, so the new test did not run.
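
The hydration and write-back flow reads roughly as follows. This is a minimal sketch assuming a goleveldb-backed store and simplified `ContextNode`/key shapes; it is not the actual `pkg/slurp/slurp.go` code:

```go
package slurpsketch

import (
	"encoding/json"
	"sync"

	"github.com/syndtr/goleveldb/leveldb"
)

// ContextNode is a stand-in for the real pkg/slurp/context type.
type ContextNode struct {
	Path    string `json:"path"`
	Summary string `json:"summary"`
}

// Store pairs an in-memory cache with a LevelDB write-through backend.
type Store struct {
	mu    sync.RWMutex
	cache map[string]*ContextNode
	db    *leveldb.DB
}

// OpenStore opens (or creates) the LevelDB directory backing the store.
func OpenStore(path string) (*Store, error) {
	db, err := leveldb.OpenFile(path, nil)
	if err != nil {
		return nil, err
	}
	return &Store{cache: make(map[string]*ContextNode), db: db}, nil
}

// Get serves from the cache first and hydrates from disk on a miss.
func (s *Store) Get(key string) (*ContextNode, error) {
	s.mu.RLock()
	node, ok := s.cache[key]
	s.mu.RUnlock()
	if ok {
		return node, nil // cache hit
	}
	raw, err := s.db.Get([]byte(key), nil) // leveldb.ErrNotFound if absent
	if err != nil {
		return nil, err
	}
	node = &ContextNode{}
	if err := json.Unmarshal(raw, node); err != nil {
		return nil, err
	}
	s.mu.Lock()
	s.cache[key] = node // hydrate the cache so the next read is a hit
	s.mu.Unlock()
	return node, nil
}

// Upsert updates the cache and writes through to LevelDB so the entry
// survives a process restart.
func (s *Store) Upsert(key string, node *ContextNode) error {
	raw, err := json.Marshal(node)
	if err != nil {
		return err
	}
	if err := s.db.Put([]byte(key), raw, nil); err != nil {
		return err
	}
	s.mu.Lock()
	s.cache[key] = node
	s.mu.Unlock()
	return nil
}

// Close flushes and releases the LevelDB handle (called on shutdown).
func (s *Store) Close() error {
	return s.db.Close()
}
```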
## Recommended Next Steps
- Address the `config.Authority*` symbol drift (or scope down the impacted packages) so the SLURP test suite can compile cleanly, then rerun `GOWORK=off go test ./pkg/slurp` to validate persistence changes.
- Feed the durable store into the resolver and temporal graph implementations to finish the remaining Phase 1 SLURP roadmap items.
- Expand Prometheus metrics and logging to track cache hit/miss ratios plus persistence errors for SEC-SLURP observability goals (a counter sketch follows this list).
- Review unrelated changes on `feature/phase-4-real-providers` (e.g., docker-compose edits) and either align them with this roadmap work or revert to keep the branch focused.
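
For the metrics item above, a minimal sketch with the standard Prometheus Go client; the metric names and label set are assumptions, not existing instrumentation:

```go
package slurpsketch

import "github.com/prometheus/client_golang/prometheus"

var (
	// Cache lookups labelled by outcome ("hit" or "miss").
	cacheLookups = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: "slurp_context_cache_lookups_total",
			Help: "Context cache lookups by outcome.",
		},
		[]string{"outcome"},
	)

	// LevelDB read/write failures in the persistence layer.
	persistenceErrors = prometheus.NewCounter(prometheus.CounterOpts{
		Name: "slurp_context_persistence_errors_total",
		Help: "Persistence (LevelDB) failures observed by SLURP.",
	})
)

func init() {
	prometheus.MustRegister(cacheLookups, persistenceErrors)
}

// Call sites would then record outcomes, for example:
//   cacheLookups.WithLabelValues("hit").Inc()
//   cacheLookups.WithLabelValues("miss").Inc()
//   persistenceErrors.Inc()
```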

View File

@@ -8,12 +8,11 @@ import (
"sync"
"time"
"chorus/pkg/election"
"chorus/pkg/dht"
"chorus/pkg/ucxl"
"chorus/pkg/election"
slurpContext "chorus/pkg/slurp/context"
"chorus/pkg/slurp/intelligence"
"chorus/pkg/slurp/storage"
slurpContext "chorus/pkg/slurp/context"
)
// ContextManager handles leader-only context generation duties
@@ -244,6 +243,7 @@ type LeaderContextManager struct {
intelligence intelligence.IntelligenceEngine
storage storage.ContextStore
contextResolver slurpContext.ContextResolver
+ contextUpserter slurp.ContextPersister
// Context generation state
generationQueue chan *ContextGenerationRequest
@@ -259,14 +259,21 @@ type LeaderContextManager struct {
resourceManager ResourceManager
// Configuration
- config *ManagerConfig
+ config          *ManagerConfig
// Statistics
- stats *ManagerStatistics
+ stats           *ManagerStatistics
// Shutdown coordination
- shutdownChan chan struct{}
- shutdownOnce sync.Once
+ shutdownChan    chan struct{}
+ shutdownOnce    sync.Once
}
+ // SetContextPersister registers the SLURP persistence hook (Roadmap: SEC-SLURP 1.1).
+ func (cm *LeaderContextManager) SetContextPersister(persister slurp.ContextPersister) {
+     cm.mu.Lock()
+     defer cm.mu.Unlock()
+     cm.contextUpserter = persister
+ }
// NewContextManager creates a new leader context manager
@@ -279,16 +286,16 @@ func NewContextManager(
) *LeaderContextManager {
cm := &LeaderContextManager{
election: election,
- dht: dht,
- intelligence: intelligence,
- storage: storage,
+ dht:             dht,
+ intelligence:    intelligence,
+ storage:         storage,
contextResolver: resolver,
generationQueue: make(chan *ContextGenerationRequest, 1000),
- activeJobs: make(map[string]*ContextGenerationJob),
- completedJobs: make(map[string]*ContextGenerationJob),
- shutdownChan: make(chan struct{}),
- config: DefaultManagerConfig(),
- stats: &ManagerStatistics{},
+ activeJobs:      make(map[string]*ContextGenerationJob),
+ completedJobs:   make(map[string]*ContextGenerationJob),
+ shutdownChan:    make(chan struct{}),
+ config:          DefaultManagerConfig(),
+ stats:           &ManagerStatistics{},
}
// Initialize coordination components
@@ -454,10 +461,15 @@ func (cm *LeaderContextManager) handleGenerationRequest(req *ContextGenerationRe
job.Result = contextNode
cm.stats.CompletedJobs++
- // Store generated context
- if err := cm.storage.StoreContext(context.Background(), contextNode, []string{req.Role}); err != nil {
-     // Log storage error but don't fail the job
-     // TODO: Add proper logging
+ // Store generated context (SEC-SLURP 1.1 persistence bridge)
+ if cm.contextUpserter != nil {
+     if _, persistErr := cm.contextUpserter.UpsertContext(context.Background(), contextNode); persistErr != nil {
+         // TODO(SEC-SLURP 1.1): surface persistence errors via structured logging/telemetry
+     }
+ } else if cm.storage != nil {
+     if err := cm.storage.StoreContext(context.Background(), contextNode, []string{req.Role}); err != nil {
+         // TODO: Add proper logging when falling back to legacy storage path
+     }
}
}
}
@@ -535,11 +547,11 @@ func (cm *LeaderContextManager) GetQueueStatus() (*QueueStatus, error) {
defer cm.mu.RUnlock()
status := &QueueStatus{
- QueueLength: len(cm.generationQueue),
- MaxQueueSize: cm.config.QueueSize,
- QueuedRequests: []*ContextGenerationRequest{},
+ QueueLength:          len(cm.generationQueue),
+ MaxQueueSize:         cm.config.QueueSize,
+ QueuedRequests:       []*ContextGenerationRequest{},
PriorityDistribution: make(map[Priority]int),
- AverageWaitTime: cm.calculateAverageWaitTime(),
+ AverageWaitTime:      cm.calculateAverageWaitTime(),
}
// Get oldest request time if any
@@ -701,13 +713,13 @@ func generateJobID() string {
// Error definitions
var (
- ErrNotLeader = &LeaderError{Code: "NOT_LEADER", Message: "Node is not the leader"}
- ErrQueueFull = &LeaderError{Code: "QUEUE_FULL", Message: "Generation queue is full"}
- ErrDuplicateRequest = &LeaderError{Code: "DUPLICATE_REQUEST", Message: "Duplicate generation request"}
- ErrInvalidRequest = &LeaderError{Code: "INVALID_REQUEST", Message: "Invalid generation request"}
- ErrMissingUCXLAddress = &LeaderError{Code: "MISSING_UCXL_ADDRESS", Message: "Missing UCXL address"}
- ErrMissingFilePath = &LeaderError{Code: "MISSING_FILE_PATH", Message: "Missing file path"}
- ErrMissingRole = &LeaderError{Code: "MISSING_ROLE", Message: "Missing role"}
+ ErrNotLeader          = &LeaderError{Code: "NOT_LEADER", Message: "Node is not the leader"}
+ ErrQueueFull          = &LeaderError{Code: "QUEUE_FULL", Message: "Generation queue is full"}
+ ErrDuplicateRequest   = &LeaderError{Code: "DUPLICATE_REQUEST", Message: "Duplicate generation request"}
+ ErrInvalidRequest     = &LeaderError{Code: "INVALID_REQUEST", Message: "Invalid generation request"}
+ ErrMissingUCXLAddress = &LeaderError{Code: "MISSING_UCXL_ADDRESS", Message: "Missing UCXL address"}
+ ErrMissingFilePath    = &LeaderError{Code: "MISSING_FILE_PATH", Message: "Missing file path"}
+ ErrMissingRole        = &LeaderError{Code: "MISSING_ROLE", Message: "Missing role"}
)
// LeaderError represents errors specific to leader operations
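
As a usage note, the new hook would be wired during leader startup roughly as below. This is a sketch: the constructor argument list and `slurpInstance` are assumptions, and the interface shape is inferred from the `UpsertContext` call site above rather than copied from `pkg/slurp`.

	// Hypothetical interface implemented by the SLURP instance; the real
	// definition lives in pkg/slurp and may differ in its first return type.
	type ContextPersister interface {
		UpsertContext(ctx context.Context, node *slurpContext.ContextNode) (*slurpContext.ResolvedContext, error)
	}

	// Illustrative wiring at startup.
	mgr := NewContextManager(election, dht, intelligence, storage, resolver)
	mgr.SetContextPersister(slurpInstance) // slurpInstance satisfies ContextPersister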

File diff suppressed because it is too large

View File

@@ -0,0 +1,69 @@
package slurp

import (
	"context"
	"testing"
	"time"

	"chorus/pkg/config"
	slurpContext "chorus/pkg/slurp/context"
	"chorus/pkg/ucxl"

	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

// TestSLURPPersistenceLoadsContexts verifies LevelDB fallback (Roadmap: SEC-SLURP 1.1).
func TestSLURPPersistenceLoadsContexts(t *testing.T) {
	configDir := t.TempDir()
	cfg := &config.Config{
		Slurp: config.SlurpConfig{Enabled: true},
		UCXL: config.UCXLConfig{
			Storage: config.StorageConfig{Directory: configDir},
		},
	}

	primary, err := NewSLURP(cfg, nil, nil, nil)
	require.NoError(t, err)
	require.NoError(t, primary.Initialize(context.Background()))
	t.Cleanup(func() {
		_ = primary.Close()
	})

	address, err := ucxl.Parse("ucxl://agent:resolver@chorus:task/current/docs/example.go")
	require.NoError(t, err)

	node := &slurpContext.ContextNode{
		Path:          "docs/example.go",
		UCXLAddress:   *address,
		Summary:       "Persistent context summary",
		Purpose:       "Verify persistence pipeline",
		Technologies:  []string{"Go"},
		Tags:          []string{"persistence", "slurp"},
		GeneratedAt:   time.Now().UTC(),
		RAGConfidence: 0.92,
	}

	_, err = primary.UpsertContext(context.Background(), node)
	require.NoError(t, err)
	require.NoError(t, primary.Close())

	restore, err := NewSLURP(cfg, nil, nil, nil)
	require.NoError(t, err)
	require.NoError(t, restore.Initialize(context.Background()))
	t.Cleanup(func() {
		_ = restore.Close()
	})

	// Clear in-memory caches to force disk hydration path.
	restore.contextsMu.Lock()
	restore.contextStore = make(map[string]*slurpContext.ContextNode)
	restore.resolvedCache = make(map[string]*slurpContext.ResolvedContext)
	restore.contextsMu.Unlock()

	resolved, err := restore.Resolve(context.Background(), address.String())
	require.NoError(t, err)
	require.NotNil(t, resolved)
	assert.Equal(t, node.Summary, resolved.Summary)
	assert.Equal(t, node.Purpose, resolved.Purpose)
	assert.Contains(t, resolved.Technologies, "Go")
}