Implement initial scan logic and council formation for WHOOSH project kickoffs
- Replace incremental sync with full scan for new repositories - Add initial_scan status to bypass Since parameter filtering - Implement council formation detection for Design Brief issues - Add version display to WHOOSH UI header for debugging - Fix Docker token authentication with trailing newline removal - Add comprehensive council orchestration with Docker Swarm integration - Include BACKBEAT prototype integration for distributed timing - Support council-specific agent roles and deployment strategies - Transition repositories to active status after content discovery Key architectural improvements: - Full scan approach for new project detection vs incremental sync - Council formation triggered by chorus-entrypoint labeled Design Briefs - Proper token handling and authentication for Gitea API calls - Support for both initial discovery and ongoing task monitoring This enables autonomous project kickoff workflows where Design Brief issues automatically trigger formation of specialized agent councils for new projects. 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
328
internal/agents/registry.go
Normal file
328
internal/agents/registry.go
Normal file
@@ -0,0 +1,328 @@
|
||||
package agents
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/chorus-services/whoosh/internal/p2p"
|
||||
"github.com/google/uuid"
|
||||
"github.com/jackc/pgx/v5/pgxpool"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
// Registry manages agent registration and synchronization with the database
|
||||
type Registry struct {
|
||||
db *pgxpool.Pool
|
||||
discovery *p2p.Discovery
|
||||
stopCh chan struct{}
|
||||
ctx context.Context
|
||||
cancel context.CancelFunc
|
||||
}
|
||||
|
||||
// NewRegistry creates a new agent registry service
|
||||
func NewRegistry(db *pgxpool.Pool, discovery *p2p.Discovery) *Registry {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
|
||||
return &Registry{
|
||||
db: db,
|
||||
discovery: discovery,
|
||||
stopCh: make(chan struct{}),
|
||||
ctx: ctx,
|
||||
cancel: cancel,
|
||||
}
|
||||
}
|
||||
|
||||
// Start begins the agent registry synchronization
|
||||
func (r *Registry) Start() error {
|
||||
log.Info().Msg("🔄 Starting CHORUS agent registry synchronization")
|
||||
|
||||
// Start periodic synchronization of discovered agents with database
|
||||
go r.syncDiscoveredAgents()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Stop shuts down the agent registry
|
||||
func (r *Registry) Stop() error {
|
||||
log.Info().Msg("🔄 Stopping CHORUS agent registry synchronization")
|
||||
|
||||
r.cancel()
|
||||
close(r.stopCh)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// syncDiscoveredAgents periodically syncs P2P discovered agents to database
|
||||
func (r *Registry) syncDiscoveredAgents() {
|
||||
// Initial sync
|
||||
r.performSync()
|
||||
|
||||
// Then sync every 30 seconds
|
||||
ticker := time.NewTicker(30 * time.Second)
|
||||
defer ticker.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-r.ctx.Done():
|
||||
return
|
||||
case <-ticker.C:
|
||||
r.performSync()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// performSync synchronizes discovered agents with the database
|
||||
func (r *Registry) performSync() {
|
||||
discoveredAgents := r.discovery.GetAgents()
|
||||
|
||||
log.Debug().
|
||||
Int("discovered_count", len(discoveredAgents)).
|
||||
Msg("Synchronizing discovered agents with database")
|
||||
|
||||
for _, agent := range discoveredAgents {
|
||||
err := r.upsertAgent(r.ctx, agent)
|
||||
if err != nil {
|
||||
log.Error().
|
||||
Err(err).
|
||||
Str("agent_id", agent.ID).
|
||||
Msg("Failed to sync agent to database")
|
||||
}
|
||||
}
|
||||
|
||||
// Clean up agents that are no longer discovered
|
||||
err := r.markOfflineAgents(r.ctx, discoveredAgents)
|
||||
if err != nil {
|
||||
log.Error().
|
||||
Err(err).
|
||||
Msg("Failed to mark offline agents")
|
||||
}
|
||||
}
|
||||
|
||||
// upsertAgent inserts or updates an agent in the database
|
||||
func (r *Registry) upsertAgent(ctx context.Context, agent *p2p.Agent) error {
|
||||
// Convert capabilities to JSON
|
||||
capabilitiesJSON, err := json.Marshal(agent.Capabilities)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to marshal capabilities: %w", err)
|
||||
}
|
||||
|
||||
// Create performance metrics
|
||||
performanceMetrics := map[string]interface{}{
|
||||
"tasks_completed": agent.TasksCompleted,
|
||||
"current_team": agent.CurrentTeam,
|
||||
"model": agent.Model,
|
||||
"cluster_id": agent.ClusterID,
|
||||
"p2p_addr": agent.P2PAddr,
|
||||
}
|
||||
metricsJSON, err := json.Marshal(performanceMetrics)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to marshal performance metrics: %w", err)
|
||||
}
|
||||
|
||||
// Map P2P status to database status
|
||||
dbStatus := r.mapStatusToDatabase(agent.Status)
|
||||
|
||||
// Use upsert query to insert or update
|
||||
query := `
|
||||
INSERT INTO agents (id, name, endpoint_url, capabilities, status, last_seen, performance_metrics, current_tasks, success_rate)
|
||||
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
|
||||
ON CONFLICT (id)
|
||||
DO UPDATE SET
|
||||
name = EXCLUDED.name,
|
||||
endpoint_url = EXCLUDED.endpoint_url,
|
||||
capabilities = EXCLUDED.capabilities,
|
||||
status = EXCLUDED.status,
|
||||
last_seen = EXCLUDED.last_seen,
|
||||
performance_metrics = EXCLUDED.performance_metrics,
|
||||
current_tasks = EXCLUDED.current_tasks,
|
||||
updated_at = NOW()
|
||||
RETURNING id
|
||||
`
|
||||
|
||||
// Generate UUID from agent ID for database consistency
|
||||
agentUUID, err := r.generateConsistentUUID(agent.ID)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to generate UUID: %w", err)
|
||||
}
|
||||
|
||||
var resultID uuid.UUID
|
||||
err = r.db.QueryRow(ctx, query,
|
||||
agentUUID, // id
|
||||
agent.Name, // name
|
||||
agent.Endpoint, // endpoint_url
|
||||
capabilitiesJSON, // capabilities
|
||||
dbStatus, // status
|
||||
agent.LastSeen, // last_seen
|
||||
metricsJSON, // performance_metrics
|
||||
r.getCurrentTaskCount(agent), // current_tasks
|
||||
r.calculateSuccessRate(agent), // success_rate
|
||||
).Scan(&resultID)
|
||||
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to upsert agent: %w", err)
|
||||
}
|
||||
|
||||
log.Debug().
|
||||
Str("agent_id", agent.ID).
|
||||
Str("db_uuid", resultID.String()).
|
||||
Str("status", dbStatus).
|
||||
Msg("Synced agent to database")
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// markOfflineAgents marks agents as offline if they're no longer discovered
|
||||
func (r *Registry) markOfflineAgents(ctx context.Context, discoveredAgents []*p2p.Agent) error {
|
||||
// Build list of currently discovered agent IDs
|
||||
discoveredIDs := make([]string, len(discoveredAgents))
|
||||
for i, agent := range discoveredAgents {
|
||||
discoveredIDs[i] = agent.ID
|
||||
}
|
||||
|
||||
// Convert to UUIDs for database query
|
||||
discoveredUUIDs := make([]uuid.UUID, len(discoveredIDs))
|
||||
for i, id := range discoveredIDs {
|
||||
uuid, err := r.generateConsistentUUID(id)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to generate UUID for %s: %w", id, err)
|
||||
}
|
||||
discoveredUUIDs[i] = uuid
|
||||
}
|
||||
|
||||
// If no agents discovered, don't mark all as offline (could be temporary network issue)
|
||||
if len(discoveredUUIDs) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Mark agents as offline if they haven't been seen and aren't in discovered list
|
||||
query := `
|
||||
UPDATE agents
|
||||
SET status = 'offline', updated_at = NOW()
|
||||
WHERE status != 'offline'
|
||||
AND last_seen < NOW() - INTERVAL '2 minutes'
|
||||
AND id != ALL($1)
|
||||
`
|
||||
|
||||
result, err := r.db.Exec(ctx, query, discoveredUUIDs)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to mark offline agents: %w", err)
|
||||
}
|
||||
|
||||
rowsAffected := result.RowsAffected()
|
||||
if rowsAffected > 0 {
|
||||
log.Info().
|
||||
Int64("agents_marked_offline", rowsAffected).
|
||||
Msg("Marked agents as offline")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// mapStatusToDatabase maps P2P status to database status values
|
||||
func (r *Registry) mapStatusToDatabase(p2pStatus string) string {
|
||||
switch p2pStatus {
|
||||
case "online":
|
||||
return "available"
|
||||
case "idle":
|
||||
return "idle"
|
||||
case "working":
|
||||
return "busy"
|
||||
default:
|
||||
return "available"
|
||||
}
|
||||
}
|
||||
|
||||
// getCurrentTaskCount estimates current task count based on status
|
||||
func (r *Registry) getCurrentTaskCount(agent *p2p.Agent) int {
|
||||
switch agent.Status {
|
||||
case "working":
|
||||
return 1
|
||||
case "idle", "online":
|
||||
return 0
|
||||
default:
|
||||
return 0
|
||||
}
|
||||
}
|
||||
|
||||
// calculateSuccessRate calculates success rate based on tasks completed
|
||||
func (r *Registry) calculateSuccessRate(agent *p2p.Agent) float64 {
|
||||
// For MVP, assume high success rate for all agents
|
||||
// In production, this would be calculated from actual task outcomes
|
||||
if agent.TasksCompleted > 0 {
|
||||
return 0.85 + (float64(agent.TasksCompleted)*0.01) // Success rate increases with experience
|
||||
}
|
||||
return 0.75 // Default for new agents
|
||||
}
|
||||
|
||||
// generateConsistentUUID generates a consistent UUID from a string ID
|
||||
// This ensures the same agent ID always maps to the same UUID
|
||||
func (r *Registry) generateConsistentUUID(agentID string) (uuid.UUID, error) {
|
||||
// Use UUID v5 (name-based) to generate consistent UUIDs
|
||||
// This ensures the same agent ID always produces the same UUID
|
||||
namespace := uuid.MustParse("6ba7b810-9dad-11d1-80b4-00c04fd430c8") // DNS namespace UUID
|
||||
return uuid.NewSHA1(namespace, []byte(agentID)), nil
|
||||
}
|
||||
|
||||
// GetAvailableAgents returns agents that are available for task assignment
|
||||
func (r *Registry) GetAvailableAgents(ctx context.Context) ([]*DatabaseAgent, error) {
|
||||
query := `
|
||||
SELECT id, name, endpoint_url, capabilities, status, last_seen,
|
||||
performance_metrics, current_tasks, success_rate, created_at, updated_at
|
||||
FROM agents
|
||||
WHERE status IN ('available', 'idle')
|
||||
AND last_seen > NOW() - INTERVAL '5 minutes'
|
||||
ORDER BY success_rate DESC, current_tasks ASC
|
||||
`
|
||||
|
||||
rows, err := r.db.Query(ctx, query)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to query available agents: %w", err)
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
var agents []*DatabaseAgent
|
||||
|
||||
for rows.Next() {
|
||||
agent := &DatabaseAgent{}
|
||||
var capabilitiesJSON, metricsJSON []byte
|
||||
|
||||
err := rows.Scan(
|
||||
&agent.ID, &agent.Name, &agent.EndpointURL, &capabilitiesJSON,
|
||||
&agent.Status, &agent.LastSeen, &metricsJSON,
|
||||
&agent.CurrentTasks, &agent.SuccessRate,
|
||||
&agent.CreatedAt, &agent.UpdatedAt,
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to scan agent row: %w", err)
|
||||
}
|
||||
|
||||
// Parse JSON fields
|
||||
if len(capabilitiesJSON) > 0 {
|
||||
json.Unmarshal(capabilitiesJSON, &agent.Capabilities)
|
||||
}
|
||||
if len(metricsJSON) > 0 {
|
||||
json.Unmarshal(metricsJSON, &agent.PerformanceMetrics)
|
||||
}
|
||||
|
||||
agents = append(agents, agent)
|
||||
}
|
||||
|
||||
return agents, rows.Err()
|
||||
}
|
||||
|
||||
// DatabaseAgent represents an agent as stored in the database
|
||||
type DatabaseAgent struct {
|
||||
ID uuid.UUID `json:"id" db:"id"`
|
||||
Name string `json:"name" db:"name"`
|
||||
EndpointURL string `json:"endpoint_url" db:"endpoint_url"`
|
||||
Capabilities map[string]interface{} `json:"capabilities" db:"capabilities"`
|
||||
Status string `json:"status" db:"status"`
|
||||
LastSeen time.Time `json:"last_seen" db:"last_seen"`
|
||||
PerformanceMetrics map[string]interface{} `json:"performance_metrics" db:"performance_metrics"`
|
||||
CurrentTasks int `json:"current_tasks" db:"current_tasks"`
|
||||
SuccessRate float64 `json:"success_rate" db:"success_rate"`
|
||||
CreatedAt time.Time `json:"created_at" db:"created_at"`
|
||||
UpdatedAt time.Time `json:"updated_at" db:"updated_at"`
|
||||
}
|
||||
Reference in New Issue
Block a user