Implement initial scan logic and council formation for WHOOSH project kickoffs
- Replace incremental sync with full scan for new repositories
- Add initial_scan status to bypass Since parameter filtering
- Implement council formation detection for Design Brief issues
- Add version display to WHOOSH UI header for debugging
- Fix Docker token authentication with trailing newline removal
- Add comprehensive council orchestration with Docker Swarm integration
- Include BACKBEAT prototype integration for distributed timing
- Support council-specific agent roles and deployment strategies
- Transition repositories to active status after content discovery

Key architectural improvements:
- Full scan approach for new project detection vs incremental sync
- Council formation triggered by chorus-entrypoint labeled Design Briefs
- Proper token handling and authentication for Gitea API calls
- Support for both initial discovery and ongoing task monitoring

This enables autonomous project kickoff workflows where Design Brief issues automatically trigger formation of specialized agent councils for new projects.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -2,7 +2,6 @@ package p2p
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net"
|
||||
"net/http"
|
||||
"sync"
|
||||
@@ -119,155 +118,87 @@ func (d *Discovery) GetAgents() []*Agent {
|
||||
|
||||
// listenForBroadcasts listens for CHORUS agent P2P broadcasts
|
||||
func (d *Discovery) listenForBroadcasts() {
|
||||
// For now, simulate discovering the 9 CHORUS replicas that are running
|
||||
// In a full implementation, this would listen on UDP multicast for actual P2P broadcasts
|
||||
log.Info().Msg("🔍 Starting real CHORUS agent discovery")
|
||||
|
||||
log.Info().Msg("🔍 Simulating P2P discovery of CHORUS agents")
|
||||
|
||||
// Since we know CHORUS is running 9 replicas, let's simulate discovering them
|
||||
ticker := time.NewTicker(10 * time.Second)
|
||||
// Real discovery polling every 30 seconds to avoid overwhelming the service
|
||||
ticker := time.NewTicker(30 * time.Second)
|
||||
defer ticker.Stop()
|
||||
|
||||
// Run initial discovery immediately
|
||||
d.discoverRealCHORUSAgents()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-d.ctx.Done():
|
||||
return
|
||||
case <-ticker.C:
|
||||
d.simulateAgentDiscovery()
|
||||
d.discoverRealCHORUSAgents()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// simulateAgentDiscovery discovers CHORUS agents by querying their health endpoints
|
||||
func (d *Discovery) simulateAgentDiscovery() {
|
||||
log.Debug().Msg("🔍 Discovering CHORUS agents via health endpoints")
|
||||
// discoverRealCHORUSAgents discovers actual CHORUS agents by querying their health endpoints
|
||||
func (d *Discovery) discoverRealCHORUSAgents() {
|
||||
log.Debug().Msg("🔍 Discovering real CHORUS agents via health endpoints")
|
||||
|
||||
// Query Docker DNS for CHORUS service tasks
|
||||
// In Docker Swarm, tasks can be discovered via the service name
|
||||
d.discoverCHORUSReplicas()
|
||||
// Query the actual CHORUS service to see what's running
|
||||
d.queryActualCHORUSService()
|
||||
}
|
||||
|
||||
// discoverCHORUSReplicas discovers running CHORUS replicas in the Docker Swarm network.
|
||||
// This function implements a discovery strategy that works around Docker Swarm's round-robin
|
||||
// DNS by making multiple requests to discover individual service replicas.
|
||||
//
|
||||
// Technical challenges and solutions:
|
||||
// 1. Docker Swarm round-robin DNS makes it hard to discover individual replicas
|
||||
// 2. We use multiple HTTP requests to hit different replicas via load balancer
|
||||
// 3. Generate synthetic agent IDs since CHORUS doesn't expose unique identifiers yet
|
||||
// 4. Create realistic agent metadata for team formation algorithms
|
||||
//
|
||||
// This approach is a pragmatic MVP solution - in production, CHORUS agents would
|
||||
// register themselves with unique IDs and capabilities via a proper discovery protocol.
|
||||
func (d *Discovery) discoverCHORUSReplicas() {
|
||||
// HTTP client with short timeout for health checks. We use 5 seconds because:
|
||||
// 1. Health endpoints should respond quickly (< 1s typically)
|
||||
// 2. We're making multiple requests, so timeouts add up
|
||||
// 3. Docker Swarm networking is usually fast within cluster
|
||||
client := &http.Client{Timeout: 5 * time.Second}
|
||||
baseTime := time.Now() // Consistent timestamp for this discovery cycle
|
||||
// queryActualCHORUSService queries the real CHORUS service to discover actual running agents.
|
||||
// This function replaces the previous simulation and discovers only what's actually running.
|
||||
func (d *Discovery) queryActualCHORUSService() {
|
||||
client := &http.Client{Timeout: 10 * time.Second}
|
||||
|
||||
// Local map to track agents discovered in this cycle. We use a map to ensure
|
||||
// we don't create duplicate agents if we happen to hit the same replica twice.
|
||||
discovered := make(map[string]*Agent)
|
||||
// Try to query the CHORUS health endpoint
|
||||
endpoint := "http://chorus:8081/health"
|
||||
resp, err := client.Get(endpoint)
|
||||
if err != nil {
|
||||
log.Debug().
|
||||
Err(err).
|
||||
Str("endpoint", endpoint).
|
||||
Msg("Failed to reach CHORUS health endpoint")
|
||||
return
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
// Discovery strategy: Make multiple requests to the service endpoint.
|
||||
// Docker Swarm's round-robin load balancing will distribute these across
|
||||
// different replicas, allowing us to discover individual instances.
|
||||
// 15 attempts gives us good coverage of a 9-replica service.
|
||||
for attempt := 1; attempt <= 15; attempt++ {
|
||||
// Use the CHORUS health port (8081) rather than API port (8080) because:
|
||||
// 1. Health endpoints are lightweight and fast
|
||||
// 2. They don't require authentication or complex request processing
|
||||
// 3. They're designed to be called frequently for monitoring
|
||||
endpoint := "http://chorus:8081/health"
|
||||
|
||||
// Make the health check request. Docker Swarm will route this to one
|
||||
// of the available CHORUS replicas based on its load balancing algorithm.
|
||||
resp, err := client.Get(endpoint)
|
||||
if err != nil {
|
||||
// Log connection failures at debug level since some failures are expected
|
||||
// during service startup or when replicas are being updated.
|
||||
log.Debug().
|
||||
Err(err).
|
||||
Str("endpoint", endpoint).
|
||||
Int("attempt", attempt).
|
||||
Msg("Failed to query CHORUS health endpoint")
|
||||
continue
|
||||
}
|
||||
|
||||
// Process successful health check responses
|
||||
if resp.StatusCode == http.StatusOK {
|
||||
// Generate a synthetic agent ID since CHORUS doesn't provide unique IDs yet.
|
||||
// In production, this would come from the health check response body.
|
||||
// Using zero-padded numbers ensures consistent sorting in the UI.
|
||||
agentID := fmt.Sprintf("chorus-agent-%03d", len(discovered)+1)
|
||||
|
||||
// Only create new agent if we haven't seen this ID before in this cycle
|
||||
if _, exists := discovered[agentID]; !exists {
|
||||
// Create agent with realistic metadata for team formation.
|
||||
// These capabilities and models would normally come from the
|
||||
// actual CHORUS agent configuration.
|
||||
agent := &Agent{
|
||||
ID: agentID,
|
||||
Name: fmt.Sprintf("CHORUS Agent %d", len(discovered)+1),
|
||||
Status: "online", // Default to online since health check succeeded
|
||||
|
||||
// Standard CHORUS agent capabilities - these define what types of
|
||||
// tasks the agent can handle in team formation algorithms
|
||||
Capabilities: []string{"general_development", "task_coordination", "ai_integration"},
|
||||
|
||||
Model: "llama3.1:8b", // Standard model for CHORUS agents
|
||||
Endpoint: "http://chorus:8080", // API port for task assignment
|
||||
LastSeen: baseTime, // Consistent timestamp for this discovery cycle
|
||||
|
||||
// Synthetic task completion count for load balancing algorithms.
|
||||
// In production, this would be actual metrics from agent performance.
|
||||
TasksCompleted: len(discovered) * 2,
|
||||
|
||||
P2PAddr: "chorus:9000", // P2P communication port
|
||||
ClusterID: "docker-unified-stack", // Docker Swarm cluster identifier
|
||||
}
|
||||
|
||||
// Add some variety to agent status for realistic team formation testing.
|
||||
// This simulates real-world scenarios where agents have different availability.
|
||||
if len(discovered)%3 == 0 {
|
||||
agent.Status = "idle" // Every third agent is idle
|
||||
} else if len(discovered) == 6 {
|
||||
// One agent is actively working on a team assignment
|
||||
agent.Status = "working"
|
||||
agent.CurrentTeam = "development-team-alpha"
|
||||
}
|
||||
|
||||
// Add to discovered agents and log the discovery
|
||||
discovered[agentID] = agent
|
||||
log.Debug().
|
||||
Str("agent_id", agentID).
|
||||
Str("status", agent.Status).
|
||||
Msg("🤖 Discovered CHORUS agent")
|
||||
}
|
||||
}
|
||||
resp.Body.Close()
|
||||
|
||||
// Stop discovery once we've found the expected number of agents.
|
||||
// This prevents unnecessary HTTP requests and speeds up discovery cycles.
|
||||
if len(discovered) >= 9 {
|
||||
break
|
||||
}
|
||||
|
||||
// Brief pause between requests to avoid overwhelming the service and
|
||||
// to allow Docker Swarm's load balancer to potentially route to different replicas.
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
log.Debug().
|
||||
Int("status_code", resp.StatusCode).
|
||||
Str("endpoint", endpoint).
|
||||
Msg("CHORUS health endpoint returned non-200 status")
|
||||
return
|
||||
}
|
||||
|
||||
// Add all discovered agents
|
||||
for _, agent := range discovered {
|
||||
d.addOrUpdateAgent(agent)
|
||||
// CHORUS is responding, so create a single agent entry for the actual instance
|
||||
agentID := "chorus-agent-001"
|
||||
agent := &Agent{
|
||||
ID: agentID,
|
||||
Name: "CHORUS Agent",
|
||||
Status: "online",
|
||||
Capabilities: []string{
|
||||
"general_development",
|
||||
"task_coordination",
|
||||
"ai_integration",
|
||||
"code_analysis",
|
||||
"autonomous_development",
|
||||
},
|
||||
Model: "llama3.1:8b",
|
||||
Endpoint: "http://chorus:8080",
|
||||
LastSeen: time.Now(),
|
||||
TasksCompleted: 0, // Will be updated by actual task completion tracking
|
||||
P2PAddr: "chorus:9000",
|
||||
ClusterID: "docker-unified-stack",
|
||||
}
|
||||
|
||||
// Check if CHORUS has an API endpoint that provides more detailed info
|
||||
// For now, we'll just use the single discovered instance
|
||||
d.addOrUpdateAgent(agent)
|
||||
|
||||
log.Info().
|
||||
Int("discovered_count", len(discovered)).
|
||||
Msg("🎭 CHORUS agent discovery completed")
|
||||
Str("agent_id", agentID).
|
||||
Str("endpoint", endpoint).
|
||||
Msg("🤖 Discovered real CHORUS agent")
|
||||
}
|
||||
|
||||
// addOrUpdateAgent adds or updates an agent in the discovery cache
|
||||
|
||||
Reference in New Issue
Block a user