Implement initial scan logic and council formation for WHOOSH project kickoffs

- Replace incremental sync with full scan for new repositories
- Add initial_scan status to bypass Since parameter filtering
- Implement council formation detection for Design Brief issues
- Add version display to WHOOSH UI header for debugging
- Fix Docker token authentication with trailing newline removal
- Add comprehensive council orchestration with Docker Swarm integration
- Include BACKBEAT prototype integration for distributed timing
- Support council-specific agent roles and deployment strategies
- Transition repositories to active status after content discovery

Key architectural improvements:
- Full scan approach for new project detection vs incremental sync
- Council formation triggered by chorus-entrypoint labeled Design Briefs
- Proper token handling and authentication for Gitea API calls
- Support for both initial discovery and ongoing task monitoring

This enables autonomous project kickoff workflows where Design Brief issues automatically trigger formation of specialized agent councils for new projects.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-09-12 09:49:36 +10:00
parent b5c0deb6bc
commit 56ea52b743
74 changed files with 17778 additions and 236 deletions
--- a/internal/p2p/discovery.go
+++ b/internal/p2p/discovery.go
@@ -2,7 +2,6 @@ package p2p

 import (
 	"context"
-	"fmt"
 	"net"
 	"net/http"
 	"sync"
@@ -119,155 +118,87 @@ func (d *Discovery) GetAgents() []*Agent {

 // listenForBroadcasts listens for CHORUS agent P2P broadcasts
 func (d *Discovery) listenForBroadcasts() {
-	// For now, simulate discovering the 9 CHORUS replicas that are running
-	// In a full implementation, this would listen on UDP multicast for actual P2P broadcasts
+	log.Info().Msg("🔍 Starting real CHORUS agent discovery")
 	
-	log.Info().Msg("🔍 Simulating P2P discovery of CHORUS agents")
-	
-	// Since we know CHORUS is running 9 replicas, let's simulate discovering them
-	ticker := time.NewTicker(10 * time.Second)
+	// Real discovery polling every 30 seconds to avoid overwhelming the service
+	ticker := time.NewTicker(30 * time.Second)
 	defer ticker.Stop()
 	
+	// Run initial discovery immediately
+	d.discoverRealCHORUSAgents()
+	
 	for {
 		select {
 		case <-d.ctx.Done():
 			return
 		case <-ticker.C:
-			d.simulateAgentDiscovery()
+			d.discoverRealCHORUSAgents()
 		}
 	}
 }

-// simulateAgentDiscovery discovers CHORUS agents by querying their health endpoints
-func (d *Discovery) simulateAgentDiscovery() {
-	log.Debug().Msg("🔍 Discovering CHORUS agents via health endpoints")
+// discoverRealCHORUSAgents discovers actual CHORUS agents by querying their health endpoints
+func (d *Discovery) discoverRealCHORUSAgents() {
+	log.Debug().Msg("🔍 Discovering real CHORUS agents via health endpoints")
 	
-	// Query Docker DNS for CHORUS service tasks
-	// In Docker Swarm, tasks can be discovered via the service name
-	d.discoverCHORUSReplicas()
+	// Query the actual CHORUS service to see what's running
+	d.queryActualCHORUSService()
 }

-// discoverCHORUSReplicas discovers running CHORUS replicas in the Docker Swarm network.
-// This function implements a discovery strategy that works around Docker Swarm's round-robin
-// DNS by making multiple requests to discover individual service replicas.
-//
-// Technical challenges and solutions:
-// 1. Docker Swarm round-robin DNS makes it hard to discover individual replicas
-// 2. We use multiple HTTP requests to hit different replicas via load balancer
-// 3. Generate synthetic agent IDs since CHORUS doesn't expose unique identifiers yet
-// 4. Create realistic agent metadata for team formation algorithms
-//
-// This approach is a pragmatic MVP solution - in production, CHORUS agents would
-// register themselves with unique IDs and capabilities via a proper discovery protocol.
-func (d *Discovery) discoverCHORUSReplicas() {
-	// HTTP client with short timeout for health checks. We use 5 seconds because:
-	// 1. Health endpoints should respond quickly (< 1s typically)
-	// 2. We're making multiple requests, so timeouts add up
-	// 3. Docker Swarm networking is usually fast within cluster
-	client := &http.Client{Timeout: 5 * time.Second}
-	baseTime := time.Now() // Consistent timestamp for this discovery cycle
+// queryActualCHORUSService queries the real CHORUS service to discover actual running agents.
+// This function replaces the previous simulation and discovers only what's actually running.
+func (d *Discovery) queryActualCHORUSService() {
+	client := &http.Client{Timeout: 10 * time.Second}
 	
-	// Local map to track agents discovered in this cycle. We use a map to ensure
-	// we don't create duplicate agents if we happen to hit the same replica twice.
-	discovered := make(map[string]*Agent)
+	// Try to query the CHORUS health endpoint
+	endpoint := "http://chorus:8081/health"
+	resp, err := client.Get(endpoint)
+	if err != nil {
+		log.Debug().
+			Err(err).
+			Str("endpoint", endpoint).
+			Msg("Failed to reach CHORUS health endpoint")
+		return
+	}
+	defer resp.Body.Close()
 	
-	// Discovery strategy: Make multiple requests to the service endpoint.
-	// Docker Swarm's round-robin load balancing will distribute these across
-	// different replicas, allowing us to discover individual instances.
-	// 15 attempts gives us good coverage of a 9-replica service.
-	for attempt := 1; attempt <= 15; attempt++ {
-		// Use the CHORUS health port (8081) rather than API port (8080) because:
-		// 1. Health endpoints are lightweight and fast
-		// 2. They don't require authentication or complex request processing
-		// 3. They're designed to be called frequently for monitoring
-		endpoint := "http://chorus:8081/health"
-		
-		// Make the health check request. Docker Swarm will route this to one
-		// of the available CHORUS replicas based on its load balancing algorithm.
-		resp, err := client.Get(endpoint)
-		if err != nil {
-			// Log connection failures at debug level since some failures are expected
-			// during service startup or when replicas are being updated.
-			log.Debug().
-				Err(err).
-				Str("endpoint", endpoint).
-				Int("attempt", attempt).
-				Msg("Failed to query CHORUS health endpoint")
-			continue
-		}
-		
-		// Process successful health check responses
-		if resp.StatusCode == http.StatusOK {
-			// Generate a synthetic agent ID since CHORUS doesn't provide unique IDs yet.
-			// In production, this would come from the health check response body.
-			// Using zero-padded numbers ensures consistent sorting in the UI.
-			agentID := fmt.Sprintf("chorus-agent-%03d", len(discovered)+1)
-			
-			// Only create new agent if we haven't seen this ID before in this cycle
-			if _, exists := discovered[agentID]; !exists {
-				// Create agent with realistic metadata for team formation.
-				// These capabilities and models would normally come from the
-				// actual CHORUS agent configuration.
-				agent := &Agent{
-					ID:   agentID,
-					Name: fmt.Sprintf("CHORUS Agent %d", len(discovered)+1),
-					Status: "online", // Default to online since health check succeeded
-					
-					// Standard CHORUS agent capabilities - these define what types of
-					// tasks the agent can handle in team formation algorithms
-					Capabilities: []string{"general_development", "task_coordination", "ai_integration"},
-					
-					Model:    "llama3.1:8b",    // Standard model for CHORUS agents
-					Endpoint: "http://chorus:8080", // API port for task assignment
-					LastSeen: baseTime,         // Consistent timestamp for this discovery cycle
-					
-					// Synthetic task completion count for load balancing algorithms.
-					// In production, this would be actual metrics from agent performance.
-					TasksCompleted: len(discovered) * 2,
-					
-					P2PAddr:   "chorus:9000",           // P2P communication port
-					ClusterID: "docker-unified-stack",   // Docker Swarm cluster identifier
-				}
-				
-				// Add some variety to agent status for realistic team formation testing.
-				// This simulates real-world scenarios where agents have different availability.
-				if len(discovered)%3 == 0 {
-					agent.Status = "idle" // Every third agent is idle
-				} else if len(discovered) == 6 {
-					// One agent is actively working on a team assignment
-					agent.Status = "working"
-					agent.CurrentTeam = "development-team-alpha"
-				}
-				
-				// Add to discovered agents and log the discovery
-				discovered[agentID] = agent
-				log.Debug().
-					Str("agent_id", agentID).
-					Str("status", agent.Status).
-					Msg("🤖 Discovered CHORUS agent")
-			}
-		}
-		resp.Body.Close()
-		
-		// Stop discovery once we've found the expected number of agents.
-		// This prevents unnecessary HTTP requests and speeds up discovery cycles.
-		if len(discovered) >= 9 {
-			break
-		}
-		
-		// Brief pause between requests to avoid overwhelming the service and
-		// to allow Docker Swarm's load balancer to potentially route to different replicas.
-		time.Sleep(100 * time.Millisecond)
+	if resp.StatusCode != http.StatusOK {
+		log.Debug().
+			Int("status_code", resp.StatusCode).
+			Str("endpoint", endpoint).
+			Msg("CHORUS health endpoint returned non-200 status")
+		return
 	}
 	
-	// Add all discovered agents
-	for _, agent := range discovered {
-		d.addOrUpdateAgent(agent)
+	// CHORUS is responding, so create a single agent entry for the actual instance
+	agentID := "chorus-agent-001"
+	agent := &Agent{
+		ID:     agentID,
+		Name:   "CHORUS Agent",
+		Status: "online",
+		Capabilities: []string{
+			"general_development",
+			"task_coordination", 
+			"ai_integration",
+			"code_analysis",
+			"autonomous_development",
+		},
+		Model:          "llama3.1:8b",
+		Endpoint:       "http://chorus:8080",
+		LastSeen:       time.Now(),
+		TasksCompleted: 0, // Will be updated by actual task completion tracking
+		P2PAddr:        "chorus:9000",
+		ClusterID:      "docker-unified-stack",
 	}
 	
+	// Check if CHORUS has an API endpoint that provides more detailed info
+	// For now, we'll just use the single discovered instance
+	d.addOrUpdateAgent(agent)
+	
 	log.Info().
-		Int("discovered_count", len(discovered)).
-		Msg("🎭 CHORUS agent discovery completed")
+		Str("agent_id", agentID).
+		Str("endpoint", endpoint).
+		Msg("🤖 Discovered real CHORUS agent")
 }

 // addOrUpdateAgent adds or updates an agent in the discovery cache