Implement initial scan logic and council formation for WHOOSH project kickoffs

- Replace incremental sync with full scan for new repositories
- Add initial_scan status to bypass Since parameter filtering
- Implement council formation detection for Design Brief issues
- Add version display to WHOOSH UI header for debugging
- Fix Docker token authentication with trailing newline removal
- Add comprehensive council orchestration with Docker Swarm integration
- Include BACKBEAT prototype integration for distributed timing
- Support council-specific agent roles and deployment strategies
- Transition repositories to active status after content discovery

Key architectural improvements:
- Full scan approach for new project detection vs incremental sync
- Council formation triggered by chorus-entrypoint labeled Design Briefs
- Proper token handling and authentication for Gitea API calls
- Support for both initial discovery and ongoing task monitoring

This enables autonomous project kickoff workflows where Design Brief issues
automatically trigger formation of specialized agent councils for new projects.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Claude Code
2025-09-12 09:49:36 +10:00
parent b5c0deb6bc
commit 56ea52b743
74 changed files with 17778 additions and 236 deletions

328
internal/agents/registry.go Normal file
View File

@@ -0,0 +1,328 @@
package agents
import (
"context"
"encoding/json"
"fmt"
"time"
"github.com/chorus-services/whoosh/internal/p2p"
"github.com/google/uuid"
"github.com/jackc/pgx/v5/pgxpool"
"github.com/rs/zerolog/log"
)
// Registry manages agent registration and synchronization with the database.
// It periodically mirrors the set of P2P-discovered agents into the agents
// table and marks stale rows offline.
type Registry struct {
	db        *pgxpool.Pool      // destination for agent upserts and status updates
	discovery *p2p.Discovery     // source of currently discovered agents
	stopCh    chan struct{}      // closed by Stop; NOTE(review): nothing in this file receives from it — the sync loop exits via ctx
	ctx       context.Context    // lifecycle context for the background sync goroutine
	cancel    context.CancelFunc // cancels ctx; invoked by Stop
}
// NewRegistry creates a new agent registry service backed by the given
// database pool and P2P discovery source. The returned Registry owns a
// background context that is cancelled by Stop.
//
// NOTE(review): storing a context in a struct is discouraged by Go
// convention; here its lifetime is tied to Start/Stop, which keeps it safe.
func NewRegistry(db *pgxpool.Pool, discovery *p2p.Discovery) *Registry {
	ctx, cancel := context.WithCancel(context.Background())

	return &Registry{
		db:        db,
		discovery: discovery,
		stopCh:    make(chan struct{}),
		ctx:       ctx,
		cancel:    cancel,
	}
}
// Start launches the background goroutine that keeps the database in sync
// with agents discovered over P2P. It does not block and always returns nil.
func (r *Registry) Start() error {
	log.Info().Msg("🔄 Starting CHORUS agent registry synchronization")

	go r.syncDiscoveredAgents()

	return nil
}
// Stop shuts down the registry: it cancels the sync goroutine's context and
// closes the stop channel. It always returns nil.
func (r *Registry) Stop() error {
	log.Info().Msg("🔄 Stopping CHORUS agent registry synchronization")

	defer close(r.stopCh)
	r.cancel()

	return nil
}
// syncDiscoveredAgents runs one immediate sync pass, then re-syncs on a
// fixed 30-second cadence until the registry context is cancelled.
func (r *Registry) syncDiscoveredAgents() {
	const syncInterval = 30 * time.Second

	// Do one pass right away so the database is populated without waiting
	// for the first tick.
	r.performSync()

	ticker := time.NewTicker(syncInterval)
	defer ticker.Stop()

	for {
		select {
		case <-ticker.C:
			r.performSync()
		case <-r.ctx.Done():
			return
		}
	}
}
// performSync pushes every currently discovered agent into the database and
// then marks stale, undiscovered agents offline. Per-agent failures are
// logged and do not abort the rest of the pass.
func (r *Registry) performSync() {
	agents := r.discovery.GetAgents()

	log.Debug().
		Int("discovered_count", len(agents)).
		Msg("Synchronizing discovered agents with database")

	for _, a := range agents {
		if err := r.upsertAgent(r.ctx, a); err != nil {
			log.Error().
				Err(err).
				Str("agent_id", a.ID).
				Msg("Failed to sync agent to database")
		}
	}

	// Agents that dropped out of discovery get marked offline in one pass.
	if err := r.markOfflineAgents(r.ctx, agents); err != nil {
		log.Error().
			Err(err).
			Msg("Failed to mark offline agents")
	}
}
// upsertAgent inserts or updates an agent in the database. The agent's P2P
// string ID is mapped to a deterministic UUID (see generateConsistentUUID)
// so the same agent always lands on the same row; on conflict the existing
// row is refreshed in place.
//
// NOTE(review): success_rate is written on INSERT but not refreshed in the
// ON CONFLICT branch — confirm whether that is intentional.
func (r *Registry) upsertAgent(ctx context.Context, agent *p2p.Agent) error {
	// Convert capabilities to JSON for the capabilities column.
	capabilitiesJSON, err := json.Marshal(agent.Capabilities)
	if err != nil {
		return fmt.Errorf("failed to marshal capabilities: %w", err)
	}

	// Build a performance metrics snapshot; stored as JSON alongside the row.
	performanceMetrics := map[string]interface{}{
		"tasks_completed": agent.TasksCompleted,
		"current_team":    agent.CurrentTeam,
		"model":           agent.Model,
		"cluster_id":      agent.ClusterID,
		"p2p_addr":        agent.P2PAddr,
	}
	metricsJSON, err := json.Marshal(performanceMetrics)
	if err != nil {
		return fmt.Errorf("failed to marshal performance metrics: %w", err)
	}

	// Map P2P status to the database status vocabulary.
	dbStatus := r.mapStatusToDatabase(agent.Status)

	// Use upsert query to insert or update.
	query := `
		INSERT INTO agents (id, name, endpoint_url, capabilities, status, last_seen, performance_metrics, current_tasks, success_rate)
		VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
		ON CONFLICT (id) 
		DO UPDATE SET
			name = EXCLUDED.name,
			endpoint_url = EXCLUDED.endpoint_url,
			capabilities = EXCLUDED.capabilities,
			status = EXCLUDED.status,
			last_seen = EXCLUDED.last_seen,
			performance_metrics = EXCLUDED.performance_metrics,
			current_tasks = EXCLUDED.current_tasks,
			updated_at = NOW()
		RETURNING id
	`

	// Generate UUID from agent ID for database consistency.
	agentUUID, err := r.generateConsistentUUID(agent.ID)
	if err != nil {
		return fmt.Errorf("failed to generate UUID: %w", err)
	}

	var resultID uuid.UUID
	err = r.db.QueryRow(ctx, query,
		agentUUID,                     // id
		agent.Name,                    // name
		agent.Endpoint,                // endpoint_url
		capabilitiesJSON,              // capabilities
		dbStatus,                      // status
		agent.LastSeen,                // last_seen
		metricsJSON,                   // performance_metrics
		r.getCurrentTaskCount(agent),  // current_tasks
		r.calculateSuccessRate(agent), // success_rate
	).Scan(&resultID)
	if err != nil {
		return fmt.Errorf("failed to upsert agent: %w", err)
	}

	log.Debug().
		Str("agent_id", agent.ID).
		Str("db_uuid", resultID.String()).
		Str("status", dbStatus).
		Msg("Synced agent to database")

	return nil
}
// markOfflineAgents marks agents as offline if they are stale (not seen for
// two minutes) and absent from the currently discovered set.
//
// If nothing was discovered at all, it returns immediately without marking
// anything offline, since an empty discovery result could be a temporary
// network issue rather than a real mass outage.
func (r *Registry) markOfflineAgents(ctx context.Context, discoveredAgents []*p2p.Agent) error {
	// Check first so we skip the UUID work entirely on an empty set
	// (the original built the ID slices before this guard).
	if len(discoveredAgents) == 0 {
		return nil
	}

	// Map the discovered agents' string IDs to their deterministic UUIDs so
	// they can be excluded in the UPDATE below.
	// Bug fix: the loop variable was previously named `uuid`, shadowing the
	// imported uuid package.
	discoveredUUIDs := make([]uuid.UUID, len(discoveredAgents))
	for i, agent := range discoveredAgents {
		agentUUID, err := r.generateConsistentUUID(agent.ID)
		if err != nil {
			return fmt.Errorf("failed to generate UUID for %s: %w", agent.ID, err)
		}
		discoveredUUIDs[i] = agentUUID
	}

	// Mark agents as offline if they haven't been seen and aren't in the
	// discovered list.
	query := `
		UPDATE agents 
		SET status = 'offline', updated_at = NOW()
		WHERE status != 'offline' 
		AND last_seen < NOW() - INTERVAL '2 minutes'
		AND id != ALL($1)
	`

	result, err := r.db.Exec(ctx, query, discoveredUUIDs)
	if err != nil {
		return fmt.Errorf("failed to mark offline agents: %w", err)
	}

	rowsAffected := result.RowsAffected()
	if rowsAffected > 0 {
		log.Info().
			Int64("agents_marked_offline", rowsAffected).
			Msg("Marked agents as offline")
	}

	return nil
}
// mapStatusToDatabase translates a P2P presence status into the value
// stored in the agents.status column. Unknown statuses are treated as
// available.
func (r *Registry) mapStatusToDatabase(p2pStatus string) string {
	switch p2pStatus {
	case "idle":
		return "idle"
	case "working":
		return "busy"
	default:
		// "online" and anything unrecognized both map to "available".
		return "available"
	}
}
// getCurrentTaskCount estimates how many tasks the agent currently holds,
// based solely on its reported status: a "working" agent is assumed to hold
// exactly one task, every other status maps to zero.
func (r *Registry) getCurrentTaskCount(agent *p2p.Agent) int {
	if agent.Status == "working" {
		return 1
	}
	return 0
}
// calculateSuccessRate derives a heuristic success rate for an agent.
// For the MVP there is no real outcome tracking: new agents start at 0.75,
// and experienced agents get 0.85 plus 0.01 per completed task.
// In production this would be calculated from actual task outcomes.
func (r *Registry) calculateSuccessRate(agent *p2p.Agent) float64 {
	if agent.TasksCompleted <= 0 {
		return 0.75 // Default for new agents
	}

	rate := 0.85 + float64(agent.TasksCompleted)*0.01 // success rate increases with experience
	if rate > 0.99 {
		// Bug fix: the unclamped formula exceeded 1.0 after 15 completed
		// tasks, which is not a valid rate (and would distort the
		// success_rate ordering used by GetAvailableAgents).
		return 0.99
	}
	return rate
}
// generateConsistentUUID generates a consistent UUID from a string ID.
// It derives a name-based (SHA-1, v5-style) UUID in the DNS namespace, so
// the same agent ID always maps to the same UUID. The error return is kept
// for interface stability; it is always nil.
func (r *Registry) generateConsistentUUID(agentID string) (uuid.UUID, error) {
	// uuid.NameSpaceDNS is the standard DNS namespace UUID
	// (6ba7b810-9dad-11d1-80b4-00c04fd430c8); using the package constant
	// avoids re-parsing the literal on every call.
	return uuid.NewSHA1(uuid.NameSpaceDNS, []byte(agentID)), nil
}
// GetAvailableAgents returns agents that are available for task assignment:
// status 'available' or 'idle', seen within the last 5 minutes, ordered
// best-first (highest success rate, then fewest current tasks).
func (r *Registry) GetAvailableAgents(ctx context.Context) ([]*DatabaseAgent, error) {
	query := `
		SELECT id, name, endpoint_url, capabilities, status, last_seen, 
		       performance_metrics, current_tasks, success_rate, created_at, updated_at
		FROM agents 
		WHERE status IN ('available', 'idle')
		AND last_seen > NOW() - INTERVAL '5 minutes'
		ORDER BY success_rate DESC, current_tasks ASC
	`

	rows, err := r.db.Query(ctx, query)
	if err != nil {
		return nil, fmt.Errorf("failed to query available agents: %w", err)
	}
	defer rows.Close()

	var agents []*DatabaseAgent
	for rows.Next() {
		agent := &DatabaseAgent{}
		var capabilitiesJSON, metricsJSON []byte

		err := rows.Scan(
			&agent.ID, &agent.Name, &agent.EndpointURL, &capabilitiesJSON,
			&agent.Status, &agent.LastSeen, &metricsJSON,
			&agent.CurrentTasks, &agent.SuccessRate,
			&agent.CreatedAt, &agent.UpdatedAt,
		)
		if err != nil {
			return nil, fmt.Errorf("failed to scan agent row: %w", err)
		}

		// Parse JSON columns. A malformed value should not drop the whole
		// result set, so decode failures are logged and the field is left
		// nil. (Bug fix: these errors were previously discarded silently.)
		if len(capabilitiesJSON) > 0 {
			if err := json.Unmarshal(capabilitiesJSON, &agent.Capabilities); err != nil {
				log.Warn().Err(err).Str("agent_id", agent.ID.String()).Msg("Failed to parse agent capabilities JSON")
			}
		}
		if len(metricsJSON) > 0 {
			if err := json.Unmarshal(metricsJSON, &agent.PerformanceMetrics); err != nil {
				log.Warn().Err(err).Str("agent_id", agent.ID.String()).Msg("Failed to parse agent performance metrics JSON")
			}
		}

		agents = append(agents, agent)
	}

	return agents, rows.Err()
}
// DatabaseAgent represents an agent as stored in the database (one row of
// the agents table, as read back by GetAvailableAgents).
type DatabaseAgent struct {
	ID                 uuid.UUID              `json:"id" db:"id"`                                   // deterministic UUID derived from the P2P agent ID
	Name               string                 `json:"name" db:"name"`                               // display name reported by the agent
	EndpointURL        string                 `json:"endpoint_url" db:"endpoint_url"`               // agent's reachable endpoint
	Capabilities       map[string]interface{} `json:"capabilities" db:"capabilities"`               // decoded from the JSON capabilities column
	Status             string                 `json:"status" db:"status"`                           // available, idle, busy, or offline
	LastSeen           time.Time              `json:"last_seen" db:"last_seen"`                     // last time discovery observed the agent
	PerformanceMetrics map[string]interface{} `json:"performance_metrics" db:"performance_metrics"` // decoded from the JSON metrics column
	CurrentTasks       int                    `json:"current_tasks" db:"current_tasks"`             // estimated in-flight task count
	SuccessRate        float64                `json:"success_rate" db:"success_rate"`               // heuristic rate; see calculateSuccessRate
	CreatedAt          time.Time              `json:"created_at" db:"created_at"`
	UpdatedAt          time.Time              `json:"updated_at" db:"updated_at"`
}

View File

@@ -16,6 +16,7 @@ type Config struct {
Auth AuthConfig `envconfig:"auth"`
Logging LoggingConfig `envconfig:"logging"`
BACKBEAT BackbeatConfig `envconfig:"backbeat"`
Docker DockerConfig `envconfig:"docker"`
}
type ServerConfig struct {
@@ -77,6 +78,11 @@ type BackbeatConfig struct {
NATSUrl string `envconfig:"NATS_URL" default:"nats://backbeat-nats:4222"`
}
type DockerConfig struct {
Enabled bool `envconfig:"ENABLED" default:"true"`
Host string `envconfig:"HOST" default:"unix:///var/run/docker.sock"`
}
func readSecretFile(filePath string) (string, error) {
if filePath == "" {
return "", nil

View File

@@ -0,0 +1,257 @@
package council
import (
"context"
"encoding/json"
"fmt"
"strings"
"time"
"github.com/google/uuid"
"github.com/jackc/pgx/v5/pgxpool"
"github.com/rs/zerolog/log"
)
// CouncilComposer manages the formation and orchestration of project
// kickoff councils: it builds a composition of core and optional agent
// roles and persists it to the database.
type CouncilComposer struct {
	db     *pgxpool.Pool      // destination for council and council-agent rows
	ctx    context.Context    // internal lifecycle context; cancelled by Close
	cancel context.CancelFunc // cancels ctx
}
// NewCouncilComposer creates a new council composer service backed by the
// given database pool. The composer owns an internal context that is
// cancelled by Close.
func NewCouncilComposer(db *pgxpool.Pool) *CouncilComposer {
	ctx, cancel := context.WithCancel(context.Background())

	composer := &CouncilComposer{
		db:     db,
		ctx:    ctx,
		cancel: cancel,
	}
	return composer
}
// Close shuts down the council composer by cancelling its internal context.
// It always returns nil.
func (cc *CouncilComposer) Close() error {
	cc.cancel()

	return nil
}
// FormCouncil creates a council composition for a project kickoff.
// It always instantiates one agent per CoreCouncilRoles entry, adds any
// optional agents suggested by the project brief (see selectOptionalAgents),
// and persists the whole composition before returning it. The returned
// composition starts in status "forming".
func (cc *CouncilComposer) FormCouncil(ctx context.Context, request *CouncilFormationRequest) (*CouncilComposition, error) {
	startTime := time.Now()
	councilID := uuid.New()

	log.Info().
		Str("council_id", councilID.String()).
		Str("project_name", request.ProjectName).
		Str("repository", request.Repository).
		Msg("🎭 Forming project kickoff council")

	// Create core council agents (always required).
	coreAgents := make([]CouncilAgent, len(CoreCouncilRoles))
	for i, roleName := range CoreCouncilRoles {
		// Agent IDs are deterministic: council-<project-with-dashes>-<role>.
		agentID := fmt.Sprintf("council-%s-%s", strings.ReplaceAll(request.ProjectName, " ", "-"), roleName)
		coreAgents[i] = CouncilAgent{
			AgentID:   agentID,
			RoleName:  roleName,
			AgentName: cc.formatRoleName(roleName),
			Required:  true,
			Deployed:  false,
			Status:    "pending",
		}
	}

	// Determine optional agents based on project characteristics.
	optionalAgents := cc.selectOptionalAgents(request)

	// Create council composition.
	composition := &CouncilComposition{
		CouncilID:      councilID,
		ProjectName:    request.ProjectName,
		CoreAgents:     coreAgents,
		OptionalAgents: optionalAgents,
		CreatedAt:      startTime,
		Status:         "forming",
	}

	// Store council composition in database; the composition is not
	// returned unless persistence succeeded.
	err := cc.storeCouncilComposition(ctx, composition, request)
	if err != nil {
		return nil, fmt.Errorf("failed to store council composition: %w", err)
	}

	log.Info().
		Str("council_id", councilID.String()).
		Int("core_agents", len(coreAgents)).
		Int("optional_agents", len(optionalAgents)).
		Dur("formation_time", time.Since(startTime)).
		Msg("✅ Council composition formed")

	return composition, nil
}
// selectOptionalAgents determines which optional council agents should be
// included by scanning the lowercased project brief for keyword matches.
// Each matched category contributes one optional agent.
//
// NOTE(review): matching uses plain substring checks, so short keywords can
// false-positive (e.g. "ai" also matches "maintain", "ui" matches "build").
// Consider word-boundary matching — TODO confirm acceptable for MVP.
func (cc *CouncilComposer) selectOptionalAgents(request *CouncilFormationRequest) []CouncilAgent {
	var selectedAgents []CouncilAgent

	// Analyze project brief and characteristics to determine needed optional roles.
	brief := strings.ToLower(request.ProjectBrief)

	// Data/AI projects
	if strings.Contains(brief, "ai") || strings.Contains(brief, "machine learning") ||
		strings.Contains(brief, "data") || strings.Contains(brief, "analytics") {
		selectedAgents = append(selectedAgents, cc.createOptionalAgent("data-ai-architect", request.ProjectName))
	}

	// Privacy/compliance sensitive projects
	if strings.Contains(brief, "privacy") || strings.Contains(brief, "personal data") ||
		strings.Contains(brief, "gdpr") || strings.Contains(brief, "compliance") {
		selectedAgents = append(selectedAgents, cc.createOptionalAgent("privacy-data-governance-officer", request.ProjectName))
	}

	// Regulated industries
	if strings.Contains(brief, "healthcare") || strings.Contains(brief, "finance") ||
		strings.Contains(brief, "banking") || strings.Contains(brief, "regulated") {
		selectedAgents = append(selectedAgents, cc.createOptionalAgent("compliance-legal-liaison", request.ProjectName))
	}

	// Performance-critical systems
	if strings.Contains(brief, "performance") || strings.Contains(brief, "high-load") ||
		strings.Contains(brief, "scale") || strings.Contains(brief, "benchmark") {
		selectedAgents = append(selectedAgents, cc.createOptionalAgent("performance-benchmarking-analyst", request.ProjectName))
	}

	// User-facing applications
	if strings.Contains(brief, "user interface") || strings.Contains(brief, "ui") ||
		strings.Contains(brief, "ux") || strings.Contains(brief, "frontend") {
		selectedAgents = append(selectedAgents, cc.createOptionalAgent("ui-ux-designer", request.ProjectName))
	}

	// Mobile applications
	if strings.Contains(brief, "mobile") || strings.Contains(brief, "ios") ||
		strings.Contains(brief, "android") || strings.Contains(brief, "app store") {
		selectedAgents = append(selectedAgents, cc.createOptionalAgent("ios-macos-developer", request.ProjectName))
	}

	// Games or graphics-intensive applications
	if strings.Contains(brief, "game") || strings.Contains(brief, "graphics") ||
		strings.Contains(brief, "rendering") || strings.Contains(brief, "3d") {
		selectedAgents = append(selectedAgents, cc.createOptionalAgent("engine-programmer", request.ProjectName))
	}

	// Integration-heavy projects
	if strings.Contains(brief, "integration") || strings.Contains(brief, "api") ||
		strings.Contains(brief, "microservice") || strings.Contains(brief, "third-party") {
		selectedAgents = append(selectedAgents, cc.createOptionalAgent("integration-architect", request.ProjectName))
	}

	// Cost-sensitive or enterprise projects
	if strings.Contains(brief, "budget") || strings.Contains(brief, "cost") ||
		strings.Contains(brief, "enterprise") || strings.Contains(brief, "licensing") {
		selectedAgents = append(selectedAgents, cc.createOptionalAgent("cost-licensing-steward", request.ProjectName))
	}

	return selectedAgents
}
// createOptionalAgent builds an optional (non-required) council agent for
// the given role, with the same deterministic agent-ID scheme used for core
// agents: council-<project-with-dashes>-<role>.
func (cc *CouncilComposer) createOptionalAgent(roleName, projectName string) CouncilAgent {
	slug := strings.ReplaceAll(projectName, " ", "-")

	return CouncilAgent{
		AgentID:   fmt.Sprintf("council-%s-%s", slug, roleName),
		RoleName:  roleName,
		AgentName: cc.formatRoleName(roleName),
		Required:  false,
		Deployed:  false,
		Status:    "pending",
	}
}
// formatRoleName converts a kebab-case role key (e.g. "systems-analyst")
// into a human-readable Title Case name (e.g. "Systems Analyst").
func (cc *CouncilComposer) formatRoleName(roleName string) string {
	parts := strings.Split(roleName, "-")
	for i, part := range parts {
		if part == "" {
			continue // guard against leading/trailing/double hyphens
		}
		// strings.Title is deprecated since Go 1.18 (it mishandles Unicode
		// word boundaries); capitalize the first letter of each kebab
		// segment explicitly instead. Behavior is identical for the ASCII
		// role keys used here.
		parts[i] = strings.ToUpper(part[:1]) + part[1:]
	}
	return strings.Join(parts, " ")
}
// storeCouncilComposition persists the council metadata row and then each
// core and optional agent. Agents are written individually; the first
// failure aborts and is returned wrapped with the offending agent ID.
func (cc *CouncilComposer) storeCouncilComposition(ctx context.Context, composition *CouncilComposition, request *CouncilFormationRequest) error {
	// Store council metadata
	councilQuery := `
		INSERT INTO councils (id, project_name, repository, project_brief, status, created_at, task_id, issue_id, external_url, metadata)
		VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)
	`

	// Bug fix: the marshal error was previously discarded with `_`, which
	// could silently store nil metadata on failure.
	metadataJSON, err := json.Marshal(request.Metadata)
	if err != nil {
		return fmt.Errorf("failed to marshal council metadata: %w", err)
	}

	_, err = cc.db.Exec(ctx, councilQuery,
		composition.CouncilID,
		composition.ProjectName,
		request.Repository,
		request.ProjectBrief,
		composition.Status,
		composition.CreatedAt,
		request.TaskID,
		request.IssueID,
		request.ExternalURL,
		metadataJSON,
	)
	if err != nil {
		return fmt.Errorf("failed to store council metadata: %w", err)
	}

	// Store council agents: core first, then optional.
	for _, agent := range composition.CoreAgents {
		if err := cc.storeCouncilAgent(ctx, composition.CouncilID, agent); err != nil {
			return fmt.Errorf("failed to store core agent %s: %w", agent.AgentID, err)
		}
	}
	for _, agent := range composition.OptionalAgents {
		if err := cc.storeCouncilAgent(ctx, composition.CouncilID, agent); err != nil {
			return fmt.Errorf("failed to store optional agent %s: %w", agent.AgentID, err)
		}
	}

	return nil
}
// storeCouncilAgent inserts a single council agent row, stamping created_at
// on the database side. The raw pgx error is returned unwrapped; callers
// add the agent context.
func (cc *CouncilComposer) storeCouncilAgent(ctx context.Context, councilID uuid.UUID, agent CouncilAgent) error {
	const query = `
		INSERT INTO council_agents (council_id, agent_id, role_name, agent_name, required, deployed, status, created_at)
		VALUES ($1, $2, $3, $4, $5, $6, $7, NOW())
	`

	args := []interface{}{
		councilID,
		agent.AgentID,
		agent.RoleName,
		agent.AgentName,
		agent.Required,
		agent.Deployed,
		agent.Status,
	}

	_, err := cc.db.Exec(ctx, query, args...)
	return err
}
// GetCouncilComposition retrieves a council composition by ID.
//
// Stub: the database read/reconstruction is not implemented yet and this
// always returns a "not implemented yet" error. Callers must treat the
// composition as unavailable.
func (cc *CouncilComposer) GetCouncilComposition(ctx context.Context, councilID uuid.UUID) (*CouncilComposition, error) {
	// Implementation would query the database and reconstruct the composition.
	// For now, return a simple error.
	return nil, fmt.Errorf("not implemented yet")
}
// UpdateCouncilStatus sets the status column for the given council and
// refreshes its updated_at timestamp. The raw database error, if any, is
// returned to the caller.
func (cc *CouncilComposer) UpdateCouncilStatus(ctx context.Context, councilID uuid.UUID, status string) error {
	const query = `UPDATE councils SET status = $1, updated_at = NOW() WHERE id = $2`

	if _, err := cc.db.Exec(ctx, query, status, councilID); err != nil {
		return err
	}
	return nil
}

104
internal/council/models.go Normal file
View File

@@ -0,0 +1,104 @@
package council
import (
"time"
"github.com/google/uuid"
)
// CouncilFormationRequest represents a request to form a project kickoff
// council. ProjectBrief is scanned for keywords to pick optional roles;
// ProjectName is used to derive deterministic agent IDs.
type CouncilFormationRequest struct {
	ProjectName     string                 `json:"project_name"`               // also embedded (spaces→dashes) in every agent ID
	Repository      string                 `json:"repository"`                 // repository the triggering issue belongs to
	ProjectBrief    string                 `json:"project_brief"`              // free-text brief; drives optional-role selection
	Constraints     string                 `json:"constraints,omitempty"`
	TechLimits      string                 `json:"tech_limits,omitempty"`
	ComplianceNotes string                 `json:"compliance_notes,omitempty"`
	Targets         string                 `json:"targets,omitempty"`
	TaskID          uuid.UUID              `json:"task_id"`                    // WHOOSH task created from the issue
	IssueID         int64                  `json:"issue_id"`                   // Gitea issue ID
	ExternalURL     string                 `json:"external_url"`               // link back to the triggering issue
	Metadata        map[string]interface{} `json:"metadata,omitempty"`         // stored as JSON on the councils row
}
// CouncilComposition defines the agents that make up the kickoff council,
// split into always-required core agents and brief-dependent optional ones.
type CouncilComposition struct {
	CouncilID      uuid.UUID      `json:"council_id"`      // random UUID assigned at formation time
	ProjectName    string         `json:"project_name"`
	CoreAgents     []CouncilAgent `json:"core_agents"`     // one per CoreCouncilRoles entry
	OptionalAgents []CouncilAgent `json:"optional_agents"` // selected by keyword analysis of the brief
	CreatedAt      time.Time      `json:"created_at"`
	Status         string         `json:"status"`          // forming, active, completed, failed
}
// CouncilAgent represents a single agent in the council.
type CouncilAgent struct {
	AgentID    string     `json:"agent_id"`             // deterministic: council-<project>-<role>
	RoleName   string     `json:"role_name"`            // kebab-case role key, e.g. "systems-analyst"
	AgentName  string     `json:"agent_name"`           // human-readable Title Case role name
	Required   bool       `json:"required"`             // true for core roles, false for optional ones
	Deployed   bool       `json:"deployed"`             // set once a container/service is running
	ServiceID  string     `json:"service_id,omitempty"` // orchestrator service ID, once deployed
	DeployedAt *time.Time `json:"deployed_at,omitempty"`
	Status     string     `json:"status"`               // pending, deploying, active, failed
}
// CouncilDeploymentResult represents the result of council agent deployment.
type CouncilDeploymentResult struct {
	CouncilID      uuid.UUID              `json:"council_id"`
	ProjectName    string                 `json:"project_name"`
	DeployedAgents []DeployedCouncilAgent `json:"deployed_agents"` // only agents that actually came up
	Status         string                 `json:"status"`          // success, partial, failed
	Message        string                 `json:"message"`
	DeployedAt     time.Time              `json:"deployed_at"`
	Errors         []string               `json:"errors,omitempty"` // per-agent failure descriptions, if any
}
// DeployedCouncilAgent represents a successfully deployed council agent.
type DeployedCouncilAgent struct {
	ServiceID   string    `json:"service_id"`   // orchestrator-assigned service identifier
	ServiceName string    `json:"service_name"`
	RoleName    string    `json:"role_name"`    // kebab-case council role this agent fills
	AgentID     string    `json:"agent_id"`
	Image       string    `json:"image"`        // container image the agent runs
	Status      string    `json:"status"`
	DeployedAt  time.Time `json:"deployed_at"`
}
// CouncilArtifacts represents the outputs produced by the council once it
// has done its kickoff work.
type CouncilArtifacts struct {
	CouncilID       uuid.UUID              `json:"council_id"`
	ProjectName     string                 `json:"project_name"`
	KickoffManifest map[string]interface{} `json:"kickoff_manifest,omitempty"`
	SeminalDR       string                 `json:"seminal_dr,omitempty"`   // seminal decision record text
	ScaffoldPlan    map[string]interface{} `json:"scaffold_plan,omitempty"`
	GateTests       string                 `json:"gate_tests,omitempty"`
	CHORUSLinks     map[string]string      `json:"chorus_links,omitempty"` // links into related CHORUS resources
	ProducedAt      time.Time              `json:"produced_at"`
	Status          string                 `json:"status"`                 // pending, partial, complete
}
// CoreCouncilRoles defines the required roles for any project kickoff
// council. FormCouncil instantiates exactly one agent per entry, in order.
var CoreCouncilRoles = []string{
	"systems-analyst",
	"senior-software-architect",
	"tpm",
	"security-architect",
	"devex-platform-engineer",
	"qa-test-engineer",
	"sre-observability-lead",
	"technical-writer",
}
// OptionalCouncilRoles defines the optional roles that may be included
// based on project needs.
//
// NOTE(review): selectOptionalAgents hard-codes these role keys as string
// literals rather than iterating this slice — keep the two in sync when
// adding or renaming a role.
var OptionalCouncilRoles = []string{
	"data-ai-architect",
	"privacy-data-governance-officer",
	"compliance-legal-liaison",
	"performance-benchmarking-analyst",
	"ui-ux-designer",
	"ios-macos-developer",
	"engine-programmer",
	"integration-architect",
	"cost-licensing-steward",
}

View File

@@ -8,8 +8,12 @@ import (
"strings"
"time"
"github.com/chorus-services/whoosh/internal/composer"
"github.com/chorus-services/whoosh/internal/config"
"github.com/chorus-services/whoosh/internal/council"
"github.com/chorus-services/whoosh/internal/gitea"
"github.com/chorus-services/whoosh/internal/orchestrator"
"github.com/google/uuid"
"github.com/jackc/pgx/v5"
"github.com/jackc/pgx/v5/pgxpool"
"github.com/rs/zerolog/log"
@@ -17,19 +21,25 @@ import (
// Monitor manages repository monitoring and task creation
type Monitor struct {
db *pgxpool.Pool
gitea *gitea.Client
stopCh chan struct{}
syncInterval time.Duration
db *pgxpool.Pool
gitea *gitea.Client
composer *composer.Service
council *council.CouncilComposer
agentDeployer *orchestrator.AgentDeployer
stopCh chan struct{}
syncInterval time.Duration
}
// NewMonitor creates a new repository monitor
func NewMonitor(db *pgxpool.Pool, giteaCfg config.GITEAConfig) *Monitor {
func NewMonitor(db *pgxpool.Pool, giteaCfg config.GITEAConfig, composerService *composer.Service, councilComposer *council.CouncilComposer, agentDeployer *orchestrator.AgentDeployer) *Monitor {
return &Monitor{
db: db,
gitea: gitea.NewClient(giteaCfg),
stopCh: make(chan struct{}),
syncInterval: 5 * time.Minute, // Default sync every 5 minutes
db: db,
gitea: gitea.NewClient(giteaCfg),
composer: composerService,
council: councilComposer,
agentDeployer: agentDeployer,
stopCh: make(chan struct{}),
syncInterval: 5 * time.Minute, // Default sync every 5 minutes
}
}
@@ -126,8 +136,19 @@ func (m *Monitor) syncRepository(ctx context.Context, repo RepositoryConfig) {
Limit: 100,
}
if repo.LastIssueSync != nil {
// Only use Since parameter for repositories that have completed initial scan
// For initial_scan or pending status, we want to scan ALL issues to find Design Briefs and UCXL content
if repo.LastIssueSync != nil && repo.SyncStatus != "initial_scan" && repo.SyncStatus != "pending" {
opts.Since = *repo.LastIssueSync
log.Debug().
Str("repository", repo.FullName).
Time("since", *repo.LastIssueSync).
Msg("Using incremental sync with Since parameter")
} else {
log.Info().
Str("repository", repo.FullName).
Str("sync_status", repo.SyncStatus).
Msg("Performing full scan (no Since parameter) - initial scan or looking for Design Briefs")
}
// Filter by CHORUS task labels if enabled
@@ -176,10 +197,41 @@ func (m *Monitor) syncRepository(ctx context.Context, repo RepositoryConfig) {
} else {
updated++
}
// Check if this issue should trigger council formation
if m.isProjectKickoffBrief(issue, repo) {
m.triggerCouncilFormation(ctx, taskID, issue, repo)
}
}
duration := time.Since(startTime)
// Check if repository should transition from initial scan to active status
if repo.SyncStatus == "initial_scan" || repo.SyncStatus == "pending" {
// Repository has completed initial scan
// For now, transition to active if we processed any issues or found Design Briefs
// Future: Add UCXL content detection logic here
shouldActivate := (created > 0 || updated > 0)
if shouldActivate {
log.Info().
Str("repository", repo.FullName).
Int("tasks_created", created).
Int("tasks_updated", updated).
Msg("Transitioning repository from initial scan to active status - content found")
if err := m.updateRepositoryStatus(ctx, repo.ID, "active", nil); err != nil {
log.Error().Err(err).
Str("repository", repo.FullName).
Msg("Failed to transition repository to active status")
}
} else {
log.Info().
Str("repository", repo.FullName).
Msg("Initial scan completed - no content found, keeping in initial_scan status")
}
}
// Update repository sync timestamps and statistics
if err := m.updateRepositorySyncInfo(ctx, repo.ID, time.Now(), created, updated); err != nil {
log.Error().Err(err).
@@ -307,6 +359,17 @@ func (m *Monitor) createOrUpdateTask(ctx context.Context, repo RepositoryConfig,
return "", false, fmt.Errorf("failed to create task: %w", err)
}
// For newly created bzzz-task issues, check if it's a council formation trigger
if m.composer != nil && m.shouldTriggerTeamComposition(issue.Labels) {
if m.isProjectKickoffBrief(issue, repo) {
// This is a project kickoff - trigger council formation
go m.triggerCouncilFormation(context.Background(), taskID, issue, repo)
} else {
// Regular bzzz-task - trigger normal team composition
go m.triggerTeamComposition(context.Background(), taskID, issue, repo)
}
}
return taskID, true, nil
}
}
@@ -432,7 +495,7 @@ func (m *Monitor) getMonitoredRepositories(ctx context.Context) ([]RepositoryCon
// Parse CHORUS task labels
if err := json.Unmarshal(chorusLabelsJSON, &repo.ChorusTaskLabels); err != nil {
log.Error().Err(err).Str("repository", repo.FullName).Msg("Failed to parse CHORUS task labels")
repo.ChorusTaskLabels = []string{"bzzz-task", "chorus-task"} // Default labels
repo.ChorusTaskLabels = []string{"bzzz-task", "chorus-task", "chorus-entrypoint"} // Default labels
}
repos = append(repos, repo)
@@ -567,8 +630,416 @@ func (m *Monitor) getRepositoryByID(ctx context.Context, repoID string) (*Reposi
// Parse CHORUS task labels
if err := json.Unmarshal(chorusLabelsJSON, &repo.ChorusTaskLabels); err != nil {
log.Error().Err(err).Str("repository", repo.FullName).Msg("Failed to parse CHORUS task labels")
repo.ChorusTaskLabels = []string{"bzzz-task", "chorus-task"} // Default labels
repo.ChorusTaskLabels = []string{"bzzz-task", "chorus-task", "chorus-entrypoint"} // Default labels
}
return &repo, nil
}
}
// shouldTriggerTeamComposition reports whether any of the issue's labels is
// the "bzzz-task" label (case-insensitive), which marks the issue as
// eligible for automatic team composition.
func (m *Monitor) shouldTriggerTeamComposition(labels []gitea.Label) bool {
	for _, l := range labels {
		if strings.EqualFold(l.Name, "bzzz-task") {
			return true
		}
	}
	return false
}
// isProjectKickoffBrief checks if the issue represents a new project
// kickoff council trigger. An issue qualifies only when BOTH hold:
// it carries the "chorus-entrypoint" label (case-insensitive) AND its title
// contains "design brief" (case-insensitive). A positive match is logged.
func (m *Monitor) isProjectKickoffBrief(issue gitea.Issue, repo RepositoryConfig) bool {
	// Check if it has the chorus-entrypoint label.
	hasChorusEntrypoint := false
	for _, label := range issue.Labels {
		if strings.ToLower(label.Name) == "chorus-entrypoint" {
			hasChorusEntrypoint = true
			break
		}
	}

	if !hasChorusEntrypoint {
		return false
	}

	// Check if the issue title contains "Design Brief".
	title := strings.ToLower(issue.Title)
	if !strings.Contains(title, "design brief") {
		return false
	}

	// Additional validation: this should be a new/empty repository.
	// For now, we'll rely on the title check, but could add repo analysis later.
	log.Info().
		Str("repository", repo.FullName).
		Str("issue_title", issue.Title).
		Msg("🎭 Detected project kickoff brief - council formation required")

	return true
}
// triggerTeamComposition initiates team composition for a newly created
// bzzz-task: it analyzes the issue, creates a team, assigns the task to it,
// and (when an agent deployer is configured) kicks off agent deployment in
// a separate goroutine. All failures are logged and abort the remaining
// steps; nothing is returned to the caller.
func (m *Monitor) triggerTeamComposition(ctx context.Context, taskID string, issue gitea.Issue, repo RepositoryConfig) {
	log.Info().
		Str("task_id", taskID).
		Int64("issue_id", issue.ID).
		Str("repository", repo.FullName).
		Msg("🎯 Triggering team composition for bzzz-task")

	// Convert the Gitea issue into the composer's analysis input.
	techStack := m.extractTechStackFromIssue(issue)
	requirements := m.extractRequirementsFromIssue(issue)

	analysisInput := &composer.TaskAnalysisInput{
		Title:        issue.Title,
		Description:  issue.Body,
		Requirements: requirements,
		Repository:   repo.FullName,
		Priority:     m.mapPriorityToComposer(m.extractPriorityFromLabels(issue.Labels)),
		TechStack:    techStack,
		Metadata: map[string]interface{}{
			"task_id":       taskID,
			"issue_id":      issue.ID,
			"issue_number":  issue.Number,
			"repository_id": repo.ID,
			"external_url":  issue.HTMLURL,
		},
	}

	// Perform team composition analysis.
	result, err := m.composer.AnalyzeAndComposeTeam(ctx, analysisInput)
	if err != nil {
		log.Error().Err(err).
			Str("task_id", taskID).
			Msg("Failed to perform team composition analysis")
		return
	}

	log.Info().
		Str("task_id", taskID).
		Str("team_id", result.TeamComposition.TeamID.String()).
		Int("team_size", result.TeamComposition.EstimatedSize).
		Float64("confidence", result.TeamComposition.ConfidenceScore).
		Msg("✅ Team composition analysis completed")

	// Create the team in the database.
	team, err := m.composer.CreateTeam(ctx, result.TeamComposition, analysisInput)
	if err != nil {
		log.Error().Err(err).
			Str("task_id", taskID).
			Msg("Failed to create team")
		return
	}

	// Update task with team assignment.
	err = m.assignTaskToTeam(ctx, taskID, team.ID.String())
	if err != nil {
		log.Error().Err(err).
			Str("task_id", taskID).
			Str("team_id", team.ID.String()).
			Msg("Failed to assign task to team")
		return
	}

	// Deploy agents for the newly formed team if an agent deployer is
	// available. Runs in its own goroutine so task assignment isn't blocked.
	if m.agentDeployer != nil {
		go m.deployTeamAgents(ctx, taskID, team, result.TeamComposition, repo)
	}

	log.Info().
		Str("task_id", taskID).
		Str("team_id", team.ID.String()).
		Str("team_name", team.Name).
		Msg("🚀 Task successfully assigned to team")
}
// deployTeamAgents deploys Docker containers for the assigned team agents
// via the orchestrator. Failures are logged; nothing is returned.
//
// NOTE(review): the ctx parameter is currently unused — DeployTeamAgents
// does not take a context. Confirm whether cancellation should propagate.
func (m *Monitor) deployTeamAgents(ctx context.Context, taskID string, team *composer.Team, teamComposition *composer.TeamComposition, repo RepositoryConfig) {
	log.Info().
		Str("task_id", taskID).
		Str("team_id", team.ID.String()).
		Int("agents_to_deploy", len(teamComposition.AgentMatches)).
		Msg("🚀 Starting agent deployment for team")

	// Convert string UUIDs to uuid.UUID type.
	taskUUID, err := uuid.Parse(taskID)
	if err != nil {
		log.Error().Err(err).Str("task_id", taskID).Msg("Invalid task ID format")
		return
	}

	// Create deployment request for the entire team. Several TaskContext
	// fields are placeholders pending real extraction (see TODOs).
	deploymentRequest := &orchestrator.DeploymentRequest{
		TaskID:          taskUUID,
		TeamID:          team.ID,
		TeamComposition: teamComposition,
		TaskContext: &orchestrator.TaskContext{
			IssueTitle:       team.Description, // Use team description which comes from issue title
			IssueDescription: team.Description, // TODO: Extract actual issue description
			Repository:       repo.FullName,
			TechStack:        []string{"go", "docker", "ai"}, // TODO: Extract from analysis
			Requirements:     []string{},                     // TODO: Extract from issue
			Priority:         "medium",                       // TODO: Extract from team data
			ExternalURL:      "",                             // TODO: Add issue URL
			Metadata: map[string]interface{}{
				"task_type": "development",
			},
		},
		DeploymentMode: "immediate",
	}

	// Deploy all agents for this team.
	deploymentResult, err := m.agentDeployer.DeployTeamAgents(deploymentRequest)
	if err != nil {
		log.Error().Err(err).
			Str("task_id", taskID).
			Str("team_id", team.ID.String()).
			Msg("Failed to deploy team agents")
		return
	}

	log.Info().
		Str("task_id", taskID).
		Str("team_id", team.ID.String()).
		Str("status", deploymentResult.Status).
		Int("agents_deployed", len(deploymentResult.DeployedServices)).
		Msg("🎉 Successfully deployed team agents")

	// TODO: Update database with deployment information.
	// This could include service IDs, container names, deployment status, etc.
}
// extractRequirementsFromIssue extracts requirements from the issue body by
// collecting markdown list items.
//
// Recognized formats:
//   - "- item", "* item", "+ item" (bullet lists)
//   - "1. item", "12. item" (numbered lists — any number of digits, unlike
//     the previous implementation which only matched single-digit prefixes)
//
// Horizontal rules ("---", "***") and empty list items are skipped.
// Always returns a non-nil slice so JSON encodes as [] rather than null.
func (m *Monitor) extractRequirementsFromIssue(issue gitea.Issue) []string {
	requirements := []string{}
	for _, line := range strings.Split(issue.Body, "\n") {
		line = strings.TrimSpace(line)
		if line == "" {
			continue
		}
		if strings.HasPrefix(line, "-") || strings.HasPrefix(line, "*") || strings.HasPrefix(line, "+") {
			// Bullet item. Trimming all leading bullet characters rejects
			// horizontal rules like "---" or "***" (they trim to nothing).
			req := strings.TrimSpace(strings.TrimLeft(line, "-*+"))
			if req != "" {
				requirements = append(requirements, req)
			}
			continue
		}
		// Numbered item: one or more digits followed by "." (e.g. "10. item").
		i := 0
		for i < len(line) && line[i] >= '0' && line[i] <= '9' {
			i++
		}
		if i > 0 && i < len(line) && line[i] == '.' {
			req := strings.TrimSpace(line[i+1:])
			if req != "" {
				requirements = append(requirements, req)
			}
		}
	}
	return requirements
}
// mapPriorityToComposer converts an internal priority string into the
// composer package's TaskPriority. Matching is case-insensitive and unknown
// values fall back to medium priority.
func (m *Monitor) mapPriorityToComposer(priority string) composer.TaskPriority {
	levels := map[string]composer.TaskPriority{
		"critical": composer.PriorityCritical,
		"high":     composer.PriorityHigh,
		"low":      composer.PriorityLow,
	}
	if level, ok := levels[strings.ToLower(priority)]; ok {
		return level
	}
	return composer.PriorityMedium
}
// assignTaskToTeam marks the task as claimed by the given team, setting the
// task row's assigned_team_id, status, and updated_at columns.
func (m *Monitor) assignTaskToTeam(ctx context.Context, taskID, teamID string) error {
	const query = `
		UPDATE tasks
		SET assigned_team_id = $1, status = $2, updated_at = NOW()
		WHERE id = $3
	`
	if _, err := m.db.Exec(ctx, query, teamID, "claimed", taskID); err != nil {
		return fmt.Errorf("failed to assign task to team: %w", err)
	}
	return nil
}
// triggerCouncilFormation initiates council formation for a project kickoff.
// It parses the task ID, derives the project name from the repository full
// name, asks the council service to compose a council for the Design Brief,
// launches agent deployment when a deployer is configured, and records the
// council assignment on the task. Intended to run as a goroutine; failures
// are logged rather than returned.
func (m *Monitor) triggerCouncilFormation(ctx context.Context, taskID string, issue gitea.Issue, repo RepositoryConfig) {
	log.Info().
		Str("task_id", taskID).
		Int64("issue_id", issue.ID).
		Str("repository", repo.FullName).
		Str("issue_title", issue.Title).
		Msg("🎭 Triggering council formation for project kickoff")

	// Convert task ID to UUID for the council formation request.
	taskUUID, err := uuid.Parse(taskID)
	if err != nil {
		log.Error().
			Err(err).
			Str("task_id", taskID).
			Msg("Failed to parse task ID as UUID")
		return
	}

	// Extract the project name from the repository full name ("owner/name").
	// Taking the last segment avoids the previous index-out-of-range panic
	// when the full name unexpectedly contains no "/"; in that case the full
	// name itself is used.
	segments := strings.Split(repo.FullName, "/")
	projectName := segments[len(segments)-1]

	// Create council formation request from the Design Brief issue.
	councilRequest := &council.CouncilFormationRequest{
		ProjectName:  projectName,
		Repository:   repo.FullName,
		ProjectBrief: issue.Body,
		TaskID:       taskUUID,
		IssueID:      issue.ID,
		ExternalURL:  issue.HTMLURL,
		Metadata: map[string]interface{}{
			"task_id":       taskID,
			"issue_id":      issue.ID,
			"issue_number":  issue.Number,
			"repository_id": repo.ID,
			"created_by":    issue.User.Login,
			"labels":        m.extractLabelNames(issue.Labels),
			"milestone":     m.extractMilestone(issue),
		},
	}

	// Form the council.
	composition, err := m.council.FormCouncil(ctx, councilRequest)
	if err != nil {
		log.Error().Err(err).
			Str("task_id", taskID).
			Str("project_name", projectName).
			Msg("Failed to form project kickoff council")
		return
	}

	log.Info().
		Str("task_id", taskID).
		Str("council_id", composition.CouncilID.String()).
		Int("core_agents", len(composition.CoreAgents)).
		Int("optional_agents", len(composition.OptionalAgents)).
		Msg("✅ Council composition formed")

	// Deploy council agents asynchronously if an agent deployer is available.
	if m.agentDeployer != nil {
		go m.deployCouncilAgents(ctx, taskID, composition, councilRequest, repo)
	}

	// Update the task status to indicate council formation is underway.
	err = m.assignTaskToCouncil(ctx, taskID, composition.CouncilID.String())
	if err != nil {
		log.Error().
			Err(err).
			Str("task_id", taskID).
			Str("council_id", composition.CouncilID.String()).
			Msg("Failed to assign task to council")
	}

	log.Info().
		Str("task_id", taskID).
		Str("council_id", composition.CouncilID.String()).
		Str("project_name", projectName).
		Msg("🚀 Project kickoff council successfully formed and deploying")
}
// deployCouncilAgents deploys Docker containers for the council agents.
// It translates the council composition into an orchestrator deployment
// request, triggers deployment, and logs the per-agent outcomes. On a
// deployment failure the council status is marked failed. Intended to run as
// a goroutine; failures are logged rather than returned.
func (m *Monitor) deployCouncilAgents(ctx context.Context, taskID string, composition *council.CouncilComposition, request *council.CouncilFormationRequest, repo RepositoryConfig) {
	councilID := composition.CouncilID.String()

	log.Info().
		Str("task_id", taskID).
		Str("council_id", councilID).
		Int("core_agents", len(composition.CoreAgents)).
		Int("optional_agents", len(composition.OptionalAgents)).
		Msg("🚀 Starting council agent deployment")

	// Build the council deployment request from the formation data.
	deploymentRequest := &orchestrator.CouncilDeploymentRequest{
		CouncilID:          composition.CouncilID,
		ProjectName:        composition.ProjectName,
		CouncilComposition: composition,
		ProjectContext: &orchestrator.CouncilProjectContext{
			ProjectName:     composition.ProjectName,
			Repository:      request.Repository,
			ProjectBrief:    request.ProjectBrief,
			Constraints:     request.Constraints,
			TechLimits:      request.TechLimits,
			ComplianceNotes: request.ComplianceNotes,
			Targets:         request.Targets,
			ExternalURL:     request.ExternalURL,
		},
		DeploymentMode: "immediate",
	}

	result, err := m.agentDeployer.DeployCouncilAgents(deploymentRequest)
	if err != nil {
		log.Error().
			Err(err).
			Str("council_id", councilID).
			Msg("Failed to deploy council agents")
		// Mark the council failed so it is not left in a forming state.
		m.council.UpdateCouncilStatus(ctx, composition.CouncilID, "failed")
		return
	}

	log.Info().
		Str("council_id", councilID).
		Str("deployment_status", result.Status).
		Int("deployed_agents", len(result.DeployedAgents)).
		Int("errors", len(result.Errors)).
		Msg("✅ Council agent deployment completed")

	// Log deployment details for each agent.
	for _, agent := range result.DeployedAgents {
		log.Info().
			Str("council_id", councilID).
			Str("service_id", agent.ServiceID).
			Str("role", agent.RoleName).
			Str("agent_id", agent.AgentID).
			Msg("🤖 Council agent deployed")
	}

	// Surface any per-agent deployment errors (no-op when the slice is empty).
	for _, errMsg := range result.Errors {
		log.Warn().
			Str("council_id", councilID).
			Str("error", errMsg).
			Msg("⚠️ Council agent deployment error")
	}
}
// assignTaskToCouncil updates the task record with the assigned council ID.
// The council ID is stored in assigned_team_id for consistency with the
// existing schema, and the task status becomes "council_forming".
func (m *Monitor) assignTaskToCouncil(ctx context.Context, taskID, councilID string) error {
	const query = `
		UPDATE tasks
		SET assigned_team_id = $1, status = $2, updated_at = NOW()
		WHERE id = $3
	`
	if _, err := m.db.Exec(ctx, query, councilID, "council_forming", taskID); err != nil {
		return fmt.Errorf("failed to assign task to council: %w", err)
	}
	return nil
}
// extractLabelNames collects the name of every Gitea label into a string
// slice, preserving order.
func (m *Monitor) extractLabelNames(labels []gitea.Label) []string {
	names := make([]string, 0, len(labels))
	for _, label := range labels {
		names = append(names, label.Name)
	}
	return names
}
// extractMilestone extracts milestone information if present.
// Currently a stub: milestone field access depends on the Gitea SDK version,
// so an empty string is returned to avoid build issues until the SDK usage
// is pinned. The issue parameter is intentionally unused for now.
func (m *Monitor) extractMilestone(issue gitea.Issue) string {
	return ""
}

View File

@@ -0,0 +1,591 @@
package orchestrator
import (
"context"
"fmt"
"time"
"github.com/chorus-services/whoosh/internal/composer"
"github.com/chorus-services/whoosh/internal/council"
"github.com/docker/docker/api/types/swarm"
"github.com/google/uuid"
"github.com/jackc/pgx/v5/pgxpool"
"github.com/rs/zerolog/log"
)
// AgentDeployer manages deployment of agent containers for teams.
// Create one with NewAgentDeployer and release it with Close.
type AgentDeployer struct {
	swarmManager *SwarmManager      // creates Docker Swarm services for agents
	db           *pgxpool.Pool      // Postgres pool for deployment bookkeeping
	registry     string             // Docker registry host for agent images
	ctx          context.Context    // lifetime context for DB calls; cancelled by Close
	cancel       context.CancelFunc // cancels ctx when Close is called
}
// NewAgentDeployer creates a new agent deployer. An empty registry selects
// the default private registry.
func NewAgentDeployer(swarmManager *SwarmManager, db *pgxpool.Pool, registry string) *AgentDeployer {
	ctx, cancel := context.WithCancel(context.Background())

	deployer := &AgentDeployer{
		swarmManager: swarmManager,
		db:           db,
		registry:     registry,
		ctx:          ctx,
		cancel:       cancel,
	}
	if deployer.registry == "" {
		deployer.registry = "registry.home.deepblack.cloud"
	}
	return deployer
}
// Close shuts down the agent deployer by cancelling its internal context,
// which aborts any in-flight database calls made with it.
// It always returns nil.
func (ad *AgentDeployer) Close() error {
	ad.cancel()
	return nil
}
// DeploymentRequest represents a request to deploy agents for a team.
// TeamComposition supplies the agent/role matches to deploy, and TaskContext
// is forwarded into each agent's environment.
type DeploymentRequest struct {
	TeamID          uuid.UUID                 `json:"team_id"`
	TaskID          uuid.UUID                 `json:"task_id"`
	TeamComposition *composer.TeamComposition `json:"team_composition"`
	TaskContext     *TaskContext              `json:"task_context"`
	DeploymentMode  string                    `json:"deployment_mode"` // immediate, scheduled, manual
}
// DeploymentResult represents the result of a deployment operation.
// Status is "success" when every agent deployed, "partial" when only some
// did, and "failed" when none did; Errors carries one message per failure.
type DeploymentResult struct {
	TeamID           uuid.UUID         `json:"team_id"`
	TaskID           uuid.UUID         `json:"task_id"`
	DeployedServices []DeployedService `json:"deployed_services"`
	Status           string            `json:"status"` // success, partial, failed
	Message          string            `json:"message"`
	DeployedAt       time.Time         `json:"deployed_at"`
	Errors           []string          `json:"errors,omitempty"`
}
// DeployedService represents a successfully deployed service backing one
// agent role, identified by its Docker Swarm service ID and name.
type DeployedService struct {
	ServiceID   string `json:"service_id"`   // Docker Swarm service ID
	ServiceName string `json:"service_name"` // generated service name
	AgentRole   string `json:"agent_role"`   // role this service fulfils
	AgentID     string `json:"agent_id"`     // agent assigned to the role
	Image       string `json:"image"`        // container image in use
	Status      string `json:"status"`       // e.g. "deploying"
}
// CouncilDeploymentRequest represents a request to deploy council agents for
// a project kickoff. The composition lists core (required) and optional
// agents; ProjectContext is forwarded into each agent's environment.
type CouncilDeploymentRequest struct {
	CouncilID          uuid.UUID                   `json:"council_id"`
	ProjectName        string                      `json:"project_name"`
	CouncilComposition *council.CouncilComposition `json:"council_composition"`
	ProjectContext     *CouncilProjectContext      `json:"project_context"`
	DeploymentMode     string                      `json:"deployment_mode"` // immediate, scheduled, manual
}
// CouncilProjectContext contains the project information for council agents.
// The omitempty fields are optional free-text guidance passed through to
// the agents' environment verbatim.
type CouncilProjectContext struct {
	ProjectName     string `json:"project_name"`
	Repository      string `json:"repository"`
	ProjectBrief    string `json:"project_brief"`
	Constraints     string `json:"constraints,omitempty"`
	TechLimits      string `json:"tech_limits,omitempty"`
	ComplianceNotes string `json:"compliance_notes,omitempty"`
	Targets         string `json:"targets,omitempty"`
	ExternalURL     string `json:"external_url,omitempty"`
}
// DeployTeamAgents deploys all agents for a team.
// Each agent in the composition becomes its own Swarm service; individual
// failures are collected rather than aborting the batch. The overall Status
// is "success" when every agent deployed, "partial" when only some did, and
// "failed" when none did. The team's deployment status is persisted before
// returning. The returned error is currently always nil; per-agent failures
// are reported via the result.
func (ad *AgentDeployer) DeployTeamAgents(request *DeploymentRequest) (*DeploymentResult, error) {
	log.Info().
		Str("team_id", request.TeamID.String()).
		Str("task_id", request.TaskID.String()).
		Int("agent_matches", len(request.TeamComposition.AgentMatches)).
		Msg("🚀 Starting team agent deployment")

	outcome := &DeploymentResult{
		TeamID:           request.TeamID,
		TaskID:           request.TaskID,
		DeployedServices: []DeployedService{},
		DeployedAt:       time.Now(),
		Errors:           []string{},
	}

	// Deploy each agent in the team composition independently.
	for _, match := range request.TeamComposition.AgentMatches {
		service, deployErr := ad.deploySingleAgent(request, match)
		if deployErr != nil {
			outcome.Errors = append(outcome.Errors, fmt.Sprintf(
				"Failed to deploy agent %s for role %s: %v",
				match.Agent.Name, match.Role.Name, deployErr))
			log.Error().
				Err(deployErr).
				Str("agent_id", match.Agent.ID.String()).
				Str("role", match.Role.Name).
				Msg("Failed to deploy agent")
			continue
		}

		outcome.DeployedServices = append(outcome.DeployedServices, DeployedService{
			ServiceID:   service.ID,
			ServiceName: service.Spec.Name,
			AgentRole:   match.Role.Name,
			AgentID:     match.Agent.ID.String(),
			Image:       service.Spec.TaskTemplate.ContainerSpec.Image,
			Status:      "deploying",
		})

		// A bookkeeping failure does not undo the deployment itself, so it
		// is only logged and the loop continues.
		if recordErr := ad.recordDeployment(request.TeamID, request.TaskID, match, service.ID); recordErr != nil {
			log.Error().
				Err(recordErr).
				Str("service_id", service.ID).
				Msg("Failed to record deployment in database")
		}
	}

	// Derive the overall deployment status from the per-agent outcomes.
	switch {
	case len(outcome.Errors) == 0:
		outcome.Status = "success"
		outcome.Message = fmt.Sprintf("Successfully deployed %d agents", len(outcome.DeployedServices))
	case len(outcome.DeployedServices) > 0:
		outcome.Status = "partial"
		outcome.Message = fmt.Sprintf("Deployed %d/%d agents with %d errors",
			len(outcome.DeployedServices),
			len(request.TeamComposition.AgentMatches),
			len(outcome.Errors))
	default:
		outcome.Status = "failed"
		outcome.Message = "Failed to deploy any agents"
	}

	if err := ad.updateTeamDeploymentStatus(request.TeamID, outcome.Status, outcome.Message); err != nil {
		log.Error().
			Err(err).
			Str("team_id", request.TeamID.String()).
			Msg("Failed to update team deployment status")
	}

	log.Info().
		Str("team_id", request.TeamID.String()).
		Str("status", outcome.Status).
		Int("deployed", len(outcome.DeployedServices)).
		Int("errors", len(outcome.Errors)).
		Msg("✅ Team agent deployment completed")

	return outcome, nil
}
// selectAgentImage determines the appropriate CHORUS image for the agent role.
//
// NOTE(review): roleName and agent are currently unused and ad.registry is
// not applied — the image reference is hard-coded. Confirm whether the
// configured registry should take effect for agent images.
func (ad *AgentDeployer) selectAgentImage(roleName string, agent *composer.Agent) string {
	// All agents use the same CHORUS image, but with different configurations.
	// The image handles role specialization internally based on environment variables.
	return "docker.io/anthonyrawlins/chorus:backbeat-v2.0.1"
}
// buildAgentEnvironment creates environment variables for CHORUS agent
// configuration. CHORUS handles its own prompt composition and system
// behavior; WHOOSH only passes the agent name (which maps to a
// human-roles.yaml definition) plus essential task context.
func (ad *AgentDeployer) buildAgentEnvironment(request *DeploymentRequest, agentMatch *composer.AgentMatch) map[string]string {
	env := make(map[string]string)

	// Core CHORUS configuration.
	env["CHORUS_AGENT_NAME"] = agentMatch.Role.Name // maps to human-roles.yaml agent definition
	env["CHORUS_TEAM_ID"] = request.TeamID.String()
	env["CHORUS_TASK_ID"] = request.TaskID.String()

	// Essential task context.
	env["CHORUS_PROJECT"] = request.TaskContext.Repository
	env["CHORUS_TASK_TITLE"] = request.TaskContext.IssueTitle
	env["CHORUS_TASK_DESC"] = request.TaskContext.IssueDescription
	env["CHORUS_PRIORITY"] = request.TaskContext.Priority
	env["CHORUS_EXTERNAL_URL"] = request.TaskContext.ExternalURL

	// WHOOSH coordination.
	env["WHOOSH_COORDINATOR"] = "true"
	env["WHOOSH_ENDPOINT"] = "http://whoosh:8080"

	// Docker access for CHORUS sandbox management.
	env["DOCKER_HOST"] = "unix:///var/run/docker.sock"

	return env
}
// Note: CHORUS handles its own prompt composition from human-roles.yaml.
// WHOOSH just passes the agent name and essential task context.

// determineAgentType maps a role to an agent type used for resource
// allocation. Currently every role maps to "standard"; this could be
// enhanced based on role complexity.
func (ad *AgentDeployer) determineAgentType(agentMatch *composer.AgentMatch) string {
	return "standard"
}
// calculateResources determines resource requirements for the agent.
// Every CHORUS agent currently receives the same standard allocation
// (1 CPU / 1GB limit, 0.5 CPU / 512MB request); CHORUS manages its own
// resources internally.
func (ad *AgentDeployer) calculateResources(agentMatch *composer.AgentMatch) ResourceLimits {
	limits := ResourceLimits{}
	limits.CPULimit = 1000000000     // 1 CPU core
	limits.MemoryLimit = 1073741824  // 1GB RAM
	limits.CPURequest = 500000000    // 0.5 CPU core
	limits.MemoryRequest = 536870912 // 512MB RAM
	return limits
}
// buildAgentVolumes creates volume mounts for CHORUS agents: the host Docker
// socket (needed for sandboxing) plus a team-scoped workspace volume.
func (ad *AgentDeployer) buildAgentVolumes(request *DeploymentRequest) []VolumeMount {
	dockerSocket := VolumeMount{
		Type:     "bind",
		Source:   "/var/run/docker.sock",
		Target:   "/var/run/docker.sock",
		ReadOnly: false, // CHORUS needs Docker access for sandboxing
	}
	workspace := VolumeMount{
		Type:     "volume",
		Source:   fmt.Sprintf("whoosh-workspace-%s", request.TeamID.String()),
		Target:   "/workspace",
		ReadOnly: false,
	}
	return []VolumeMount{dockerSocket, workspace}
}
// buildAgentPlacement creates placement constraints for agents, steering
// them onto worker nodes. Placement preferences are intentionally omitted
// for compilation compatibility.
func (ad *AgentDeployer) buildAgentPlacement(agentMatch *composer.AgentMatch) PlacementConfig {
	constraints := []string{"node.role==worker"}
	return PlacementConfig{Constraints: constraints}
}
// deploySingleAgent deploys a single agent for a specific role.
// It assembles the full deployment configuration (image, resources,
// environment, volumes, placement) and delegates service creation to the
// swarm manager.
func (ad *AgentDeployer) deploySingleAgent(request *DeploymentRequest, agentMatch *composer.AgentMatch) (*swarm.Service, error) {
	config := &AgentDeploymentConfig{
		TeamID:      request.TeamID.String(),
		TaskID:      request.TaskID.String(),
		AgentRole:   agentMatch.Role.Name,
		AgentType:   ad.determineAgentType(agentMatch),
		Image:       ad.selectAgentImage(agentMatch.Role.Name, agentMatch.Agent),
		Replicas:    1, // single replica per agent to start with
		Resources:   ad.calculateResources(agentMatch),
		Environment: ad.buildAgentEnvironment(request, agentMatch),
		TaskContext: *request.TaskContext,
		Networks:    []string{"chorus_default"},
		Volumes:     ad.buildAgentVolumes(request),
		Placement:   ad.buildAgentPlacement(agentMatch),
	}

	service, err := ad.swarmManager.DeployAgent(config)
	if err != nil {
		return nil, fmt.Errorf("failed to deploy agent service: %w", err)
	}
	return service, nil
}
// recordDeployment inserts a row into agent_deployments describing the newly
// deployed agent service, with status "deployed".
func (ad *AgentDeployer) recordDeployment(teamID uuid.UUID, taskID uuid.UUID, agentMatch *composer.AgentMatch, serviceID string) error {
	const query = `
		INSERT INTO agent_deployments (team_id, task_id, agent_id, role_id, service_id, status, deployed_at)
		VALUES ($1, $2, $3, $4, $5, $6, NOW())
	`
	_, err := ad.db.Exec(ad.ctx, query, teamID, taskID, agentMatch.Agent.ID, agentMatch.Role.ID, serviceID, "deployed")
	return err
}
// updateTeamDeploymentStatus persists the latest deployment status and a
// human-readable message on the team row.
func (ad *AgentDeployer) updateTeamDeploymentStatus(teamID uuid.UUID, status, message string) error {
	const query = `
		UPDATE teams
		SET deployment_status = $1, deployment_message = $2, updated_at = NOW()
		WHERE id = $3
	`
	_, err := ad.db.Exec(ad.ctx, query, status, message, teamID)
	return err
}
// DeployCouncilAgents deploys all agents for a project kickoff council.
// Core agents are required: each failure is recorded in result.Errors.
// Optional agents are best effort: failures are only logged. The overall
// Status is "success" when every core agent deployed, "partial" when some
// did, and "failed" when none did; the council status row is updated
// accordingly before returning. The returned error is currently always nil;
// per-agent failures are reported via the result.
func (ad *AgentDeployer) DeployCouncilAgents(request *CouncilDeploymentRequest) (*council.CouncilDeploymentResult, error) {
	log.Info().
		Str("council_id", request.CouncilID.String()).
		Str("project_name", request.ProjectName).
		Int("core_agents", len(request.CouncilComposition.CoreAgents)).
		Int("optional_agents", len(request.CouncilComposition.OptionalAgents)).
		Msg("🎭 Starting council agent deployment")

	result := &council.CouncilDeploymentResult{
		CouncilID:      request.CouncilID,
		ProjectName:    request.ProjectName,
		DeployedAgents: []council.DeployedCouncilAgent{},
		DeployedAt:     time.Now(),
		Errors:         []string{},
	}

	// Deploy core agents (required), counting successes directly instead of
	// re-matching deployed agents by role name afterwards — role-name
	// matching could double-count when an optional agent shares a role name
	// with a core agent.
	deployedCoreAgents := ad.deployCouncilAgentGroup(request, request.CouncilComposition.CoreAgents, true, result)

	// Deploy optional agents (best effort).
	ad.deployCouncilAgentGroup(request, request.CouncilComposition.OptionalAgents, false, result)

	// Determine overall deployment status from the core-agent outcomes.
	coreAgentsCount := len(request.CouncilComposition.CoreAgents)
	switch {
	case deployedCoreAgents == coreAgentsCount:
		result.Status = "success"
		result.Message = fmt.Sprintf("Successfully deployed %d agents (%d core, %d optional)",
			len(result.DeployedAgents), deployedCoreAgents, len(result.DeployedAgents)-deployedCoreAgents)
	case deployedCoreAgents > 0:
		result.Status = "partial"
		result.Message = fmt.Sprintf("Deployed %d/%d core agents with %d errors",
			deployedCoreAgents, coreAgentsCount, len(result.Errors))
	default:
		result.Status = "failed"
		result.Message = "Failed to deploy any core council agents"
	}

	if err := ad.updateCouncilDeploymentStatus(request.CouncilID, result.Status, result.Message); err != nil {
		log.Error().
			Err(err).
			Str("council_id", request.CouncilID.String()).
			Msg("Failed to update council deployment status")
	}

	log.Info().
		Str("council_id", request.CouncilID.String()).
		Str("status", result.Status).
		Int("deployed", len(result.DeployedAgents)).
		Int("errors", len(result.Errors)).
		Msg("✅ Council agent deployment completed")

	return result, nil
}

// deployCouncilAgentGroup deploys one group of council agents, appending
// successes (and, for required agents, failure messages) to result.
// It returns how many agents from the group deployed successfully.
func (ad *AgentDeployer) deployCouncilAgentGroup(request *CouncilDeploymentRequest, agents []council.CouncilAgent, required bool, result *council.CouncilDeploymentResult) int {
	deployed := 0
	for _, agent := range agents {
		deployedAgent, err := ad.deploySingleCouncilAgent(request, agent)
		if err != nil {
			if required {
				result.Errors = append(result.Errors, fmt.Sprintf(
					"Failed to deploy core agent %s (%s): %v", agent.AgentName, agent.RoleName, err))
				log.Error().
					Err(err).
					Str("agent_id", agent.AgentID).
					Str("role", agent.RoleName).
					Msg("Failed to deploy core council agent")
			} else {
				// Optional agents failing is not critical.
				log.Warn().
					Err(err).
					Str("agent_id", agent.AgentID).
					Str("role", agent.RoleName).
					Msg("Failed to deploy optional council agent (non-critical)")
			}
			continue
		}

		result.DeployedAgents = append(result.DeployedAgents, *deployedAgent)
		deployed++

		// Bookkeeping failures do not undo the deployment; log and continue.
		if err := ad.recordCouncilAgentDeployment(request.CouncilID, agent, deployedAgent.ServiceID); err != nil {
			log.Error().
				Err(err).
				Str("service_id", deployedAgent.ServiceID).
				Msg("Failed to record council agent deployment in database")
		}
	}
	return deployed
}
// deploySingleCouncilAgent deploys a single council agent.
// The council ID doubles as both team and task ID so council services fit
// the existing deployment schema, and council formation always runs at high
// priority.
func (ad *AgentDeployer) deploySingleCouncilAgent(request *CouncilDeploymentRequest, agent council.CouncilAgent) (*council.DeployedCouncilAgent, error) {
	// All council agents run the same CHORUS image; behavior is selected via
	// environment variables.
	const image = "docker.io/anthonyrawlins/chorus:backbeat-v2.0.1"

	councilID := request.CouncilID.String()
	taskContext := TaskContext{
		Repository:       request.ProjectContext.Repository,
		IssueTitle:       request.ProjectContext.ProjectName,
		IssueDescription: request.ProjectContext.ProjectBrief,
		Priority:         "high", // council formation is always high priority
		ExternalURL:      request.ProjectContext.ExternalURL,
	}

	config := &AgentDeploymentConfig{
		TeamID:      councilID,
		TaskID:      councilID,
		AgentRole:   agent.RoleName,
		AgentType:   "council",
		Image:       image,
		Replicas:    1, // single replica per council agent
		Resources:   ad.calculateCouncilResources(agent),
		Environment: ad.buildCouncilAgentEnvironment(request, agent),
		TaskContext: taskContext,
		Networks:    []string{"chorus_default"}, // connect to the CHORUS network
		Volumes:     ad.buildCouncilAgentVolumes(request),
		Placement:   ad.buildCouncilAgentPlacement(agent),
	}

	service, err := ad.swarmManager.DeployAgent(config)
	if err != nil {
		return nil, fmt.Errorf("failed to deploy council agent service: %w", err)
	}

	return &council.DeployedCouncilAgent{
		ServiceID:   service.ID,
		ServiceName: service.Spec.Name,
		RoleName:    agent.RoleName,
		AgentID:     agent.AgentID,
		Image:       image,
		Status:      "deploying",
		DeployedAt:  time.Now(),
	}, nil
}
// buildCouncilAgentEnvironment creates environment variables for council
// agent configuration: council-mode flags, the project brief and related
// guidance, plus WHOOSH coordination and Docker sandbox access.
func (ad *AgentDeployer) buildCouncilAgentEnvironment(request *CouncilDeploymentRequest, agent council.CouncilAgent) map[string]string {
	env := make(map[string]string)

	// Core CHORUS configuration for council mode.
	env["CHORUS_AGENT_NAME"] = agent.RoleName // maps to human-roles.yaml agent definition
	env["CHORUS_COUNCIL_MODE"] = "true"
	env["CHORUS_COUNCIL_ID"] = request.CouncilID.String()
	env["CHORUS_PROJECT_NAME"] = request.ProjectContext.ProjectName

	// Council prompt and project guidance.
	env["CHORUS_COUNCIL_PROMPT"] = "/app/prompts/council.md"
	env["CHORUS_PROJECT_BRIEF"] = request.ProjectContext.ProjectBrief
	env["CHORUS_CONSTRAINTS"] = request.ProjectContext.Constraints
	env["CHORUS_TECH_LIMITS"] = request.ProjectContext.TechLimits
	env["CHORUS_COMPLIANCE_NOTES"] = request.ProjectContext.ComplianceNotes
	env["CHORUS_TARGETS"] = request.ProjectContext.Targets

	// Essential project context.
	env["CHORUS_PROJECT"] = request.ProjectContext.Repository
	env["CHORUS_EXTERNAL_URL"] = request.ProjectContext.ExternalURL
	env["CHORUS_PRIORITY"] = "high"

	// WHOOSH coordination.
	env["WHOOSH_COORDINATOR"] = "true"
	env["WHOOSH_ENDPOINT"] = "http://whoosh:8080"

	// Docker access for CHORUS sandbox management.
	env["DOCKER_HOST"] = "unix:///var/run/docker.sock"

	return env
}
// calculateCouncilResources determines resource requirements for council
// agents. They receive a larger allocation than standard team agents
// (1.5 CPU / 2GB limit, 0.75 CPU / 1GB request) since they handle complex
// analysis.
func (ad *AgentDeployer) calculateCouncilResources(agent council.CouncilAgent) ResourceLimits {
	limits := ResourceLimits{}
	limits.CPULimit = 1500000000      // 1.5 CPU cores
	limits.MemoryLimit = 2147483648   // 2GB RAM
	limits.CPURequest = 750000000     // 0.75 CPU core
	limits.MemoryRequest = 1073741824 // 1GB RAM
	return limits
}
// buildCouncilAgentVolumes creates volume mounts for council agents: the
// host Docker socket, a council-scoped workspace volume, and a read-only
// mount of the council prompt files.
func (ad *AgentDeployer) buildCouncilAgentVolumes(request *CouncilDeploymentRequest) []VolumeMount {
	dockerSocket := VolumeMount{
		Type:     "bind",
		Source:   "/var/run/docker.sock",
		Target:   "/var/run/docker.sock",
		ReadOnly: false, // council agents need Docker access for complex setup
	}
	workspace := VolumeMount{
		Type:     "volume",
		Source:   fmt.Sprintf("whoosh-council-%s", request.CouncilID.String()),
		Target:   "/workspace",
		ReadOnly: false,
	}
	prompts := VolumeMount{
		Type:     "bind",
		Source:   "/rust/containers/WHOOSH/prompts",
		Target:   "/app/prompts",
		ReadOnly: true, // council prompts are read-only
	}
	return []VolumeMount{dockerSocket, workspace, prompts}
}
// buildCouncilAgentPlacement creates placement constraints for council
// agents, steering them onto worker nodes.
func (ad *AgentDeployer) buildCouncilAgentPlacement(agent council.CouncilAgent) PlacementConfig {
	constraints := []string{"node.role==worker"}
	return PlacementConfig{Constraints: constraints}
}
// recordCouncilAgentDeployment marks a council agent row as deployed and
// active, storing the Swarm service ID that backs it.
func (ad *AgentDeployer) recordCouncilAgentDeployment(councilID uuid.UUID, agent council.CouncilAgent, serviceID string) error {
	const query = `
		UPDATE council_agents
		SET deployed = true, status = 'active', service_id = $1, deployed_at = NOW(), updated_at = NOW()
		WHERE council_id = $2 AND agent_id = $3
	`
	_, err := ad.db.Exec(ad.ctx, query, serviceID, councilID, agent.AgentID)
	return err
}
// updateCouncilDeploymentStatus updates the council deployment status in the
// database. Deployment statuses map onto council statuses: "failed" marks
// the council failed, while "success" and "partial" both leave it active (a
// partial deployment still allows the council to function).
//
// NOTE(review): message is currently unused — the councils table update does
// not persist it; confirm whether a status-message column should exist.
func (ad *AgentDeployer) updateCouncilDeploymentStatus(councilID uuid.UUID, status, message string) error {
	const query = `
		UPDATE councils
		SET status = $1, updated_at = NOW()
		WHERE id = $2
	`

	councilStatus := "active"
	if status == "failed" {
		councilStatus = "failed"
	}

	_, err := ad.db.Exec(ad.ctx, query, councilStatus, councilID)
	return err
}

View File

@@ -0,0 +1,568 @@
package orchestrator
import (
"context"
"encoding/json"
"fmt"
"io"
"time"
"github.com/docker/docker/api/types"
"github.com/docker/docker/api/types/container"
"github.com/docker/docker/api/types/filters"
"github.com/docker/docker/api/types/mount"
"github.com/docker/docker/api/types/swarm"
"github.com/docker/docker/client"
"github.com/rs/zerolog/log"
)
// SwarmManager manages Docker Swarm services for agent deployment.
// Create one with NewSwarmManager and release it with Close.
type SwarmManager struct {
	client   *client.Client     // Docker API client
	ctx      context.Context    // lifetime context for Docker calls; cancelled by Close
	cancel   context.CancelFunc // cancels ctx when Close is called
	registry string             // Docker registry for agent images
}
// NewSwarmManager creates a new Docker Swarm manager.
// When dockerHost is empty the client is configured from the environment;
// otherwise the given host is used. The connection is verified with a ping
// before the manager is returned. An empty registry falls back to the
// default private registry.
func NewSwarmManager(dockerHost, registry string) (*SwarmManager, error) {
	ctx, cancel := context.WithCancel(context.Background())

	// Assemble client options, then create the Docker client in one call.
	var opts []client.Opt
	if dockerHost != "" {
		opts = append(opts, client.WithHost(dockerHost))
	} else {
		opts = append(opts, client.FromEnv)
	}
	opts = append(opts, client.WithAPIVersionNegotiation())

	dockerClient, err := client.NewClientWithOpts(opts...)
	if err != nil {
		cancel()
		return nil, fmt.Errorf("failed to create Docker client: %w", err)
	}

	// Verify the daemon is reachable before handing the manager out.
	if _, err := dockerClient.Ping(ctx); err != nil {
		cancel()
		return nil, fmt.Errorf("failed to connect to Docker daemon: %w", err)
	}

	if registry == "" {
		registry = "registry.home.deepblack.cloud" // default private registry
	}

	return &SwarmManager{
		client:   dockerClient,
		ctx:      ctx,
		cancel:   cancel,
		registry: registry,
	}, nil
}
// Close closes the Docker client and cancels the manager's context,
// aborting any in-flight Docker calls made with it.
// It returns the error from closing the Docker client, if any.
func (sm *SwarmManager) Close() error {
	sm.cancel()
	return sm.client.Close()
}
// AgentDeploymentConfig defines configuration for deploying an agent as a
// Docker Swarm service: image and scale, resource limits, environment,
// task context, networking, mounts, and node placement.
type AgentDeploymentConfig struct {
	TeamID      string            `json:"team_id"`
	TaskID      string            `json:"task_id"`
	AgentRole   string            `json:"agent_role"`   // executor, coordinator, reviewer
	AgentType   string            `json:"agent_type"`   // general, specialized
	Image       string            `json:"image"`        // Docker image to use
	Replicas    uint64            `json:"replicas"`     // Number of instances
	Resources   ResourceLimits    `json:"resources"`    // CPU/Memory limits
	Environment map[string]string `json:"environment"`  // Environment variables
	TaskContext TaskContext       `json:"task_context"` // Task-specific context
	Networks    []string          `json:"networks"`     // Docker networks to join
	Volumes     []VolumeMount     `json:"volumes"`      // Volume mounts
	Placement   PlacementConfig   `json:"placement"`    // Node placement constraints
}
// ResourceLimits defines CPU and memory limits for containers.
// CPU values are expressed in nano-CPUs (1e9 = one core) and memory values
// in bytes.
type ResourceLimits struct {
	CPULimit      int64 `json:"cpu_limit"`      // CPU limit in nano CPUs (1e9 = 1 CPU)
	MemoryLimit   int64 `json:"memory_limit"`   // Memory limit in bytes
	CPURequest    int64 `json:"cpu_request"`    // CPU request in nano CPUs
	MemoryRequest int64 `json:"memory_request"` // Memory request in bytes
}
// TaskContext provides task-specific information to agents, describing the
// issue being worked on and the repository it belongs to.
type TaskContext struct {
	IssueTitle       string                 `json:"issue_title"`
	IssueDescription string                 `json:"issue_description"`
	Repository       string                 `json:"repository"`
	TechStack        []string               `json:"tech_stack"`
	Requirements     []string               `json:"requirements"`
	Priority         string                 `json:"priority"`
	ExternalURL      string                 `json:"external_url"`
	Metadata         map[string]interface{} `json:"metadata"` // free-form extras
}
// VolumeMount defines a volume mount for containers.
// Type selects the Docker mount kind; Source is interpreted accordingly
// (host path for bind mounts, volume name for volumes).
type VolumeMount struct {
	Source   string `json:"source"`   // Host path or volume name
	Target   string `json:"target"`   // Container path
	ReadOnly bool   `json:"readonly"` // Read-only mount
	Type     string `json:"type"`     // bind, volume, tmpfs
}
// PlacementConfig defines where containers should be placed within the
// swarm, combining hard constraints, soft preferences, and target platforms.
type PlacementConfig struct {
	Constraints []string        `json:"constraints"` // Node constraints (e.g. "node.role==worker")
	Preferences []PlacementPref `json:"preferences"` // Placement preferences
	Platforms   []Platform      `json:"platforms"`   // Target platforms
}
// PlacementPref defines placement preferences.
// NOTE(review): the exact semantics of Spread (presumably a spread
// descriptor key) are not shown here — confirm against the swarm API usage.
type PlacementPref struct {
	Spread string `json:"spread"` // Spread across nodes
}
// Platform defines target platform for containers, using Docker's
// architecture/OS naming.
type Platform struct {
	Architecture string `json:"architecture"` // amd64, arm64, etc.
	OS           string `json:"os"`           // linux, windows
}
// shortID returns at most the first 8 characters of id. It tolerates
// IDs shorter than 8 bytes, where a raw id[:8] would panic.
func shortID(id string) string {
	if len(id) > 8 {
		return id[:8]
	}
	return id
}
// DeployAgent deploys an agent service to Docker Swarm.
//
// It builds a complete swarm.ServiceSpec from the deployment config
// (environment, volume mounts, resource limits, placement, networks),
// creates the service, and returns the inspected service object.
// Both the service and its containers are tagged with whoosh.* labels
// so they can later be found by ListAgentServices and removed by
// CleanupFailedServices.
func (sm *SwarmManager) DeployAgent(config *AgentDeploymentConfig) (*swarm.Service, error) {
	log.Info().
		Str("team_id", config.TeamID).
		Str("task_id", config.TaskID).
		Str("agent_role", config.AgentRole).
		Str("image", config.Image).
		Msg("🚀 Deploying agent to Docker Swarm")
	// Generate a unique, human-readable service name. IDs are truncated
	// to 8 characters for readability; shortID guards against IDs shorter
	// than 8 bytes, which previously caused a slice-bounds panic.
	serviceName := fmt.Sprintf("whoosh-agent-%s-%s-%s",
		shortID(config.TeamID),
		shortID(config.TaskID),
		config.AgentRole,
	)
	// Build environment variables
	env := sm.buildEnvironment(config)
	// Build volume mounts
	mounts := sm.buildMounts(config.Volumes)
	// Build resource specifications
	resources := sm.buildResources(config.Resources)
	// Build placement constraints
	placement := sm.buildPlacement(config.Placement)
	// Create service specification
	serviceSpec := swarm.ServiceSpec{
		Annotations: swarm.Annotations{
			Name: serviceName,
			Labels: map[string]string{
				"whoosh.team_id":    config.TeamID,
				"whoosh.task_id":    config.TaskID,
				"whoosh.agent_role": config.AgentRole,
				"whoosh.agent_type": config.AgentType,
				"whoosh.managed_by": "whoosh", // marker used by ListAgentServices
				"whoosh.created_at": time.Now().Format(time.RFC3339),
			},
		},
		TaskTemplate: swarm.TaskSpec{
			ContainerSpec: &swarm.ContainerSpec{
				Image:  config.Image,
				Env:    env,
				Mounts: mounts,
				Labels: map[string]string{
					"whoosh.team_id":    config.TeamID,
					"whoosh.task_id":    config.TaskID,
					"whoosh.agent_role": config.AgentRole,
				},
				// Healthcheck assumes the agent exposes /health on port 8080.
				Healthcheck: &container.HealthConfig{
					Test:     []string{"CMD-SHELL", "curl -f http://localhost:8080/health || exit 1"},
					Interval: 30 * time.Second,
					Timeout:  10 * time.Second,
					Retries:  3,
				},
			},
			Resources: resources,
			Placement: placement,
			Networks:  sm.buildNetworks(config.Networks),
		},
		Mode: swarm.ServiceMode{
			Replicated: &swarm.ReplicatedService{
				Replicas: &config.Replicas,
			},
		},
		UpdateConfig: &swarm.UpdateConfig{
			Parallelism: 1,
			Order:       "start-first", // bring the new task up before stopping the old one
		},
		// RollbackConfig removed for compatibility
	}
	// Create the service
	response, err := sm.client.ServiceCreate(sm.ctx, serviceSpec, types.ServiceCreateOptions{})
	if err != nil {
		return nil, fmt.Errorf("failed to create agent service: %w", err)
	}
	log.Info().
		Str("service_id", response.ID).
		Str("service_name", serviceName).
		Msg("✅ Agent service created successfully")
	// Re-inspect so the caller gets the fully-populated service object
	// (version, timestamps) rather than just the create response ID.
	service, _, err := sm.client.ServiceInspectWithRaw(sm.ctx, response.ID, types.ServiceInspectOptions{})
	if err != nil {
		return nil, fmt.Errorf("failed to inspect created service: %w", err)
	}
	return &service, nil
}
// buildEnvironment constructs the environment variable list for an agent
// container: fixed WHOOSH_* identity variables, optional TASK_* context
// variables (omitted when empty), and finally any caller-supplied extras.
func (sm *SwarmManager) buildEnvironment(config *AgentDeploymentConfig) []string {
	env := make([]string, 0, 8+len(config.Environment))
	add := func(key, value string) {
		env = append(env, key+"="+value)
	}

	// Identity variables are always present.
	add("WHOOSH_TEAM_ID", config.TeamID)
	add("WHOOSH_TASK_ID", config.TaskID)
	add("WHOOSH_AGENT_ROLE", config.AgentRole)
	add("WHOOSH_AGENT_TYPE", config.AgentType)

	// Task context variables, only when non-empty.
	tc := config.TaskContext
	if tc.IssueTitle != "" {
		add("TASK_TITLE", tc.IssueTitle)
	}
	if tc.Repository != "" {
		add("TASK_REPOSITORY", tc.Repository)
	}
	if tc.Priority != "" {
		add("TASK_PRIORITY", tc.Priority)
	}
	if tc.ExternalURL != "" {
		add("TASK_EXTERNAL_URL", tc.ExternalURL)
	}

	// Slice-valued context is passed as JSON. Marshalling a []string
	// cannot fail, so the error is safely ignored.
	if len(tc.TechStack) > 0 {
		data, _ := json.Marshal(tc.TechStack)
		add("TASK_TECH_STACK", string(data))
	}
	if len(tc.Requirements) > 0 {
		data, _ := json.Marshal(tc.Requirements)
		add("TASK_REQUIREMENTS", string(data))
	}

	// Caller-supplied extras last (map order is unspecified).
	for key, value := range config.Environment {
		add(key, value)
	}
	return env
}
// buildMounts constructs the container mount list from the configured
// volumes, then appends the shared WHOOSH workspace volume that every
// agent receives at /workspace. An unrecognized (or empty) Type falls
// back to a bind mount.
func (sm *SwarmManager) buildMounts(volumes []VolumeMount) []mount.Mount {
	// Pre-size for the configured volumes plus the default workspace
	// mount appended below.
	mounts := make([]mount.Mount, 0, len(volumes)+1)
	for _, vol := range volumes {
		mountType := mount.TypeBind
		switch vol.Type {
		case "volume":
			mountType = mount.TypeVolume
		case "tmpfs":
			mountType = mount.TypeTmpfs
		}
		mounts = append(mounts, mount.Mount{
			Type:     mountType,
			Source:   vol.Source,
			Target:   vol.Target,
			ReadOnly: vol.ReadOnly,
		})
	}
	// Add default workspace volume. Plain string literal here — the
	// original fmt.Sprintf had no format directives (go vet warning).
	mounts = append(mounts, mount.Mount{
		Type:     mount.TypeVolume,
		Source:   "whoosh-workspace", // Shared workspace volume
		Target:   "/workspace",
		ReadOnly: false,
	})
	return mounts
}
// buildResources translates ResourceLimits into a swarm resource
// requirement spec. Zero-valued fields are treated as "unset": the
// Limits/Reservations sub-structs are only attached when at least one
// of their fields is positive.
func (sm *SwarmManager) buildResources(limits ResourceLimits) *swarm.ResourceRequirements {
	req := &swarm.ResourceRequirements{}

	if limits.CPULimit > 0 || limits.MemoryLimit > 0 {
		lim := &swarm.Limit{}
		if limits.CPULimit > 0 {
			lim.NanoCPUs = limits.CPULimit
		}
		if limits.MemoryLimit > 0 {
			lim.MemoryBytes = limits.MemoryLimit
		}
		req.Limits = lim
	}

	if limits.CPURequest > 0 || limits.MemoryRequest > 0 {
		res := &swarm.Resources{}
		if limits.CPURequest > 0 {
			res.NanoCPUs = limits.CPURequest
		}
		if limits.MemoryRequest > 0 {
			res.MemoryBytes = limits.MemoryRequest
		}
		req.Reservations = res
	}

	return req
}
// buildPlacement translates PlacementConfig into a swarm.Placement:
// constraints pass through unchanged, preferences become spread
// strategies, and platforms become architecture/OS targets.
func (sm *SwarmManager) buildPlacement(config PlacementConfig) *swarm.Placement {
	p := &swarm.Placement{Constraints: config.Constraints}

	for _, pref := range config.Preferences {
		p.Preferences = append(p.Preferences, swarm.PlacementPreference{
			Spread: &swarm.SpreadOver{SpreadDescriptor: pref.Spread},
		})
	}

	for _, plat := range config.Platforms {
		p.Platforms = append(p.Platforms, swarm.Platform{
			Architecture: plat.Architecture,
			OS:           plat.OS,
		})
	}

	return p
}
// buildNetworks maps network names to swarm attachment configs,
// defaulting to the chorus_default overlay network when none are given.
func (sm *SwarmManager) buildNetworks(networks []string) []swarm.NetworkAttachmentConfig {
	if len(networks) == 0 {
		networks = []string{"chorus_default"}
	}
	configs := make([]swarm.NetworkAttachmentConfig, 0, len(networks))
	for _, name := range networks {
		configs = append(configs, swarm.NetworkAttachmentConfig{Target: name})
	}
	return configs
}
// RemoveAgent removes an agent service from Docker Swarm by service ID.
func (sm *SwarmManager) RemoveAgent(serviceID string) error {
	log.Info().
		Str("service_id", serviceID).
		Msg("🗑️ Removing agent service from Docker Swarm")

	if err := sm.client.ServiceRemove(sm.ctx, serviceID); err != nil {
		return fmt.Errorf("failed to remove service: %w", err)
	}

	log.Info().
		Str("service_id", serviceID).
		Msg("✅ Agent service removed successfully")
	return nil
}
// ListAgentServices returns all swarm services labeled as managed by
// WHOOSH (whoosh.managed_by=whoosh). Filtering is done client-side
// after listing every service.
func (sm *SwarmManager) ListAgentServices() ([]swarm.Service, error) {
	all, err := sm.client.ServiceList(sm.ctx, types.ServiceListOptions{
		Filters: filters.NewArgs(),
	})
	if err != nil {
		return nil, fmt.Errorf("failed to list services: %w", err)
	}

	// Keep only services carrying the WHOOSH management label.
	var managed []swarm.Service
	for _, svc := range all {
		if svc.Spec.Labels["whoosh.managed_by"] == "whoosh" {
			managed = append(managed, svc)
		}
	}
	return managed, nil
}
// GetServiceLogs retrieves the last `lines` timestamped stdout/stderr
// log lines for a service and returns them as a single string.
func (sm *SwarmManager) GetServiceLogs(serviceID string, lines int) (string, error) {
	reader, err := sm.client.ServiceLogs(sm.ctx, serviceID, types.ContainerLogsOptions{
		ShowStdout: true,
		ShowStderr: true,
		Tail:       fmt.Sprintf("%d", lines),
		Timestamps: true,
	})
	if err != nil {
		return "", fmt.Errorf("failed to get service logs: %w", err)
	}
	defer reader.Close()

	data, err := io.ReadAll(reader)
	if err != nil {
		return "", fmt.Errorf("failed to read service logs: %w", err)
	}
	return string(data), nil
}
// ScaleService scales a replicated service to the specified number of
// replicas. It returns an error (instead of panicking) if the service
// is not running in replicated mode, since global-mode services have
// no replica count to adjust.
func (sm *SwarmManager) ScaleService(serviceID string, replicas uint64) error {
	log.Info().
		Str("service_id", serviceID).
		Uint64("replicas", replicas).
		Msg("📈 Scaling agent service")
	// Get current service spec
	service, _, err := sm.client.ServiceInspectWithRaw(sm.ctx, serviceID, types.ServiceInspectOptions{})
	if err != nil {
		return fmt.Errorf("failed to inspect service: %w", err)
	}
	// Only replicated services can be scaled. For a global-mode service
	// Mode.Replicated is nil and dereferencing it would panic.
	if service.Spec.Mode.Replicated == nil {
		return fmt.Errorf("service %s is not in replicated mode and cannot be scaled", serviceID)
	}
	// Update replicas
	service.Spec.Mode.Replicated.Replicas = &replicas
	// Update the service
	_, err = sm.client.ServiceUpdate(sm.ctx, serviceID, service.Version, service.Spec, types.ServiceUpdateOptions{})
	if err != nil {
		return fmt.Errorf("failed to scale service: %w", err)
	}
	log.Info().
		Str("service_id", serviceID).
		Uint64("replicas", replicas).
		Msg("✅ Service scaled successfully")
	return nil
}
// GetServiceStatus returns the current status of a service: its desired
// replica count plus running/failed counts and a per-state histogram
// computed from the service's tasks.
func (sm *SwarmManager) GetServiceStatus(serviceID string) (*ServiceStatus, error) {
	service, _, err := sm.client.ServiceInspectWithRaw(sm.ctx, serviceID, types.ServiceInspectOptions{})
	if err != nil {
		return nil, fmt.Errorf("failed to inspect service: %w", err)
	}

	// List all tasks belonging to this service.
	tasks, err := sm.client.TaskList(sm.ctx, types.TaskListOptions{
		Filters: filters.NewArgs(filters.Arg("service", serviceID)),
	})
	if err != nil {
		return nil, fmt.Errorf("failed to list tasks: %w", err)
	}

	status := &ServiceStatus{
		ServiceID:   serviceID,
		ServiceName: service.Spec.Name,
		Image:       service.Spec.TaskTemplate.ContainerSpec.Image,
		TaskStates:  make(map[string]int),
		CreatedAt:   service.CreatedAt,
		UpdatedAt:   service.UpdatedAt,
	}

	// Desired replica count is only defined for replicated-mode services.
	if repl := service.Spec.Mode.Replicated; repl != nil && repl.Replicas != nil {
		status.Replicas = *repl.Replicas
	}

	// Tally per-state counts plus the running/failed aggregates.
	for _, task := range tasks {
		status.TaskStates[string(task.Status.State)]++
		switch task.Status.State {
		case swarm.TaskStateRunning:
			status.RunningTasks++
		case swarm.TaskStateFailed:
			status.FailedTasks++
		}
	}

	return status, nil
}
// ServiceStatus represents the current status of a service as reported
// by GetServiceStatus: identity, desired replica count, aggregate
// running/failed task counts, and a histogram of task states.
type ServiceStatus struct {
	ServiceID string `json:"service_id"` // Docker service ID
	ServiceName string `json:"service_name"` // service name from the spec
	Image string `json:"image"` // container image the service runs
	Replicas uint64 `json:"replicas"` // desired replicas (0 for global-mode services)
	RunningTasks uint64 `json:"running_tasks"` // tasks currently in the running state
	FailedTasks uint64 `json:"failed_tasks"` // tasks currently in the failed state
	TaskStates map[string]int `json:"task_states"` // count of tasks per state name
	CreatedAt time.Time `json:"created_at"` // service creation time
	UpdatedAt time.Time `json:"updated_at"` // last service update time
}
// CleanupFailedServices removes WHOOSH-managed services that have at
// least one failed task and no running tasks. Per-service errors are
// logged and skipped so one bad service does not abort the sweep.
func (sm *SwarmManager) CleanupFailedServices() error {
	services, err := sm.ListAgentServices()
	if err != nil {
		return fmt.Errorf("failed to list services: %w", err)
	}

	for _, svc := range services {
		status, statusErr := sm.GetServiceStatus(svc.ID)
		if statusErr != nil {
			log.Error().
				Err(statusErr).
				Str("service_id", svc.ID).
				Msg("Failed to get service status")
			continue
		}

		// Keep services that are still healthy or partially running.
		if status.FailedTasks == 0 || status.RunningTasks > 0 {
			continue
		}

		log.Warn().
			Str("service_id", svc.ID).
			Str("service_name", svc.Spec.Name).
			Uint64("failed_tasks", status.FailedTasks).
			Msg("Removing failed service")

		if removeErr := sm.RemoveAgent(svc.ID); removeErr != nil {
			log.Error().
				Err(removeErr).
				Str("service_id", svc.ID).
				Msg("Failed to remove failed service")
		}
	}
	return nil
}

View File

@@ -2,7 +2,6 @@ package p2p
import (
"context"
"fmt"
"net"
"net/http"
"sync"
@@ -119,155 +118,87 @@ func (d *Discovery) GetAgents() []*Agent {
// listenForBroadcasts listens for CHORUS agent P2P broadcasts
func (d *Discovery) listenForBroadcasts() {
// For now, simulate discovering the 9 CHORUS replicas that are running
// In a full implementation, this would listen on UDP multicast for actual P2P broadcasts
log.Info().Msg("🔍 Starting real CHORUS agent discovery")
log.Info().Msg("🔍 Simulating P2P discovery of CHORUS agents")
// Since we know CHORUS is running 9 replicas, let's simulate discovering them
ticker := time.NewTicker(10 * time.Second)
// Real discovery polling every 30 seconds to avoid overwhelming the service
ticker := time.NewTicker(30 * time.Second)
defer ticker.Stop()
// Run initial discovery immediately
d.discoverRealCHORUSAgents()
for {
select {
case <-d.ctx.Done():
return
case <-ticker.C:
d.simulateAgentDiscovery()
d.discoverRealCHORUSAgents()
}
}
}
// simulateAgentDiscovery discovers CHORUS agents by querying their health endpoints
func (d *Discovery) simulateAgentDiscovery() {
log.Debug().Msg("🔍 Discovering CHORUS agents via health endpoints")
// discoverRealCHORUSAgents discovers actual CHORUS agents by querying their health endpoints
func (d *Discovery) discoverRealCHORUSAgents() {
log.Debug().Msg("🔍 Discovering real CHORUS agents via health endpoints")
// Query Docker DNS for CHORUS service tasks
// In Docker Swarm, tasks can be discovered via the service name
d.discoverCHORUSReplicas()
// Query the actual CHORUS service to see what's running
d.queryActualCHORUSService()
}
// discoverCHORUSReplicas discovers running CHORUS replicas in the Docker Swarm network.
// This function implements a discovery strategy that works around Docker Swarm's round-robin
// DNS by making multiple requests to discover individual service replicas.
//
// Technical challenges and solutions:
// 1. Docker Swarm round-robin DNS makes it hard to discover individual replicas
// 2. We use multiple HTTP requests to hit different replicas via load balancer
// 3. Generate synthetic agent IDs since CHORUS doesn't expose unique identifiers yet
// 4. Create realistic agent metadata for team formation algorithms
//
// This approach is a pragmatic MVP solution - in production, CHORUS agents would
// register themselves with unique IDs and capabilities via a proper discovery protocol.
func (d *Discovery) discoverCHORUSReplicas() {
// HTTP client with short timeout for health checks. We use 5 seconds because:
// 1. Health endpoints should respond quickly (< 1s typically)
// 2. We're making multiple requests, so timeouts add up
// 3. Docker Swarm networking is usually fast within cluster
client := &http.Client{Timeout: 5 * time.Second}
baseTime := time.Now() // Consistent timestamp for this discovery cycle
// queryActualCHORUSService queries the real CHORUS service to discover actual running agents.
// This function replaces the previous simulation and discovers only what's actually running.
func (d *Discovery) queryActualCHORUSService() {
client := &http.Client{Timeout: 10 * time.Second}
// Local map to track agents discovered in this cycle. We use a map to ensure
// we don't create duplicate agents if we happen to hit the same replica twice.
discovered := make(map[string]*Agent)
// Try to query the CHORUS health endpoint
endpoint := "http://chorus:8081/health"
resp, err := client.Get(endpoint)
if err != nil {
log.Debug().
Err(err).
Str("endpoint", endpoint).
Msg("Failed to reach CHORUS health endpoint")
return
}
defer resp.Body.Close()
// Discovery strategy: Make multiple requests to the service endpoint.
// Docker Swarm's round-robin load balancing will distribute these across
// different replicas, allowing us to discover individual instances.
// 15 attempts gives us good coverage of a 9-replica service.
for attempt := 1; attempt <= 15; attempt++ {
// Use the CHORUS health port (8081) rather than API port (8080) because:
// 1. Health endpoints are lightweight and fast
// 2. They don't require authentication or complex request processing
// 3. They're designed to be called frequently for monitoring
endpoint := "http://chorus:8081/health"
// Make the health check request. Docker Swarm will route this to one
// of the available CHORUS replicas based on its load balancing algorithm.
resp, err := client.Get(endpoint)
if err != nil {
// Log connection failures at debug level since some failures are expected
// during service startup or when replicas are being updated.
log.Debug().
Err(err).
Str("endpoint", endpoint).
Int("attempt", attempt).
Msg("Failed to query CHORUS health endpoint")
continue
}
// Process successful health check responses
if resp.StatusCode == http.StatusOK {
// Generate a synthetic agent ID since CHORUS doesn't provide unique IDs yet.
// In production, this would come from the health check response body.
// Using zero-padded numbers ensures consistent sorting in the UI.
agentID := fmt.Sprintf("chorus-agent-%03d", len(discovered)+1)
// Only create new agent if we haven't seen this ID before in this cycle
if _, exists := discovered[agentID]; !exists {
// Create agent with realistic metadata for team formation.
// These capabilities and models would normally come from the
// actual CHORUS agent configuration.
agent := &Agent{
ID: agentID,
Name: fmt.Sprintf("CHORUS Agent %d", len(discovered)+1),
Status: "online", // Default to online since health check succeeded
// Standard CHORUS agent capabilities - these define what types of
// tasks the agent can handle in team formation algorithms
Capabilities: []string{"general_development", "task_coordination", "ai_integration"},
Model: "llama3.1:8b", // Standard model for CHORUS agents
Endpoint: "http://chorus:8080", // API port for task assignment
LastSeen: baseTime, // Consistent timestamp for this discovery cycle
// Synthetic task completion count for load balancing algorithms.
// In production, this would be actual metrics from agent performance.
TasksCompleted: len(discovered) * 2,
P2PAddr: "chorus:9000", // P2P communication port
ClusterID: "docker-unified-stack", // Docker Swarm cluster identifier
}
// Add some variety to agent status for realistic team formation testing.
// This simulates real-world scenarios where agents have different availability.
if len(discovered)%3 == 0 {
agent.Status = "idle" // Every third agent is idle
} else if len(discovered) == 6 {
// One agent is actively working on a team assignment
agent.Status = "working"
agent.CurrentTeam = "development-team-alpha"
}
// Add to discovered agents and log the discovery
discovered[agentID] = agent
log.Debug().
Str("agent_id", agentID).
Str("status", agent.Status).
Msg("🤖 Discovered CHORUS agent")
}
}
resp.Body.Close()
// Stop discovery once we've found the expected number of agents.
// This prevents unnecessary HTTP requests and speeds up discovery cycles.
if len(discovered) >= 9 {
break
}
// Brief pause between requests to avoid overwhelming the service and
// to allow Docker Swarm's load balancer to potentially route to different replicas.
time.Sleep(100 * time.Millisecond)
if resp.StatusCode != http.StatusOK {
log.Debug().
Int("status_code", resp.StatusCode).
Str("endpoint", endpoint).
Msg("CHORUS health endpoint returned non-200 status")
return
}
// Add all discovered agents
for _, agent := range discovered {
d.addOrUpdateAgent(agent)
// CHORUS is responding, so create a single agent entry for the actual instance
agentID := "chorus-agent-001"
agent := &Agent{
ID: agentID,
Name: "CHORUS Agent",
Status: "online",
Capabilities: []string{
"general_development",
"task_coordination",
"ai_integration",
"code_analysis",
"autonomous_development",
},
Model: "llama3.1:8b",
Endpoint: "http://chorus:8080",
LastSeen: time.Now(),
TasksCompleted: 0, // Will be updated by actual task completion tracking
P2PAddr: "chorus:9000",
ClusterID: "docker-unified-stack",
}
// Check if CHORUS has an API endpoint that provides more detailed info
// For now, we'll just use the single discovered instance
d.addOrUpdateAgent(agent)
log.Info().
Int("discovered_count", len(discovered)).
Msg("🎭 CHORUS agent discovery completed")
Str("agent_id", agentID).
Str("endpoint", endpoint).
Msg("🤖 Discovered real CHORUS agent")
}
// addOrUpdateAgent adds or updates an agent in the discovery cache

View File

@@ -11,12 +11,15 @@ import (
"strings"
"time"
"github.com/chorus-services/whoosh/internal/agents"
"github.com/chorus-services/whoosh/internal/backbeat"
"github.com/chorus-services/whoosh/internal/composer"
"github.com/chorus-services/whoosh/internal/config"
"github.com/chorus-services/whoosh/internal/council"
"github.com/chorus-services/whoosh/internal/database"
"github.com/chorus-services/whoosh/internal/gitea"
"github.com/chorus-services/whoosh/internal/monitor"
"github.com/chorus-services/whoosh/internal/orchestrator"
"github.com/chorus-services/whoosh/internal/p2p"
"github.com/chorus-services/whoosh/internal/tasks"
"github.com/go-chi/chi/v5"
@@ -27,6 +30,14 @@ import (
"github.com/rs/zerolog/log"
)
// Global version variable set by main package
var version = "development"
// SetVersion sets the global version variable
func SetVersion(v string) {
version = v
}
type Server struct {
config *config.Config
db *database.DB
@@ -35,11 +46,15 @@ type Server struct {
giteaClient *gitea.Client
webhookHandler *gitea.WebhookHandler
p2pDiscovery *p2p.Discovery
agentRegistry *agents.Registry
backbeat *backbeat.Integration
teamComposer *composer.Service
councilComposer *council.CouncilComposer
taskService *tasks.Service
giteaIntegration *tasks.GiteaIntegration
repoMonitor *monitor.Monitor
swarmManager *orchestrator.SwarmManager
agentDeployer *orchestrator.AgentDeployer
}
func NewServer(cfg *config.Config, db *database.DB) (*Server, error) {
@@ -47,19 +62,49 @@ func NewServer(cfg *config.Config, db *database.DB) (*Server, error) {
taskService := tasks.NewService(db.Pool)
giteaIntegration := tasks.NewGiteaIntegration(taskService, gitea.NewClient(cfg.GITEA), nil)
// Initialize repository monitor
repoMonitor := monitor.NewMonitor(db.Pool, cfg.GITEA)
// Initialize P2P discovery and agent registry
p2pDiscovery := p2p.NewDiscovery()
agentRegistry := agents.NewRegistry(db.Pool, p2pDiscovery)
// Initialize team composer
teamComposer := composer.NewService(db.Pool, nil) // Use default config
// Initialize council composer for project kickoffs
councilComposer := council.NewCouncilComposer(db.Pool)
// Initialize Docker Swarm orchestrator services conditionally
var swarmManager *orchestrator.SwarmManager
var agentDeployer *orchestrator.AgentDeployer
if cfg.Docker.Enabled {
var err error
swarmManager, err = orchestrator.NewSwarmManager("", "registry.home.deepblack.cloud")
if err != nil {
return nil, fmt.Errorf("failed to create swarm manager: %w", err)
}
agentDeployer = orchestrator.NewAgentDeployer(swarmManager, db.Pool, "registry.home.deepblack.cloud")
} else {
log.Warn().Msg("🐳 Docker integration disabled - council agent deployment unavailable")
}
// Initialize repository monitor with team composer, council composer, and agent deployer
repoMonitor := monitor.NewMonitor(db.Pool, cfg.GITEA, teamComposer, councilComposer, agentDeployer)
s := &Server{
config: cfg,
db: db,
giteaClient: gitea.NewClient(cfg.GITEA),
webhookHandler: gitea.NewWebhookHandler(cfg.GITEA.WebhookToken),
p2pDiscovery: p2p.NewDiscovery(),
teamComposer: composer.NewService(db.Pool, nil), // Use default config
p2pDiscovery: p2pDiscovery,
agentRegistry: agentRegistry,
teamComposer: teamComposer,
councilComposer: councilComposer,
taskService: taskService,
giteaIntegration: giteaIntegration,
repoMonitor: repoMonitor,
swarmManager: swarmManager,
agentDeployer: agentDeployer,
}
// Initialize BACKBEAT integration if enabled
@@ -206,6 +251,11 @@ func (s *Server) Start(ctx context.Context) error {
return fmt.Errorf("failed to start P2P discovery: %w", err)
}
// Start agent registry service
if err := s.agentRegistry.Start(); err != nil {
return fmt.Errorf("failed to start agent registry: %w", err)
}
// Start repository monitoring service
if s.repoMonitor != nil {
go func() {
@@ -237,6 +287,11 @@ func (s *Server) Shutdown(ctx context.Context) error {
}
}
// Stop agent registry service
if err := s.agentRegistry.Stop(); err != nil {
log.Error().Err(err).Msg("Failed to stop agent registry service")
}
// Stop P2P discovery service
if err := s.p2pDiscovery.Stop(); err != nil {
log.Error().Err(err).Msg("Failed to stop P2P discovery service")
@@ -1778,7 +1833,10 @@ func (s *Server) dashboardHandler(w http.ResponseWriter, r *http.Request) {
<body>
<div class="header">
<div class="header-content">
<div class="logo">🎭 WHOOSH</div>
<div style="display: flex; align-items: center;">
<div class="logo">🎭 WHOOSH</div>
<div style="margin-left: 16px; font-size: 14px; opacity: 0.8;">v` + version + `</div>
</div>
<div style="display: flex; align-items: center;">
<span class="status-dot"></span>
<span>System Online</span>