Implement initial scan logic and council formation for WHOOSH project kickoffs

- Replace incremental sync with full scan for new repositories
- Add initial_scan status to bypass Since parameter filtering
- Implement council formation detection for Design Brief issues
- Add version display to WHOOSH UI header for debugging
- Fix Docker token authentication by removing the trailing newline
- Add comprehensive council orchestration with Docker Swarm integration
- Include BACKBEAT prototype integration for distributed timing
- Support council-specific agent roles and deployment strategies
- Transition repositories to active status after content discovery

Key architectural improvements:
- Use a full scan instead of an incremental sync to detect new projects
- Council formation triggered by chorus-entrypoint labeled Design Briefs
- Proper token handling and authentication for Gitea API calls
- Support for both initial discovery and ongoing task monitoring

This enables autonomous project kickoff workflows where Design Brief issues
automatically trigger formation of specialized agent councils for new projects.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Claude Code
2025-09-12 09:49:36 +10:00
parent b5c0deb6bc
commit 56ea52b743
74 changed files with 17778 additions and 236 deletions

View File

@@ -0,0 +1,591 @@
package orchestrator
import (
"context"
"fmt"
"time"
"github.com/chorus-services/whoosh/internal/composer"
"github.com/chorus-services/whoosh/internal/council"
"github.com/docker/docker/api/types/swarm"
"github.com/google/uuid"
"github.com/jackc/pgx/v5/pgxpool"
"github.com/rs/zerolog/log"
)
// AgentDeployer deploys agent containers for teams and councils. It creates
// services through a SwarmManager and persists deployment records and status
// updates through the database pool.
type AgentDeployer struct {
	// swarmManager creates the Swarm service backing each agent.
	swarmManager *SwarmManager
	// db is the pool used for deployment records and status updates.
	db *pgxpool.Pool
	// registry is the registry name given to NewAgentDeployer (or its default).
	registry string
	// ctx is passed to every database call; cancel is invoked by Close.
	ctx    context.Context
	cancel context.CancelFunc
}
// NewAgentDeployer builds an AgentDeployer around the given Swarm manager and
// database pool. An empty registry falls back to
// "registry.home.deepblack.cloud". The deployer owns a cancellable background
// context; call Close to cancel it.
func NewAgentDeployer(swarmManager *SwarmManager, db *pgxpool.Pool, registry string) *AgentDeployer {
	deployer := &AgentDeployer{
		swarmManager: swarmManager,
		db:           db,
		registry:     registry,
	}
	if deployer.registry == "" {
		deployer.registry = "registry.home.deepblack.cloud"
	}
	deployer.ctx, deployer.cancel = context.WithCancel(context.Background())
	return deployer
}
// Close cancels the deployer's context, which is the context handed to every
// database call made by this deployer. It always returns nil.
func (ad *AgentDeployer) Close() error {
	stop := ad.cancel
	stop()
	return nil
}
// DeploymentRequest describes the agents to deploy for one team working on
// one task.
type DeploymentRequest struct {
	// TeamID and TaskID identify the team and the task it is deployed for.
	TeamID uuid.UUID `json:"team_id"`
	TaskID uuid.UUID `json:"task_id"`
	// TeamComposition supplies the agent matches; one service is deployed per
	// entry in its AgentMatches.
	TeamComposition *composer.TeamComposition `json:"team_composition"`
	// TaskContext carries the issue details passed to agents as environment
	// variables.
	TaskContext *TaskContext `json:"task_context"`
	// DeploymentMode is one of: immediate, scheduled, manual.
	DeploymentMode string `json:"deployment_mode"`
}
// DeploymentResult summarises the outcome of a team deployment.
type DeploymentResult struct {
	TeamID uuid.UUID `json:"team_id"`
	TaskID uuid.UUID `json:"task_id"`
	// DeployedServices has one entry per agent whose service was created.
	DeployedServices []DeployedService `json:"deployed_services"`
	// Status is one of: success, partial, failed.
	Status string `json:"status"`
	// Message is a human-readable summary of the outcome.
	Message string `json:"message"`
	// DeployedAt is the time the deployment run started.
	DeployedAt time.Time `json:"deployed_at"`
	// Errors has one message per agent that could not be deployed.
	Errors []string `json:"errors,omitempty"`
}
// DeployedService describes one agent service that was created in the swarm.
type DeployedService struct {
	// ServiceID and ServiceName are the service's ID and spec name.
	ServiceID   string `json:"service_id"`
	ServiceName string `json:"service_name"`
	// AgentRole and AgentID identify the agent match the service was created for.
	AgentRole string `json:"agent_role"`
	AgentID   string `json:"agent_id"`
	// Image is the container image the service runs.
	Image string `json:"image"`
	// Status is the deployment state recorded at creation time.
	Status string `json:"status"`
}
// CouncilDeploymentRequest describes the agents to deploy for a project
// kickoff council.
type CouncilDeploymentRequest struct {
	CouncilID   uuid.UUID `json:"council_id"`
	ProjectName string    `json:"project_name"`
	// CouncilComposition supplies the core (required) and optional agents.
	CouncilComposition *council.CouncilComposition `json:"council_composition"`
	// ProjectContext carries the project details passed to council agents as
	// environment variables.
	ProjectContext *CouncilProjectContext `json:"project_context"`
	// DeploymentMode is one of: immediate, scheduled, manual.
	DeploymentMode string `json:"deployment_mode"`
}
// CouncilProjectContext holds the project information handed to council
// agents. The first three fields are always serialised; the remaining ones
// are omitted from JSON when empty.
type CouncilProjectContext struct {
	ProjectName  string `json:"project_name"`
	Repository   string `json:"repository"`
	ProjectBrief string `json:"project_brief"`

	Constraints     string `json:"constraints,omitempty"`
	TechLimits      string `json:"tech_limits,omitempty"`
	ComplianceNotes string `json:"compliance_notes,omitempty"`
	Targets         string `json:"targets,omitempty"`
	ExternalURL     string `json:"external_url,omitempty"`
}
// DeployTeamAgents deploys one Swarm service per agent match in the request's
// team composition and records each successful deployment in the database.
//
// A failure to deploy an individual agent does not abort the run: it is
// appended to DeploymentResult.Errors and reflected in the overall Status
// ("success" when nothing failed, "partial" when some agents deployed,
// "failed" when none did). Failures to persist a deployment record or the
// team status are only logged.
//
// An error is returned only when the request itself is unusable (nil
// request, team composition or task context); such requests would otherwise
// cause a nil pointer panic further down.
func (ad *AgentDeployer) DeployTeamAgents(request *DeploymentRequest) (*DeploymentResult, error) {
	if request == nil {
		return nil, fmt.Errorf("deployment request is nil")
	}
	if request.TeamComposition == nil {
		return nil, fmt.Errorf("deployment request for team %s has no team composition", request.TeamID.String())
	}
	if request.TaskContext == nil {
		return nil, fmt.Errorf("deployment request for team %s has no task context", request.TeamID.String())
	}

	matches := request.TeamComposition.AgentMatches

	log.Info().
		Str("team_id", request.TeamID.String()).
		Str("task_id", request.TaskID.String()).
		Int("agent_matches", len(matches)).
		Msg("🚀 Starting team agent deployment")

	result := &DeploymentResult{
		TeamID:           request.TeamID,
		TaskID:           request.TaskID,
		DeployedServices: []DeployedService{},
		DeployedAt:       time.Now(),
		Errors:           []string{},
	}

	// Deploy each agent in the team composition.
	for _, agentMatch := range matches {
		service, err := ad.deploySingleAgent(request, agentMatch)
		if err != nil {
			errorMsg := fmt.Sprintf("Failed to deploy agent %s for role %s: %v",
				agentMatch.Agent.Name, agentMatch.Role.Name, err)
			result.Errors = append(result.Errors, errorMsg)
			log.Error().
				Err(err).
				Str("agent_id", agentMatch.Agent.ID.String()).
				Str("role", agentMatch.Role.Name).
				Msg("Failed to deploy agent")
			continue
		}

		result.DeployedServices = append(result.DeployedServices, DeployedService{
			ServiceID:   service.ID,
			ServiceName: service.Spec.Name,
			AgentRole:   agentMatch.Role.Name,
			AgentID:     agentMatch.Agent.ID.String(),
			Image:       service.Spec.TaskTemplate.ContainerSpec.Image,
			Status:      "deploying",
		})

		// The service is already running, so a bookkeeping failure is logged
		// rather than treated as a deployment failure.
		if err := ad.recordDeployment(request.TeamID, request.TaskID, agentMatch, service.ID); err != nil {
			log.Error().
				Err(err).
				Str("service_id", service.ID).
				Msg("Failed to record deployment in database")
		}
	}

	// Determine overall deployment status.
	switch {
	case len(result.Errors) == 0:
		result.Status = "success"
		result.Message = fmt.Sprintf("Successfully deployed %d agents", len(result.DeployedServices))
	case len(result.DeployedServices) > 0:
		result.Status = "partial"
		result.Message = fmt.Sprintf("Deployed %d/%d agents with %d errors",
			len(result.DeployedServices),
			len(matches),
			len(result.Errors))
	default:
		result.Status = "failed"
		result.Message = "Failed to deploy any agents"
	}

	// Update team deployment status in database.
	if err := ad.updateTeamDeploymentStatus(request.TeamID, result.Status, result.Message); err != nil {
		log.Error().
			Err(err).
			Str("team_id", request.TeamID.String()).
			Msg("Failed to update team deployment status")
	}

	log.Info().
		Str("team_id", request.TeamID.String()).
		Str("status", result.Status).
		Int("deployed", len(result.DeployedServices)).
		Int("errors", len(result.Errors)).
		Msg("✅ Team agent deployment completed")

	return result, nil
}
// selectAgentImage returns the container image to run for an agent. Both
// arguments are currently ignored: every role gets the same CHORUS image.
// Per the original author's note, role specialisation happens inside the
// image, driven by environment variables.
func (ad *AgentDeployer) selectAgentImage(roleName string, agent *composer.Agent) string {
	const chorusImage = "docker.io/anthonyrawlins/chorus:backbeat-v2.0.1"
	return chorusImage
}
// buildAgentEnvironment assembles the environment variables passed to a
// CHORUS team agent: the agent's role name, team and task IDs, the task
// context from the request, and fixed WHOOSH/Docker settings.
// request.TaskContext must be non-nil.
func (ad *AgentDeployer) buildAgentEnvironment(request *DeploymentRequest, agentMatch *composer.AgentMatch) map[string]string {
	task := request.TaskContext
	env := make(map[string]string, 11)

	// Agent identity. Only the role name is passed; per the original author's
	// note it maps to the agent definition in human-roles.yaml and CHORUS
	// composes its own prompts.
	env["CHORUS_AGENT_NAME"] = agentMatch.Role.Name
	env["CHORUS_TEAM_ID"] = request.TeamID.String()
	env["CHORUS_TASK_ID"] = request.TaskID.String()

	// Essential task context.
	env["CHORUS_PROJECT"] = task.Repository
	env["CHORUS_TASK_TITLE"] = task.IssueTitle
	env["CHORUS_TASK_DESC"] = task.IssueDescription
	env["CHORUS_PRIORITY"] = task.Priority
	env["CHORUS_EXTERNAL_URL"] = task.ExternalURL

	// WHOOSH coordination.
	env["WHOOSH_COORDINATOR"] = "true"
	env["WHOOSH_ENDPOINT"] = "http://whoosh:8080"

	// Docker access for CHORUS sandbox management.
	env["DOCKER_HOST"] = "unix:///var/run/docker.sock"

	return env
}
// Note: CHORUS handles its own prompt composition from human-roles.yaml
// We just need to pass the agent name and essential task context
// determineAgentType returns the agent type used for a team agent. The match
// is currently ignored and every agent is reported as "standard".
func (ad *AgentDeployer) determineAgentType(agentMatch *composer.AgentMatch) string {
	const standardAgentType = "standard"
	return standardAgentType
}
// calculateResources returns the resource limits and reservations for a team
// agent. The match is currently ignored; every agent gets the same fixed
// allocation. CPU values are in nano-CPUs, memory values in bytes.
func (ad *AgentDeployer) calculateResources(agentMatch *composer.AgentMatch) ResourceLimits {
	const (
		oneCPU  = 1_000_000_000 // nano-CPUs in one core
		halfCPU = oneCPU / 2
		oneGiB  = 1 << 30
		halfGiB = 1 << 29
	)

	var limits ResourceLimits
	limits.CPULimit = oneCPU
	limits.MemoryLimit = oneGiB
	limits.CPURequest = halfCPU
	limits.MemoryRequest = halfGiB
	return limits
}
// buildAgentVolumes returns the mounts for a team agent: the host Docker
// socket (writable, for CHORUS sandboxing) and a per-team named volume
// mounted at /workspace.
func (ad *AgentDeployer) buildAgentVolumes(request *DeploymentRequest) []VolumeMount {
	dockerSocket := VolumeMount{
		Type:     "bind",
		Source:   "/var/run/docker.sock",
		Target:   "/var/run/docker.sock",
		ReadOnly: false,
	}
	workspace := VolumeMount{
		Type:     "volume",
		Source:   fmt.Sprintf("whoosh-workspace-%s", request.TeamID.String()),
		Target:   "/workspace",
		ReadOnly: false,
	}
	return []VolumeMount{dockerSocket, workspace}
}
// buildAgentPlacement returns the placement settings for a team agent. The
// match is currently ignored; the only setting is a constraint restricting
// the service to worker nodes. No preferences or platforms are set.
func (ad *AgentDeployer) buildAgentPlacement(agentMatch *composer.AgentMatch) PlacementConfig {
	var placement PlacementConfig
	placement.Constraints = []string{"node.role==worker"}
	return placement
}
// deploySingleAgent deploys one Swarm service for the given agent match and
// returns the created service.
//
// It returns an error, rather than panicking, when the request has no task
// context, and wraps any error reported by the Swarm manager.
func (ad *AgentDeployer) deploySingleAgent(request *DeploymentRequest, agentMatch *composer.AgentMatch) (*swarm.Service, error) {
	// The task context is copied by value into the config and read by
	// buildAgentEnvironment, so a nil pointer must be rejected up front.
	if request.TaskContext == nil {
		return nil, fmt.Errorf("failed to deploy agent service: request has no task context")
	}

	roleName := agentMatch.Role.Name

	// Build deployment configuration.
	config := &AgentDeploymentConfig{
		TeamID:      request.TeamID.String(),
		TaskID:      request.TaskID.String(),
		AgentRole:   roleName,
		AgentType:   ad.determineAgentType(agentMatch),
		Image:       ad.selectAgentImage(roleName, agentMatch.Agent),
		Replicas:    1, // Start with a single replica per agent
		Resources:   ad.calculateResources(agentMatch),
		Environment: ad.buildAgentEnvironment(request, agentMatch),
		TaskContext: *request.TaskContext,
		Networks:    []string{"chorus_default"},
		Volumes:     ad.buildAgentVolumes(request),
		Placement:   ad.buildAgentPlacement(agentMatch),
	}

	// Deploy the service.
	service, err := ad.swarmManager.DeployAgent(config)
	if err != nil {
		return nil, fmt.Errorf("failed to deploy agent service: %w", err)
	}
	return service, nil
}
// recordDeployment inserts a row into agent_deployments linking the team,
// task, agent and role to the created service, with status "deployed" and the
// database's current time. It returns the error from the insert, if any.
func (ad *AgentDeployer) recordDeployment(teamID uuid.UUID, taskID uuid.UUID, agentMatch *composer.AgentMatch, serviceID string) error {
	const insertDeployment = `
INSERT INTO agent_deployments (team_id, task_id, agent_id, role_id, service_id, status, deployed_at)
VALUES ($1, $2, $3, $4, $5, $6, NOW())
`
	agentID := agentMatch.Agent.ID
	roleID := agentMatch.Role.ID
	if _, err := ad.db.Exec(ad.ctx, insertDeployment, teamID, taskID, agentID, roleID, serviceID, "deployed"); err != nil {
		return err
	}
	return nil
}
// updateTeamDeploymentStatus stores the deployment status and message on the
// teams row with the given ID and bumps its updated_at timestamp. It returns
// the error from the update, if any.
func (ad *AgentDeployer) updateTeamDeploymentStatus(teamID uuid.UUID, status, message string) error {
	const updateTeam = `
UPDATE teams
SET deployment_status = $1, deployment_message = $2, updated_at = NOW()
WHERE id = $3
`
	if _, err := ad.db.Exec(ad.ctx, updateTeam, status, message, teamID); err != nil {
		return err
	}
	return nil
}
// DeployCouncilAgents deploys all agents of a project kickoff council.
//
// Core agents are required: each failure is appended to the result's Errors.
// Optional agents are best effort: their failures are only logged. The
// overall Status depends on core agents alone: "success" when every core
// agent deployed, "partial" when some did, "failed" when none did. Failures
// to persist a deployment record or the council status are only logged.
//
// Core deployments are counted as they happen. Counting afterwards by role
// name would misclassify an optional agent that shares a role name with a
// core agent and could report "success" for a partial core deployment.
//
// An error is returned only when the request itself is unusable (nil
// request, council composition or project context); such requests would
// otherwise cause a nil pointer panic.
func (ad *AgentDeployer) DeployCouncilAgents(request *CouncilDeploymentRequest) (*council.CouncilDeploymentResult, error) {
	if request == nil {
		return nil, fmt.Errorf("council deployment request is nil")
	}
	if request.CouncilComposition == nil {
		return nil, fmt.Errorf("council deployment request for council %s has no council composition", request.CouncilID.String())
	}
	if request.ProjectContext == nil {
		return nil, fmt.Errorf("council deployment request for council %s has no project context", request.CouncilID.String())
	}

	composition := request.CouncilComposition

	log.Info().
		Str("council_id", request.CouncilID.String()).
		Str("project_name", request.ProjectName).
		Int("core_agents", len(composition.CoreAgents)).
		Int("optional_agents", len(composition.OptionalAgents)).
		Msg("🎭 Starting council agent deployment")

	result := &council.CouncilDeploymentResult{
		CouncilID:      request.CouncilID,
		ProjectName:    request.ProjectName,
		DeployedAgents: []council.DeployedCouncilAgent{},
		DeployedAt:     time.Now(),
		Errors:         []string{},
	}

	// record persists a successful deployment. The service already exists,
	// so a bookkeeping failure is logged rather than treated as a deployment
	// failure.
	record := func(agent council.CouncilAgent, serviceID string) {
		if err := ad.recordCouncilAgentDeployment(request.CouncilID, agent, serviceID); err != nil {
			log.Error().
				Err(err).
				Str("service_id", serviceID).
				Msg("Failed to record council agent deployment in database")
		}
	}

	// Deploy core agents (required).
	deployedCoreAgents := 0
	for _, agent := range composition.CoreAgents {
		deployedAgent, err := ad.deploySingleCouncilAgent(request, agent)
		if err != nil {
			errorMsg := fmt.Sprintf("Failed to deploy core agent %s (%s): %v",
				agent.AgentName, agent.RoleName, err)
			result.Errors = append(result.Errors, errorMsg)
			log.Error().
				Err(err).
				Str("agent_id", agent.AgentID).
				Str("role", agent.RoleName).
				Msg("Failed to deploy core council agent")
			continue
		}
		result.DeployedAgents = append(result.DeployedAgents, *deployedAgent)
		deployedCoreAgents++
		record(agent, deployedAgent.ServiceID)
	}

	// Deploy optional agents (best effort).
	for _, agent := range composition.OptionalAgents {
		deployedAgent, err := ad.deploySingleCouncilAgent(request, agent)
		if err != nil {
			// Optional agents failing is not critical.
			log.Warn().
				Err(err).
				Str("agent_id", agent.AgentID).
				Str("role", agent.RoleName).
				Msg("Failed to deploy optional council agent (non-critical)")
			continue
		}
		result.DeployedAgents = append(result.DeployedAgents, *deployedAgent)
		record(agent, deployedAgent.ServiceID)
	}

	// Determine overall deployment status from the core agents only.
	coreAgentsCount := len(composition.CoreAgents)
	switch {
	case deployedCoreAgents == coreAgentsCount:
		result.Status = "success"
		result.Message = fmt.Sprintf("Successfully deployed %d agents (%d core, %d optional)",
			len(result.DeployedAgents), deployedCoreAgents, len(result.DeployedAgents)-deployedCoreAgents)
	case deployedCoreAgents > 0:
		result.Status = "partial"
		result.Message = fmt.Sprintf("Deployed %d/%d core agents with %d errors",
			deployedCoreAgents, coreAgentsCount, len(result.Errors))
	default:
		result.Status = "failed"
		result.Message = "Failed to deploy any core council agents"
	}

	// Update council deployment status in database.
	if err := ad.updateCouncilDeploymentStatus(request.CouncilID, result.Status, result.Message); err != nil {
		log.Error().
			Err(err).
			Str("council_id", request.CouncilID.String()).
			Msg("Failed to update council deployment status")
	}

	log.Info().
		Str("council_id", request.CouncilID.String()).
		Str("status", result.Status).
		Int("deployed", len(result.DeployedAgents)).
		Int("errors", len(result.Errors)).
		Msg("✅ Council agent deployment completed")

	return result, nil
}
// deploySingleCouncilAgent deploys one Swarm service for a council agent and
// returns a description of the deployed agent with status "deploying".
//
// The council ID is used as both team ID and task ID of the deployment
// config, and the priority is always "high". The image comes from
// selectAgentImage so team and council agents share one image definition.
//
// It returns an error, rather than panicking, when the request has no
// project context, and wraps any error reported by the Swarm manager.
func (ad *AgentDeployer) deploySingleCouncilAgent(request *CouncilDeploymentRequest, agent council.CouncilAgent) (*council.DeployedCouncilAgent, error) {
	// The project context is dereferenced below and by
	// buildCouncilAgentEnvironment, so a nil pointer must be rejected up front.
	project := request.ProjectContext
	if project == nil {
		return nil, fmt.Errorf("failed to deploy council agent service: request has no project context")
	}

	// All council agents use the same CHORUS image as team agents.
	image := ad.selectAgentImage(agent.RoleName, nil)
	councilID := request.CouncilID.String()

	// Build council-specific deployment configuration.
	config := &AgentDeploymentConfig{
		TeamID:      councilID, // Use council ID as team ID
		TaskID:      councilID, // Use council ID as task ID
		AgentRole:   agent.RoleName,
		AgentType:   "council",
		Image:       image,
		Replicas:    1, // Single replica per council agent
		Resources:   ad.calculateCouncilResources(agent),
		Environment: ad.buildCouncilAgentEnvironment(request, agent),
		TaskContext: TaskContext{
			Repository:       project.Repository,
			IssueTitle:       project.ProjectName,
			IssueDescription: project.ProjectBrief,
			Priority:         "high", // Council formation is always high priority
			ExternalURL:      project.ExternalURL,
		},
		Networks:  []string{"chorus_default"}, // Connect to CHORUS network
		Volumes:   ad.buildCouncilAgentVolumes(request),
		Placement: ad.buildCouncilAgentPlacement(agent),
	}

	// Deploy the service.
	service, err := ad.swarmManager.DeployAgent(config)
	if err != nil {
		return nil, fmt.Errorf("failed to deploy council agent service: %w", err)
	}

	return &council.DeployedCouncilAgent{
		ServiceID:   service.ID,
		ServiceName: service.Spec.Name,
		RoleName:    agent.RoleName,
		AgentID:     agent.AgentID,
		Image:       image,
		Status:      "deploying",
		DeployedAt:  time.Now(),
	}, nil
}
// buildCouncilAgentEnvironment assembles the environment variables passed to
// a CHORUS council agent: council-mode settings, the project context from
// the request, and fixed WHOOSH/Docker settings. request.ProjectContext must
// be non-nil.
func (ad *AgentDeployer) buildCouncilAgentEnvironment(request *CouncilDeploymentRequest, agent council.CouncilAgent) map[string]string {
	project := request.ProjectContext
	env := make(map[string]string, 16)

	// Core CHORUS configuration for council mode. Per the original author's
	// note, the role name maps to the agent definition in human-roles.yaml.
	env["CHORUS_AGENT_NAME"] = agent.RoleName
	env["CHORUS_COUNCIL_MODE"] = "true"
	env["CHORUS_COUNCIL_ID"] = request.CouncilID.String()
	env["CHORUS_PROJECT_NAME"] = project.ProjectName

	// Council prompt and context.
	env["CHORUS_COUNCIL_PROMPT"] = "/app/prompts/council.md"
	env["CHORUS_PROJECT_BRIEF"] = project.ProjectBrief
	env["CHORUS_CONSTRAINTS"] = project.Constraints
	env["CHORUS_TECH_LIMITS"] = project.TechLimits
	env["CHORUS_COMPLIANCE_NOTES"] = project.ComplianceNotes
	env["CHORUS_TARGETS"] = project.Targets

	// Essential project context.
	env["CHORUS_PROJECT"] = project.Repository
	env["CHORUS_EXTERNAL_URL"] = project.ExternalURL
	env["CHORUS_PRIORITY"] = "high"

	// WHOOSH coordination.
	env["WHOOSH_COORDINATOR"] = "true"
	env["WHOOSH_ENDPOINT"] = "http://whoosh:8080"

	// Docker access for CHORUS sandbox management.
	env["DOCKER_HOST"] = "unix:///var/run/docker.sock"

	return env
}
// calculateCouncilResources returns the resource limits and reservations for
// a council agent. The agent is currently ignored; every council agent gets
// the same fixed allocation, larger than the one used for team agents. CPU
// values are in nano-CPUs, memory values in bytes.
func (ad *AgentDeployer) calculateCouncilResources(agent council.CouncilAgent) ResourceLimits {
	const (
		cpuLimit   = 1_500_000_000 // 1.5 cores
		cpuRequest = cpuLimit / 2  // 0.75 core
		memLimit   = 2 << 30       // 2 GiB
		memRequest = 1 << 30       // 1 GiB
	)

	var limits ResourceLimits
	limits.CPULimit = cpuLimit
	limits.MemoryLimit = memLimit
	limits.CPURequest = cpuRequest
	limits.MemoryRequest = memRequest
	return limits
}
// buildCouncilAgentVolumes returns the mounts for a council agent: the host
// Docker socket (writable), a per-council named volume at /workspace, and a
// read-only bind mount of the host prompts directory at /app/prompts.
func (ad *AgentDeployer) buildCouncilAgentVolumes(request *CouncilDeploymentRequest) []VolumeMount {
	dockerSocket := VolumeMount{
		Type:     "bind",
		Source:   "/var/run/docker.sock",
		Target:   "/var/run/docker.sock",
		ReadOnly: false,
	}
	workspace := VolumeMount{
		Type:     "volume",
		Source:   fmt.Sprintf("whoosh-council-%s", request.CouncilID.String()),
		Target:   "/workspace",
		ReadOnly: false,
	}
	prompts := VolumeMount{
		Type:     "bind",
		Source:   "/rust/containers/WHOOSH/prompts",
		Target:   "/app/prompts",
		ReadOnly: true,
	}
	return []VolumeMount{dockerSocket, workspace, prompts}
}
// buildCouncilAgentPlacement returns the placement settings for a council
// agent. The agent is currently ignored; the only setting is a constraint
// restricting the service to worker nodes.
func (ad *AgentDeployer) buildCouncilAgentPlacement(agent council.CouncilAgent) PlacementConfig {
	var placement PlacementConfig
	placement.Constraints = []string{"node.role==worker"}
	return placement
}
// recordCouncilAgentDeployment marks the council_agents row for the given
// council and agent as deployed and active, storing the service ID and the
// database's current time. It returns the error from the update, if any.
// NOTE(review): an update that matches no row is not reported as an error;
// confirm the row is always created before deployment.
func (ad *AgentDeployer) recordCouncilAgentDeployment(councilID uuid.UUID, agent council.CouncilAgent, serviceID string) error {
	const markDeployed = `
UPDATE council_agents
SET deployed = true, status = 'active', service_id = $1, deployed_at = NOW(), updated_at = NOW()
WHERE council_id = $2 AND agent_id = $3
`
	if _, err := ad.db.Exec(ad.ctx, markDeployed, serviceID, councilID, agent.AgentID); err != nil {
		return err
	}
	return nil
}
// updateCouncilDeploymentStatus stores the council status derived from a
// deployment status on the councils row with the given ID. A "failed"
// deployment maps to "failed"; every other status, including "partial",
// maps to "active". The message argument is accepted but not stored.
func (ad *AgentDeployer) updateCouncilDeploymentStatus(councilID uuid.UUID, status, message string) error {
	const updateCouncil = `
UPDATE councils
SET status = $1, updated_at = NOW()
WHERE id = $2
`
	// Map deployment status to council status.
	var councilStatus string
	switch status {
	case "failed":
		councilStatus = "failed"
	default:
		// A partial deployment still allows the council to function.
		councilStatus = "active"
	}

	if _, err := ad.db.Exec(ad.ctx, updateCouncil, councilStatus, councilID); err != nil {
		return err
	}
	return nil
}

View File

@@ -0,0 +1,568 @@
package orchestrator
import (
"context"
"encoding/json"
"fmt"
"io"
"time"
"github.com/docker/docker/api/types"
"github.com/docker/docker/api/types/container"
"github.com/docker/docker/api/types/filters"
"github.com/docker/docker/api/types/mount"
"github.com/docker/docker/api/types/swarm"
"github.com/docker/docker/client"
"github.com/rs/zerolog/log"
)
// SwarmManager wraps a Docker client and uses it to create, inspect, scale
// and remove the Swarm services that run agents.
type SwarmManager struct {
	// client is the Docker API client; Close closes it.
	client *client.Client
	// ctx is passed to every Docker API call; cancel is invoked by Close.
	ctx    context.Context
	cancel context.CancelFunc
	// registry is the Docker registry name for agent images.
	registry string
}
// NewSwarmManager creates a SwarmManager connected to a Docker daemon.
//
// When dockerHost is non-empty the client connects to that host; otherwise
// it is configured from the environment. API version negotiation is enabled
// in both cases. An empty registry falls back to
// "registry.home.deepblack.cloud".
//
// The daemon is pinged before returning. On any failure an error is returned
// and everything acquired so far is released, including the Docker client,
// which would otherwise leak when the ping fails.
func NewSwarmManager(dockerHost, registry string) (*SwarmManager, error) {
	// Pick the connection source, then always negotiate the API version.
	hostOpt := client.FromEnv
	if dockerHost != "" {
		hostOpt = client.WithHost(dockerHost)
	}
	dockerClient, err := client.NewClientWithOpts(hostOpt, client.WithAPIVersionNegotiation())
	if err != nil {
		return nil, fmt.Errorf("failed to create Docker client: %w", err)
	}

	ctx, cancel := context.WithCancel(context.Background())

	// Test connection.
	if _, err := dockerClient.Ping(ctx); err != nil {
		cancel()
		// Best-effort cleanup: the ping failure is the error worth reporting.
		_ = dockerClient.Close()
		return nil, fmt.Errorf("failed to connect to Docker daemon: %w", err)
	}

	if registry == "" {
		registry = "registry.home.deepblack.cloud" // Default private registry
	}

	return &SwarmManager{
		client:   dockerClient,
		ctx:      ctx,
		cancel:   cancel,
		registry: registry,
	}, nil
}
// Close cancels the manager's context and then closes the Docker client,
// returning the client's close error, if any.
func (sm *SwarmManager) Close() error {
	sm.cancel()
	closeErr := sm.client.Close()
	return closeErr
}
// AgentDeploymentConfig describes a single agent service to deploy.
type AgentDeploymentConfig struct {
	// TeamID and TaskID label the service and contribute to its name.
	TeamID string `json:"team_id"`
	TaskID string `json:"task_id"`
	// AgentRole is the agent's role, e.g. executor, coordinator, reviewer.
	AgentRole string `json:"agent_role"`
	// AgentType is the agent's type, e.g. general, specialized.
	AgentType string `json:"agent_type"`
	// Image is the Docker image to run.
	Image string `json:"image"`
	// Replicas is the number of instances.
	Replicas uint64 `json:"replicas"`
	// Resources holds the CPU and memory limits and reservations.
	Resources ResourceLimits `json:"resources"`
	// Environment holds extra environment variables for the container.
	Environment map[string]string `json:"environment"`
	// TaskContext holds the task-specific context.
	TaskContext TaskContext `json:"task_context"`
	// Networks lists the Docker networks to join.
	Networks []string `json:"networks"`
	// Volumes lists the volume mounts.
	Volumes []VolumeMount `json:"volumes"`
	// Placement holds the node placement constraints.
	Placement PlacementConfig `json:"placement"`
}
// ResourceLimits holds CPU and memory limits and requests for a container.
// CPU values are in nano-CPUs (1e9 = 1 CPU); memory values are in bytes.
type ResourceLimits struct {
	// CPULimit and MemoryLimit are the upper bounds.
	CPULimit    int64 `json:"cpu_limit"`
	MemoryLimit int64 `json:"memory_limit"`
	// CPURequest and MemoryRequest are the requested amounts.
	CPURequest    int64 `json:"cpu_request"`
	MemoryRequest int64 `json:"memory_request"`
}
// TaskContext carries task-specific information that is handed to agents.
type TaskContext struct {
	// Issue details.
	IssueTitle       string `json:"issue_title"`
	IssueDescription string `json:"issue_description"`
	Repository       string `json:"repository"`

	// TechStack and Requirements are passed to agents as JSON arrays.
	TechStack    []string `json:"tech_stack"`
	Requirements []string `json:"requirements"`

	Priority    string `json:"priority"`
	ExternalURL string `json:"external_url"`

	// Metadata holds arbitrary additional values.
	Metadata map[string]interface{} `json:"metadata"`
}
// VolumeMount describes one mount to attach to a container.
type VolumeMount struct {
	// Source is a host path or a volume name.
	Source string `json:"source"`
	// Target is the path inside the container.
	Target string `json:"target"`
	// ReadOnly marks the mount as read-only.
	ReadOnly bool `json:"readonly"`
	// Type is one of: bind, volume, tmpfs.
	Type string `json:"type"`
}
// PlacementConfig describes where a service's containers may be placed.
type PlacementConfig struct {
	// Constraints lists node constraints.
	Constraints []string `json:"constraints"`
	// Preferences lists placement preferences.
	Preferences []PlacementPref `json:"preferences"`
	// Platforms lists the target platforms.
	Platforms []Platform `json:"platforms"`
}
// PlacementPref is a single placement preference.
type PlacementPref struct {
	// Spread is the descriptor used to spread tasks across nodes.
	Spread string `json:"spread"`
}
// Platform identifies a target platform for containers.
type Platform struct {
	// Architecture is the CPU architecture, e.g. amd64, arm64.
	Architecture string `json:"architecture"`
	// OS is the operating system, e.g. linux, windows.
	OS string `json:"os"`
}
// DeployAgent creates a replicated Swarm service for the given agent
// configuration and returns the service as reported by a follow-up inspect.
//
// The service is named "whoosh-agent-<team>-<task>-<role>", where <team> and
// <task> are the first eight bytes of the respective IDs. IDs shorter than
// eight bytes are used whole instead of panicking on the slice expression.
// The service and its containers are labelled with the team ID, task ID and
// role; the service additionally carries the agent type, a managed-by marker
// and its creation time.
//
// It returns an error when config is nil, when the service cannot be
// created, or when the created service cannot be inspected.
//
// NOTE(review): AgentRole is used verbatim in the service name; confirm role
// names only contain characters Swarm accepts in service names.
func (sm *SwarmManager) DeployAgent(config *AgentDeploymentConfig) (*swarm.Service, error) {
	if config == nil {
		return nil, fmt.Errorf("failed to create agent service: deployment config is nil")
	}

	log.Info().
		Str("team_id", config.TeamID).
		Str("task_id", config.TaskID).
		Str("agent_role", config.AgentRole).
		Str("image", config.Image).
		Msg("🚀 Deploying agent to Docker Swarm")

	// shortID truncates an ID to at most eight bytes for use in the name.
	shortID := func(id string) string {
		const maxLen = 8
		if len(id) > maxLen {
			return id[:maxLen]
		}
		return id
	}

	// Generate the service name.
	serviceName := fmt.Sprintf("whoosh-agent-%s-%s-%s",
		shortID(config.TeamID),
		shortID(config.TaskID),
		config.AgentRole,
	)

	// Create service specification.
	serviceSpec := swarm.ServiceSpec{
		Annotations: swarm.Annotations{
			Name: serviceName,
			Labels: map[string]string{
				"whoosh.team_id":    config.TeamID,
				"whoosh.task_id":    config.TaskID,
				"whoosh.agent_role": config.AgentRole,
				"whoosh.agent_type": config.AgentType,
				"whoosh.managed_by": "whoosh",
				"whoosh.created_at": time.Now().Format(time.RFC3339),
			},
		},
		TaskTemplate: swarm.TaskSpec{
			ContainerSpec: &swarm.ContainerSpec{
				Image:  config.Image,
				Env:    sm.buildEnvironment(config),
				Mounts: sm.buildMounts(config.Volumes),
				Labels: map[string]string{
					"whoosh.team_id":    config.TeamID,
					"whoosh.task_id":    config.TaskID,
					"whoosh.agent_role": config.AgentRole,
				},
				// Add healthcheck.
				Healthcheck: &container.HealthConfig{
					Test:     []string{"CMD-SHELL", "curl -f http://localhost:8080/health || exit 1"},
					Interval: 30 * time.Second,
					Timeout:  10 * time.Second,
					Retries:  3,
				},
			},
			Resources: sm.buildResources(config.Resources),
			Placement: sm.buildPlacement(config.Placement),
			Networks:  sm.buildNetworks(config.Networks),
		},
		Mode: swarm.ServiceMode{
			Replicated: &swarm.ReplicatedService{
				Replicas: &config.Replicas,
			},
		},
		UpdateConfig: &swarm.UpdateConfig{
			Parallelism: 1,
			Order:       "start-first",
		},
		// RollbackConfig removed for compatibility
	}

	// Create the service.
	response, err := sm.client.ServiceCreate(sm.ctx, serviceSpec, types.ServiceCreateOptions{})
	if err != nil {
		return nil, fmt.Errorf("failed to create agent service: %w", err)
	}

	log.Info().
		Str("service_id", response.ID).
		Str("service_name", serviceName).
		Msg("✅ Agent service created successfully")

	// Inspect the new service so the caller gets the full service object.
	service, _, err := sm.client.ServiceInspectWithRaw(sm.ctx, response.ID, types.ServiceInspectOptions{})
	if err != nil {
		return nil, fmt.Errorf("failed to inspect created service: %w", err)
	}

	return &service, nil
}
// buildEnvironment returns the container environment as "KEY=value" strings.
// The four WHOOSH_* identity variables come first, followed by the non-empty
// TASK_* variables derived from the task context, followed by the custom
// variables from config.Environment in map iteration order.
func (sm *SwarmManager) buildEnvironment(config *AgentDeploymentConfig) []string {
	env := []string{
		"WHOOSH_TEAM_ID=" + config.TeamID,
		"WHOOSH_TASK_ID=" + config.TaskID,
		"WHOOSH_AGENT_ROLE=" + config.AgentRole,
		"WHOOSH_AGENT_TYPE=" + config.AgentType,
	}

	// Scalar task context values are only exported when they are non-empty.
	task := &config.TaskContext
	scalars := []struct{ key, value string }{
		{"TASK_TITLE", task.IssueTitle},
		{"TASK_REPOSITORY", task.Repository},
		{"TASK_PRIORITY", task.Priority},
		{"TASK_EXTERNAL_URL", task.ExternalURL},
	}
	for _, kv := range scalars {
		if kv.value != "" {
			env = append(env, kv.key+"="+kv.value)
		}
	}

	// List values are exported as JSON arrays. The marshal error is ignored,
	// as in the original; the input is a plain []string.
	if len(task.TechStack) > 0 {
		encoded, _ := json.Marshal(task.TechStack)
		env = append(env, "TASK_TECH_STACK="+string(encoded))
	}
	if len(task.Requirements) > 0 {
		encoded, _ := json.Marshal(task.Requirements)
		env = append(env, "TASK_REQUIREMENTS="+string(encoded))
	}

	// Custom environment variables.
	for name, value := range config.Environment {
		env = append(env, name+"="+value)
	}

	return env
}
// buildMounts converts the configured volume mounts into Docker mounts.
//
// A mount type of "volume" or "tmpfs" is mapped to the corresponding Docker
// mount type; any other value is treated as a bind mount.
//
// The shared "whoosh-workspace" volume is added at /workspace only when no
// configured mount already targets /workspace. Both deployer volume builders
// supply their own /workspace volume, so appending the default
// unconditionally gave the service two mounts with the same target.
func (sm *SwarmManager) buildMounts(volumes []VolumeMount) []mount.Mount {
	const workspaceTarget = "/workspace"

	mounts := make([]mount.Mount, 0, len(volumes)+1)
	hasWorkspace := false

	for _, vol := range volumes {
		mountType := mount.TypeBind
		switch vol.Type {
		case "volume":
			mountType = mount.TypeVolume
		case "tmpfs":
			mountType = mount.TypeTmpfs
		}

		mounts = append(mounts, mount.Mount{
			Type:     mountType,
			Source:   vol.Source,
			Target:   vol.Target,
			ReadOnly: vol.ReadOnly,
		})

		if vol.Target == workspaceTarget {
			hasWorkspace = true
		}
	}

	// Fall back to the shared workspace volume when the caller supplied none.
	if !hasWorkspace {
		mounts = append(mounts, mount.Mount{
			Type:     mount.TypeVolume,
			Source:   "whoosh-workspace",
			Target:   workspaceTarget,
			ReadOnly: false,
		})
	}

	return mounts
}
// buildResources converts ResourceLimits into Swarm resource requirements.
// Limits and Reservations are each populated only when at least one of their
// CPU or memory values is positive; non-positive values are left unset.
func (sm *SwarmManager) buildResources(limits ResourceLimits) *swarm.ResourceRequirements {
	var reqs swarm.ResourceRequirements

	// Upper bounds.
	if limits.CPULimit > 0 || limits.MemoryLimit > 0 {
		ceiling := &swarm.Limit{}
		if limits.CPULimit > 0 {
			ceiling.NanoCPUs = limits.CPULimit
		}
		if limits.MemoryLimit > 0 {
			ceiling.MemoryBytes = limits.MemoryLimit
		}
		reqs.Limits = ceiling
	}

	// Requests / reservations.
	if limits.CPURequest > 0 || limits.MemoryRequest > 0 {
		reserved := &swarm.Resources{}
		if limits.CPURequest > 0 {
			reserved.NanoCPUs = limits.CPURequest
		}
		if limits.MemoryRequest > 0 {
			reserved.MemoryBytes = limits.MemoryRequest
		}
		reqs.Reservations = reserved
	}

	return &reqs
}
// buildPlacement converts a PlacementConfig into a Swarm placement spec:
// constraints are passed through, each preference becomes a spread
// preference, and each platform is copied field by field.
func (sm *SwarmManager) buildPlacement(config PlacementConfig) *swarm.Placement {
	spec := swarm.Placement{Constraints: config.Constraints}

	// Preferences.
	for i := range config.Preferences {
		spread := &swarm.SpreadOver{SpreadDescriptor: config.Preferences[i].Spread}
		spec.Preferences = append(spec.Preferences, swarm.PlacementPreference{Spread: spread})
	}

	// Platforms.
	for i := range config.Platforms {
		target := config.Platforms[i]
		spec.Platforms = append(spec.Platforms, swarm.Platform{
			Architecture: target.Architecture,
			OS:           target.OS,
		})
	}

	return &spec
}
// buildNetworks returns one network attachment per network name. When no
// names are given the service is attached to "chorus_default".
func (sm *SwarmManager) buildNetworks(networks []string) []swarm.NetworkAttachmentConfig {
	names := networks
	if len(names) == 0 {
		names = []string{"chorus_default"}
	}

	attachments := make([]swarm.NetworkAttachmentConfig, 0, len(names))
	for _, name := range names {
		attachments = append(attachments, swarm.NetworkAttachmentConfig{Target: name})
	}
	return attachments
}
// RemoveAgent removes the service with the given ID from the swarm and logs
// the attempt and its success. It returns a wrapped error when removal fails.
func (sm *SwarmManager) RemoveAgent(serviceID string) error {
	log.Info().
		Str("service_id", serviceID).
		Msg("🗑️ Removing agent service from Docker Swarm")

	if err := sm.client.ServiceRemove(sm.ctx, serviceID); err != nil {
		return fmt.Errorf("failed to remove service: %w", err)
	}

	log.Info().
		Str("service_id", serviceID).
		Msg("✅ Agent service removed successfully")
	return nil
}
// ListAgentServices returns the services whose "whoosh.managed_by" label is
// "whoosh".
//
// The label filter is sent with the list request so the daemon does not have
// to return every service in the swarm. The same condition is re-checked on
// the response, so the result does not depend on the daemon applying the
// filter. It returns a wrapped error when the list request fails.
func (sm *SwarmManager) ListAgentServices() ([]swarm.Service, error) {
	const (
		managedByLabel = "whoosh.managed_by"
		managedByValue = "whoosh"
	)

	services, err := sm.client.ServiceList(sm.ctx, types.ServiceListOptions{
		Filters: filters.NewArgs(filters.Arg("label", managedByLabel+"="+managedByValue)),
	})
	if err != nil {
		return nil, fmt.Errorf("failed to list services: %w", err)
	}

	// Keep only WHOOSH-managed services.
	var agentServices []swarm.Service
	for _, service := range services {
		if service.Spec.Labels[managedByLabel] == managedByValue {
			agentServices = append(agentServices, service)
		}
	}

	return agentServices, nil
}
// GetServiceLogs fetches the most recent log output of a service and returns
// it as a single string.
//
// Both stdout and stderr are requested, with timestamps, limited to the last
// `lines` lines. The whole stream is read into memory before returning.
//
// NOTE(review): the stream is returned exactly as the Docker client delivers
// it; it may be in Docker's multiplexed stdout/stderr framing rather than
// plain text — confirm whether callers need it demultiplexed.
func (sm *SwarmManager) GetServiceLogs(serviceID string, lines int) (string, error) {
	stream, err := sm.client.ServiceLogs(sm.ctx, serviceID, types.ContainerLogsOptions{
		Tail:       fmt.Sprint(lines),
		Timestamps: true,
		ShowStdout: true,
		ShowStderr: true,
	})
	if err != nil {
		return "", fmt.Errorf("failed to get service logs: %w", err)
	}
	defer stream.Close()

	raw, readErr := io.ReadAll(stream)
	if readErr != nil {
		return "", fmt.Errorf("failed to read service logs: %w", readErr)
	}
	return string(raw), nil
}
// ScaleService sets the replica count of a replicated service.
//
// The service is inspected to obtain its current spec and version, the
// replica count in the spec is overwritten, and the modified spec is
// submitted as a service update. Only services with a Replicated mode section
// can be scaled; for any other service an error is returned instead of
// dereferencing a nil mode pointer.
func (sm *SwarmManager) ScaleService(serviceID string, replicas uint64) error {
	log.Info().
		Str("service_id", serviceID).
		Uint64("replicas", replicas).
		Msg("📈 Scaling agent service")

	// Get the current service spec; its version is passed along with the update.
	service, _, err := sm.client.ServiceInspectWithRaw(sm.ctx, serviceID, types.ServiceInspectOptions{})
	if err != nil {
		return fmt.Errorf("failed to inspect service: %w", err)
	}

	// Mode.Replicated is a pointer and is nil for services that are not in
	// replicated mode; there is no replica count to change in that case.
	if service.Spec.Mode.Replicated == nil {
		return fmt.Errorf("failed to scale service: service %s is not in replicated mode", serviceID)
	}
	service.Spec.Mode.Replicated.Replicas = &replicas

	// Submit the updated spec.
	_, err = sm.client.ServiceUpdate(sm.ctx, serviceID, service.Version, service.Spec, types.ServiceUpdateOptions{})
	if err != nil {
		return fmt.Errorf("failed to scale service: %w", err)
	}

	log.Info().
		Str("service_id", serviceID).
		Uint64("replicas", replicas).
		Msg("✅ Service scaled successfully")

	return nil
}
// GetServiceStatus returns a snapshot of a service's spec and task states.
//
// The service is inspected for its name, image, replica count and timestamps,
// and the tasks reported for the service are tallied per state. Image is left
// empty when the task template has no container spec, and Replicas stays 0
// when the service has no replicated mode or no replica count set.
//
// NOTE(review): the task list is filtered by service only, so the tallies
// presumably include tasks retained from earlier runs, not just the current
// ones — confirm against the swarm task history settings.
func (sm *SwarmManager) GetServiceStatus(serviceID string) (*ServiceStatus, error) {
	service, _, err := sm.client.ServiceInspectWithRaw(sm.ctx, serviceID, types.ServiceInspectOptions{})
	if err != nil {
		return nil, fmt.Errorf("failed to inspect service: %w", err)
	}

	// Get the tasks belonging to this service.
	tasks, err := sm.client.TaskList(sm.ctx, types.TaskListOptions{
		Filters: filters.NewArgs(filters.Arg("service", serviceID)),
	})
	if err != nil {
		return nil, fmt.Errorf("failed to list tasks: %w", err)
	}

	status := &ServiceStatus{
		ServiceID:   serviceID,
		ServiceName: service.Spec.Name,
		TaskStates:  make(map[string]int),
		CreatedAt:   service.CreatedAt,
		UpdatedAt:   service.UpdatedAt,
	}

	// ContainerSpec is a pointer; guard it so a task template without a
	// container spec does not cause a nil-pointer panic.
	if containerSpec := service.Spec.TaskTemplate.ContainerSpec; containerSpec != nil {
		status.Image = containerSpec.Image
	}

	if replicated := service.Spec.Mode.Replicated; replicated != nil && replicated.Replicas != nil {
		status.Replicas = *replicated.Replicas
	}

	// Count task states; running and failed tasks also get dedicated counters.
	for _, task := range tasks {
		status.TaskStates[string(task.Status.State)]++

		switch task.Status.State {
		case swarm.TaskStateRunning:
			status.RunningTasks++
		case swarm.TaskStateFailed:
			status.FailedTasks++
		}
	}

	return status, nil
}
// ServiceStatus represents the current status of a service as assembled by
// GetServiceStatus: identifying data from the service spec plus counters over
// the tasks listed for the service.
type ServiceStatus struct {
	// ServiceID is the ID the status was requested for.
	ServiceID string `json:"service_id"`
	// ServiceName is the name from the service spec.
	ServiceName string `json:"service_name"`
	// Image is the container image from the service's task template.
	Image string `json:"image"`
	// Replicas is the replica count from the replicated mode section of the
	// spec; it stays 0 when that section or its replica count is absent.
	Replicas uint64 `json:"replicas"`
	// RunningTasks is the number of listed tasks in the running state.
	RunningTasks uint64 `json:"running_tasks"`
	// FailedTasks is the number of listed tasks in the failed state.
	FailedTasks uint64 `json:"failed_tasks"`
	// TaskStates maps each observed task state name to the number of listed
	// tasks in that state.
	TaskStates map[string]int `json:"task_states"`
	// CreatedAt is the creation timestamp reported for the service.
	CreatedAt time.Time `json:"created_at"`
	// UpdatedAt is the last-update timestamp reported for the service.
	UpdatedAt time.Time `json:"updated_at"`
}
// CleanupFailedServices removes WHOOSH-managed services that have at least
// one failed task and no running task.
//
// The sweep is best-effort: a service whose status cannot be fetched, or whose
// removal fails, is logged and skipped, and the remaining services are still
// processed. An error is returned only when the managed services cannot be
// listed in the first place.
func (sm *SwarmManager) CleanupFailedServices() error {
	managed, listErr := sm.ListAgentServices()
	if listErr != nil {
		return fmt.Errorf("failed to list services: %w", listErr)
	}

	for _, svc := range managed {
		st, statusErr := sm.GetServiceStatus(svc.ID)
		if statusErr != nil {
			log.Error().
				Err(statusErr).
				Str("service_id", svc.ID).
				Msg("Failed to get service status")
			continue
		}

		// Leave the service alone unless something failed and nothing runs.
		if st.FailedTasks == 0 || st.RunningTasks != 0 {
			continue
		}

		log.Warn().
			Str("service_id", svc.ID).
			Str("service_name", svc.Spec.Name).
			Uint64("failed_tasks", st.FailedTasks).
			Msg("Removing failed service")

		if removeErr := sm.RemoveAgent(svc.ID); removeErr != nil {
			log.Error().
				Err(removeErr).
				Str("service_id", svc.ID).
				Msg("Failed to remove failed service")
		}
	}

	return nil
}