feat: Production readiness improvements for WHOOSH council formation
Major security, observability, and configuration improvements:
## Security Hardening
- Implemented configurable CORS (no more wildcards)
- Added comprehensive auth middleware for admin endpoints
- Enhanced webhook HMAC validation
- Added input validation and rate limiting
- Security headers and CSP policies
## Configuration Management
- Made N8N webhook URL configurable (WHOOSH_N8N_BASE_URL)
- Replaced all hardcoded endpoints with environment variables
- Added feature flags for LLM vs heuristic composition
- Gitea fetch hardening with EAGER_FILTER and FULL_RESCAN options
## API Completeness
- Implemented GetCouncilComposition function
- Added GET /api/v1/councils/{id} endpoint
- Council artifacts API (POST/GET /api/v1/councils/{id}/artifacts)
- /admin/health/details endpoint with component status
- Database lookup for repository URLs (no hardcoded fallbacks)
## Observability & Performance
- Added OpenTelemetry distributed tracing with goal/pulse correlation
- Performance optimization database indexes
- Comprehensive health monitoring
- Enhanced logging and error handling
## Infrastructure
- Production-ready P2P discovery (replaces mock implementation)
- Removed unused Redis configuration
- Enhanced Docker Swarm integration
- Added migration files for performance indexes
## Code Quality
- Comprehensive input validation
- Graceful error handling and failsafe fallbacks
- Backwards compatibility maintained
- Following security best practices
🤖 Generated with [Claude Code](https://claude.ai/code)
Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -14,6 +14,9 @@ import (
|
||||
"github.com/docker/docker/api/types/swarm"
|
||||
"github.com/docker/docker/client"
|
||||
"github.com/rs/zerolog/log"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
|
||||
"github.com/chorus-services/whoosh/internal/tracing"
|
||||
)
|
||||
|
||||
// SwarmManager manages Docker Swarm services for agent deployment
|
||||
@@ -88,6 +91,8 @@ type AgentDeploymentConfig struct {
|
||||
Networks []string `json:"networks"` // Docker networks to join
|
||||
Volumes []VolumeMount `json:"volumes"` // Volume mounts
|
||||
Placement PlacementConfig `json:"placement"` // Node placement constraints
|
||||
GoalID string `json:"goal_id,omitempty"`
|
||||
PulseID string `json:"pulse_id,omitempty"`
|
||||
}
|
||||
|
||||
// ResourceLimits defines CPU and memory limits for containers
|
||||
@@ -138,6 +143,26 @@ type Platform struct {
|
||||
|
||||
// DeployAgent deploys an agent service to Docker Swarm
|
||||
func (sm *SwarmManager) DeployAgent(config *AgentDeploymentConfig) (*swarm.Service, error) {
|
||||
ctx, span := tracing.StartDeploymentSpan(sm.ctx, "deploy_agent", config.AgentRole)
|
||||
defer span.End()
|
||||
|
||||
// Add tracing attributes
|
||||
span.SetAttributes(
|
||||
attribute.String("agent.team_id", config.TeamID),
|
||||
attribute.String("agent.task_id", config.TaskID),
|
||||
attribute.String("agent.role", config.AgentRole),
|
||||
attribute.String("agent.type", config.AgentType),
|
||||
attribute.String("agent.image", config.Image),
|
||||
)
|
||||
|
||||
// Add goal.id and pulse.id if available in config
|
||||
if config.GoalID != "" {
|
||||
span.SetAttributes(attribute.String("goal.id", config.GoalID))
|
||||
}
|
||||
if config.PulseID != "" {
|
||||
span.SetAttributes(attribute.String("pulse.id", config.PulseID))
|
||||
}
|
||||
|
||||
log.Info().
|
||||
Str("team_id", config.TeamID).
|
||||
Str("task_id", config.TaskID).
|
||||
@@ -212,11 +237,24 @@ func (sm *SwarmManager) DeployAgent(config *AgentDeploymentConfig) (*swarm.Servi
|
||||
}
|
||||
|
||||
// Create the service
|
||||
response, err := sm.client.ServiceCreate(sm.ctx, serviceSpec, types.ServiceCreateOptions{})
|
||||
response, err := sm.client.ServiceCreate(ctx, serviceSpec, types.ServiceCreateOptions{})
|
||||
if err != nil {
|
||||
tracing.SetSpanError(span, err)
|
||||
span.SetAttributes(
|
||||
attribute.String("deployment.status", "failed"),
|
||||
attribute.String("deployment.service_name", serviceName),
|
||||
)
|
||||
return nil, fmt.Errorf("failed to create agent service: %w", err)
|
||||
}
|
||||
|
||||
// Add success metrics to span
|
||||
span.SetAttributes(
|
||||
attribute.String("deployment.status", "success"),
|
||||
attribute.String("deployment.service_id", response.ID),
|
||||
attribute.String("deployment.service_name", serviceName),
|
||||
attribute.Int64("deployment.replicas", int64(config.Replicas)),
|
||||
)
|
||||
|
||||
log.Info().
|
||||
Str("service_id", response.ID).
|
||||
Str("service_name", serviceName).
|
||||
|
||||
Reference in New Issue
Block a user