feat: production readiness improvements for WHOOSH council formation
Major security, observability, and configuration improvements:
## Security Hardening
- Implemented configurable CORS (no more wildcards)
- Added comprehensive auth middleware for admin endpoints
- Enhanced webhook HMAC validation
- Added input validation and rate limiting
- Security headers and CSP policies
## Configuration Management
- Made N8N webhook URL configurable (WHOOSH_N8N_BASE_URL)
- Replaced all hardcoded endpoints with environment variables
- Added feature flags to select between LLM-based and heuristic composition
- Gitea fetch hardening with EAGER_FILTER and FULL_RESCAN options
## API Completeness
- Implemented GetCouncilComposition function
- Added GET /api/v1/councils/{id} endpoint
- Council artifacts API (POST/GET /api/v1/councils/{id}/artifacts)
- /admin/health/details endpoint with component status
- Database lookup for repository URLs (no hardcoded fallbacks)
## Observability & Performance
- Added OpenTelemetry distributed tracing with goal/pulse correlation
- Performance optimization database indexes
- Comprehensive health monitoring
- Enhanced logging and error handling
## Infrastructure
- Production-ready P2P discovery (replaces mock implementation)
- Removed unused Redis configuration
- Enhanced Docker Swarm integration
- Added migration files for performance indexes
## Code Quality
- Comprehensive input validation
- Graceful error handling and failsafe fallbacks
- Backwards compatibility maintained
- Following security best practices
🤖 Generated with [Claude Code](https://claude.ai/code)
Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -13,10 +13,12 @@ import (
|
||||
"github.com/chorus-services/whoosh/internal/council"
|
||||
"github.com/chorus-services/whoosh/internal/gitea"
|
||||
"github.com/chorus-services/whoosh/internal/orchestrator"
|
||||
"github.com/chorus-services/whoosh/internal/tracing"
|
||||
"github.com/google/uuid"
|
||||
"github.com/jackc/pgx/v5"
|
||||
"github.com/jackc/pgx/v5/pgxpool"
|
||||
"github.com/rs/zerolog/log"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
)
|
||||
|
||||
// Monitor manages repository monitoring and task creation
|
||||
@@ -88,14 +90,20 @@ func (m *Monitor) Stop() {
|
||||
|
||||
// syncAllRepositories syncs all monitored repositories
|
||||
func (m *Monitor) syncAllRepositories(ctx context.Context) {
|
||||
ctx, span := tracing.StartMonitorSpan(ctx, "sync_all_repositories", "all")
|
||||
defer span.End()
|
||||
|
||||
log.Info().Msg("🔄 Starting repository sync cycle")
|
||||
|
||||
repos, err := m.getMonitoredRepositories(ctx)
|
||||
if err != nil {
|
||||
tracing.SetSpanError(span, err)
|
||||
log.Error().Err(err).Msg("Failed to get monitored repositories")
|
||||
return
|
||||
}
|
||||
|
||||
span.SetAttributes(attribute.Int("repositories.count", len(repos)))
|
||||
|
||||
if len(repos) == 0 {
|
||||
log.Info().Msg("No repositories to monitor")
|
||||
return
|
||||
@@ -112,11 +120,23 @@ func (m *Monitor) syncAllRepositories(ctx context.Context) {
|
||||
}
|
||||
}
|
||||
|
||||
span.SetAttributes(attribute.String("sync.status", "completed"))
|
||||
log.Info().Msg("✅ Repository sync cycle completed")
|
||||
}
|
||||
|
||||
// syncRepository syncs a single repository
|
||||
func (m *Monitor) syncRepository(ctx context.Context, repo RepositoryConfig) {
|
||||
ctx, span := tracing.StartMonitorSpan(ctx, "sync_repository", repo.FullName)
|
||||
defer span.End()
|
||||
|
||||
span.SetAttributes(
|
||||
attribute.String("repository.id", repo.ID),
|
||||
attribute.String("repository.owner", repo.Owner),
|
||||
attribute.String("repository.name", repo.Name),
|
||||
attribute.String("repository.sync_status", repo.SyncStatus),
|
||||
attribute.Bool("repository.chorus_enabled", repo.EnableChorusIntegration),
|
||||
)
|
||||
|
||||
log.Info().
|
||||
Str("repository", repo.FullName).
|
||||
Msg("Syncing repository")
|
||||
@@ -206,6 +226,14 @@ func (m *Monitor) syncRepository(ctx context.Context, repo RepositoryConfig) {
|
||||
|
||||
duration := time.Since(startTime)
|
||||
|
||||
// Add span attributes for the sync results
|
||||
span.SetAttributes(
|
||||
attribute.Int("issues.processed", len(issues)),
|
||||
attribute.Int("tasks.created", created),
|
||||
attribute.Int("tasks.updated", updated),
|
||||
attribute.Int64("duration.ms", duration.Milliseconds()),
|
||||
)
|
||||
|
||||
// Check if repository should transition from initial scan to active status
|
||||
if repo.SyncStatus == "initial_scan" || repo.SyncStatus == "pending" {
|
||||
// Repository has completed initial scan
|
||||
@@ -221,19 +249,24 @@ func (m *Monitor) syncRepository(ctx context.Context, repo RepositoryConfig) {
|
||||
Msg("Transitioning repository from initial scan to active status - content found")
|
||||
|
||||
if err := m.updateRepositoryStatus(ctx, repo.ID, "active", nil); err != nil {
|
||||
tracing.SetSpanError(span, err)
|
||||
log.Error().Err(err).
|
||||
Str("repository", repo.FullName).
|
||||
Msg("Failed to transition repository to active status")
|
||||
} else {
|
||||
span.SetAttributes(attribute.String("repository.transition", "initial_scan_to_active"))
|
||||
}
|
||||
} else {
|
||||
log.Info().
|
||||
Str("repository", repo.FullName).
|
||||
Msg("Initial scan completed - no content found, keeping in initial_scan status")
|
||||
span.SetAttributes(attribute.String("repository.transition", "initial_scan_no_content"))
|
||||
}
|
||||
}
|
||||
|
||||
// Update repository sync timestamps and statistics
|
||||
if err := m.updateRepositorySyncInfo(ctx, repo.ID, time.Now(), created, updated); err != nil {
|
||||
tracing.SetSpanError(span, err)
|
||||
log.Error().Err(err).
|
||||
Str("repository", repo.FullName).
|
||||
Msg("Failed to update repository sync info")
|
||||
@@ -865,6 +898,17 @@ func (m *Monitor) assignTaskToTeam(ctx context.Context, taskID, teamID string) e
|
||||
|
||||
// triggerCouncilFormation initiates council formation for a project kickoff
|
||||
func (m *Monitor) triggerCouncilFormation(ctx context.Context, taskID string, issue gitea.Issue, repo RepositoryConfig) {
|
||||
ctx, span := tracing.StartCouncilSpan(ctx, "trigger_council_formation", "")
|
||||
defer span.End()
|
||||
|
||||
span.SetAttributes(
|
||||
attribute.String("task.id", taskID),
|
||||
attribute.Int64("issue.id", issue.ID),
|
||||
attribute.Int64("issue.number", issue.Number),
|
||||
attribute.String("repository.name", repo.FullName),
|
||||
attribute.String("issue.title", issue.Title),
|
||||
)
|
||||
|
||||
log.Info().
|
||||
Str("task_id", taskID).
|
||||
Int64("issue_id", issue.ID).
|
||||
@@ -875,6 +919,7 @@ func (m *Monitor) triggerCouncilFormation(ctx context.Context, taskID string, is
|
||||
// Convert task ID to UUID
|
||||
taskUUID, err := uuid.Parse(taskID)
|
||||
if err != nil {
|
||||
tracing.SetSpanError(span, err)
|
||||
log.Error().
|
||||
Err(err).
|
||||
Str("task_id", taskID).
|
||||
@@ -884,6 +929,7 @@ func (m *Monitor) triggerCouncilFormation(ctx context.Context, taskID string, is
|
||||
|
||||
// Extract project name from repository name (remove owner prefix)
|
||||
projectName := strings.Split(repo.FullName, "/")[1]
|
||||
span.SetAttributes(attribute.String("project.name", projectName))
|
||||
|
||||
// Create council formation request
|
||||
councilRequest := &council.CouncilFormationRequest{
|
||||
@@ -907,6 +953,7 @@ func (m *Monitor) triggerCouncilFormation(ctx context.Context, taskID string, is
|
||||
// Form the council
|
||||
composition, err := m.council.FormCouncil(ctx, councilRequest)
|
||||
if err != nil {
|
||||
tracing.SetSpanError(span, err)
|
||||
log.Error().Err(err).
|
||||
Str("task_id", taskID).
|
||||
Str("project_name", projectName).
|
||||
@@ -914,6 +961,12 @@ func (m *Monitor) triggerCouncilFormation(ctx context.Context, taskID string, is
|
||||
return
|
||||
}
|
||||
|
||||
span.SetAttributes(
|
||||
attribute.String("council.id", composition.CouncilID.String()),
|
||||
attribute.Int("council.core_agents", len(composition.CoreAgents)),
|
||||
attribute.Int("council.optional_agents", len(composition.OptionalAgents)),
|
||||
)
|
||||
|
||||
log.Info().
|
||||
Str("task_id", taskID).
|
||||
Str("council_id", composition.CouncilID.String()).
|
||||
@@ -945,6 +998,18 @@ func (m *Monitor) triggerCouncilFormation(ctx context.Context, taskID string, is
|
||||
|
||||
// deployCouncilAgents deploys Docker containers for the council agents
|
||||
func (m *Monitor) deployCouncilAgents(ctx context.Context, taskID string, composition *council.CouncilComposition, request *council.CouncilFormationRequest, repo RepositoryConfig) {
|
||||
ctx, span := tracing.StartDeploymentSpan(ctx, "deploy_council_agents", composition.CouncilID.String())
|
||||
defer span.End()
|
||||
|
||||
span.SetAttributes(
|
||||
attribute.String("task.id", taskID),
|
||||
attribute.String("council.id", composition.CouncilID.String()),
|
||||
attribute.String("project.name", composition.ProjectName),
|
||||
attribute.Int("council.core_agents", len(composition.CoreAgents)),
|
||||
attribute.Int("council.optional_agents", len(composition.OptionalAgents)),
|
||||
attribute.String("repository.name", repo.FullName),
|
||||
)
|
||||
|
||||
log.Info().
|
||||
Str("task_id", taskID).
|
||||
Str("council_id", composition.CouncilID.String()).
|
||||
@@ -973,6 +1038,7 @@ func (m *Monitor) deployCouncilAgents(ctx context.Context, taskID string, compos
|
||||
// Deploy the council agents
|
||||
result, err := m.agentDeployer.DeployCouncilAgents(deploymentRequest)
|
||||
if err != nil {
|
||||
tracing.SetSpanError(span, err)
|
||||
log.Error().
|
||||
Err(err).
|
||||
Str("council_id", composition.CouncilID.String()).
|
||||
@@ -983,6 +1049,12 @@ func (m *Monitor) deployCouncilAgents(ctx context.Context, taskID string, compos
|
||||
return
|
||||
}
|
||||
|
||||
span.SetAttributes(
|
||||
attribute.String("deployment.status", result.Status),
|
||||
attribute.Int("deployment.deployed_agents", len(result.DeployedAgents)),
|
||||
attribute.Int("deployment.errors", len(result.Errors)),
|
||||
)
|
||||
|
||||
log.Info().
|
||||
Str("council_id", composition.CouncilID.String()).
|
||||
Str("deployment_status", result.Status).
|
||||
|
||||
Reference in New Issue
Block a user