feat: Production readiness improvements for WHOOSH council formation

Major security, observability, and configuration improvements:

## Security Hardening
- Implemented configurable CORS (no more wildcards)
- Added comprehensive auth middleware for admin endpoints
- Enhanced webhook HMAC validation
- Added input validation and rate limiting
- Security headers and a Content Security Policy (CSP)

## Configuration Management
- Made N8N webhook URL configurable (WHOOSH_N8N_BASE_URL)
- Replaced all hardcoded endpoints with environment variables
- Added feature flags for LLM vs heuristic composition
- Gitea fetch hardening with EAGER_FILTER and FULL_RESCAN options

## API Completeness
- Implemented GetCouncilComposition function
- Added GET /api/v1/councils/{id} endpoint
- Council artifacts API (POST/GET /api/v1/councils/{id}/artifacts)
- /admin/health/details endpoint with component status
- Database lookup for repository URLs (no hardcoded fallbacks)

## Observability & Performance
- Added OpenTelemetry distributed tracing with goal/pulse correlation
- Database indexes for performance optimization
- Comprehensive health monitoring
- Enhanced logging and error handling

## Infrastructure
- Production-ready P2P discovery (replaces mock implementation)
- Removed unused Redis configuration
- Enhanced Docker Swarm integration
- Added migration files for performance indexes

## Code Quality
- Comprehensive input validation
- Graceful error handling and failsafe fallbacks
- Maintained backward compatibility
- Followed security best practices

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Claude Code
2025-09-12 20:34:17 +10:00
parent 56ea52b743
commit 131868bdca
1740 changed files with 575904 additions and 171 deletions

View File

@@ -13,10 +13,12 @@ import (
"github.com/chorus-services/whoosh/internal/council"
"github.com/chorus-services/whoosh/internal/gitea"
"github.com/chorus-services/whoosh/internal/orchestrator"
"github.com/chorus-services/whoosh/internal/tracing"
"github.com/google/uuid"
"github.com/jackc/pgx/v5"
"github.com/jackc/pgx/v5/pgxpool"
"github.com/rs/zerolog/log"
"go.opentelemetry.io/otel/attribute"
)
// Monitor manages repository monitoring and task creation
@@ -88,14 +90,20 @@ func (m *Monitor) Stop() {
// syncAllRepositories syncs all monitored repositories
func (m *Monitor) syncAllRepositories(ctx context.Context) {
ctx, span := tracing.StartMonitorSpan(ctx, "sync_all_repositories", "all")
defer span.End()
log.Info().Msg("🔄 Starting repository sync cycle")
repos, err := m.getMonitoredRepositories(ctx)
if err != nil {
tracing.SetSpanError(span, err)
log.Error().Err(err).Msg("Failed to get monitored repositories")
return
}
span.SetAttributes(attribute.Int("repositories.count", len(repos)))
if len(repos) == 0 {
log.Info().Msg("No repositories to monitor")
return
@@ -112,11 +120,23 @@ func (m *Monitor) syncAllRepositories(ctx context.Context) {
}
}
span.SetAttributes(attribute.String("sync.status", "completed"))
log.Info().Msg("✅ Repository sync cycle completed")
}
// syncRepository syncs a single repository
func (m *Monitor) syncRepository(ctx context.Context, repo RepositoryConfig) {
ctx, span := tracing.StartMonitorSpan(ctx, "sync_repository", repo.FullName)
defer span.End()
span.SetAttributes(
attribute.String("repository.id", repo.ID),
attribute.String("repository.owner", repo.Owner),
attribute.String("repository.name", repo.Name),
attribute.String("repository.sync_status", repo.SyncStatus),
attribute.Bool("repository.chorus_enabled", repo.EnableChorusIntegration),
)
log.Info().
Str("repository", repo.FullName).
Msg("Syncing repository")
@@ -206,6 +226,14 @@ func (m *Monitor) syncRepository(ctx context.Context, repo RepositoryConfig) {
duration := time.Since(startTime)
// Add span attributes for the sync results
span.SetAttributes(
attribute.Int("issues.processed", len(issues)),
attribute.Int("tasks.created", created),
attribute.Int("tasks.updated", updated),
attribute.Int64("duration.ms", duration.Milliseconds()),
)
// Check if repository should transition from initial scan to active status
if repo.SyncStatus == "initial_scan" || repo.SyncStatus == "pending" {
// Repository has completed initial scan
@@ -221,19 +249,24 @@ func (m *Monitor) syncRepository(ctx context.Context, repo RepositoryConfig) {
Msg("Transitioning repository from initial scan to active status - content found")
if err := m.updateRepositoryStatus(ctx, repo.ID, "active", nil); err != nil {
tracing.SetSpanError(span, err)
log.Error().Err(err).
Str("repository", repo.FullName).
Msg("Failed to transition repository to active status")
} else {
span.SetAttributes(attribute.String("repository.transition", "initial_scan_to_active"))
}
} else {
log.Info().
Str("repository", repo.FullName).
Msg("Initial scan completed - no content found, keeping in initial_scan status")
span.SetAttributes(attribute.String("repository.transition", "initial_scan_no_content"))
}
}
// Update repository sync timestamps and statistics
if err := m.updateRepositorySyncInfo(ctx, repo.ID, time.Now(), created, updated); err != nil {
tracing.SetSpanError(span, err)
log.Error().Err(err).
Str("repository", repo.FullName).
Msg("Failed to update repository sync info")
@@ -865,6 +898,17 @@ func (m *Monitor) assignTaskToTeam(ctx context.Context, taskID, teamID string) e
// triggerCouncilFormation initiates council formation for a project kickoff
func (m *Monitor) triggerCouncilFormation(ctx context.Context, taskID string, issue gitea.Issue, repo RepositoryConfig) {
ctx, span := tracing.StartCouncilSpan(ctx, "trigger_council_formation", "")
defer span.End()
span.SetAttributes(
attribute.String("task.id", taskID),
attribute.Int64("issue.id", issue.ID),
attribute.Int64("issue.number", issue.Number),
attribute.String("repository.name", repo.FullName),
attribute.String("issue.title", issue.Title),
)
log.Info().
Str("task_id", taskID).
Int64("issue_id", issue.ID).
@@ -875,6 +919,7 @@ func (m *Monitor) triggerCouncilFormation(ctx context.Context, taskID string, is
// Convert task ID to UUID
taskUUID, err := uuid.Parse(taskID)
if err != nil {
tracing.SetSpanError(span, err)
log.Error().
Err(err).
Str("task_id", taskID).
@@ -884,6 +929,7 @@ func (m *Monitor) triggerCouncilFormation(ctx context.Context, taskID string, is
// Extract project name from repository name (remove owner prefix)
projectName := strings.Split(repo.FullName, "/")[1]
span.SetAttributes(attribute.String("project.name", projectName))
// Create council formation request
councilRequest := &council.CouncilFormationRequest{
@@ -907,6 +953,7 @@ func (m *Monitor) triggerCouncilFormation(ctx context.Context, taskID string, is
// Form the council
composition, err := m.council.FormCouncil(ctx, councilRequest)
if err != nil {
tracing.SetSpanError(span, err)
log.Error().Err(err).
Str("task_id", taskID).
Str("project_name", projectName).
@@ -914,6 +961,12 @@ func (m *Monitor) triggerCouncilFormation(ctx context.Context, taskID string, is
return
}
span.SetAttributes(
attribute.String("council.id", composition.CouncilID.String()),
attribute.Int("council.core_agents", len(composition.CoreAgents)),
attribute.Int("council.optional_agents", len(composition.OptionalAgents)),
)
log.Info().
Str("task_id", taskID).
Str("council_id", composition.CouncilID.String()).
@@ -945,6 +998,18 @@ func (m *Monitor) triggerCouncilFormation(ctx context.Context, taskID string, is
// deployCouncilAgents deploys Docker containers for the council agents
func (m *Monitor) deployCouncilAgents(ctx context.Context, taskID string, composition *council.CouncilComposition, request *council.CouncilFormationRequest, repo RepositoryConfig) {
ctx, span := tracing.StartDeploymentSpan(ctx, "deploy_council_agents", composition.CouncilID.String())
defer span.End()
span.SetAttributes(
attribute.String("task.id", taskID),
attribute.String("council.id", composition.CouncilID.String()),
attribute.String("project.name", composition.ProjectName),
attribute.Int("council.core_agents", len(composition.CoreAgents)),
attribute.Int("council.optional_agents", len(composition.OptionalAgents)),
attribute.String("repository.name", repo.FullName),
)
log.Info().
Str("task_id", taskID).
Str("council_id", composition.CouncilID.String()).
@@ -973,6 +1038,7 @@ func (m *Monitor) deployCouncilAgents(ctx context.Context, taskID string, compos
// Deploy the council agents
result, err := m.agentDeployer.DeployCouncilAgents(deploymentRequest)
if err != nil {
tracing.SetSpanError(span, err)
log.Error().
Err(err).
Str("council_id", composition.CouncilID.String()).
@@ -983,6 +1049,12 @@ func (m *Monitor) deployCouncilAgents(ctx context.Context, taskID string, compos
return
}
span.SetAttributes(
attribute.String("deployment.status", result.Status),
attribute.Int("deployment.deployed_agents", len(result.DeployedAgents)),
attribute.Int("deployment.errors", len(result.Errors)),
)
log.Info().
Str("council_id", composition.CouncilID.String()).
Str("deployment_status", result.Status).