---
# CHORUS / WHOOSH / BACKBEAT stack definition.
#
# This file relies on Swarm-oriented features (deploy:, an overlay network,
# external secrets and configs), so it is written for `docker stack deploy`.
# Any literal "$" in a value must be written as "$$" because Compose performs
# ${VAR} interpolation on every string in this file.
version: "3.9"

services:
  chorus:
    image: anthonyrawlins/chorus:latest

    # REQUIRED: License configuration (CHORUS will not start without this)
    environment:
      # CRITICAL: License configuration - REQUIRED for operation
      - CHORUS_LICENSE_ID_FILE=/run/secrets/chorus_license_id
      - CHORUS_CLUSTER_ID=${CHORUS_CLUSTER_ID:-docker-cluster}
      - CHORUS_KACHING_URL=${CHORUS_KACHING_URL:-https://kaching.chorus.services/api}

      # Agent configuration
      - CHORUS_AGENT_ID=${CHORUS_AGENT_ID:-}  # Auto-generated if not provided
      - CHORUS_SPECIALIZATION=${CHORUS_SPECIALIZATION:-general_developer}
      - CHORUS_MAX_TASKS=${CHORUS_MAX_TASKS:-3}
      - CHORUS_CAPABILITIES=general_development,task_coordination,admin_election

      # Network configuration
      - CHORUS_API_PORT=8080
      - CHORUS_HEALTH_PORT=8081
      - CHORUS_P2P_PORT=9000
      - CHORUS_BIND_ADDRESS=0.0.0.0

      # Scaling optimizations (as per WHOOSH issue #7)
      - CHORUS_MDNS_ENABLED=false  # Disabled for container/swarm environments
      - CHORUS_DIALS_PER_SEC=5  # Rate limit outbound connections to prevent storms
      - CHORUS_MAX_CONCURRENT_DHT=16  # Limit concurrent DHT queries

      # Election stability windows (Medium-risk fix 2.1)
      - CHORUS_ELECTION_MIN_TERM=120s  # Minimum time between elections to prevent churn
      - CHORUS_LEADER_MIN_TERM=240s  # Minimum time before challenging healthy leader

      # Assignment system for runtime configuration (Medium-risk fix 2.2)
      - ASSIGN_URL=${ASSIGN_URL:-}  # Optional: WHOOSH assignment endpoint
      - TASK_SLOT=${TASK_SLOT:-}  # Optional: Task slot identifier
      - TASK_ID=${TASK_ID:-}  # Optional: Task identifier
      - NODE_ID=${NODE_ID:-}  # Optional: Node identifier

      # Bootstrap pool configuration (supports JSON and CSV)
      - BOOTSTRAP_JSON=/config/bootstrap.json  # Optional: JSON bootstrap config
      - CHORUS_BOOTSTRAP_PEERS=${CHORUS_BOOTSTRAP_PEERS:-}  # CSV fallback

      # AI configuration - Provider selection
      - CHORUS_AI_PROVIDER=${CHORUS_AI_PROVIDER:-resetdata}

      # ResetData configuration (default provider)
      - RESETDATA_BASE_URL=${RESETDATA_BASE_URL:-https://models.au-syd.resetdata.ai/v1}
      - RESETDATA_API_KEY_FILE=/run/secrets/resetdata_api_key
      - RESETDATA_MODEL=${RESETDATA_MODEL:-meta/llama-3.1-8b-instruct}

      # Ollama configuration (alternative provider)
      - OLLAMA_ENDPOINT=${OLLAMA_ENDPOINT:-http://host.docker.internal:11434}

      # Model configuration
      - CHORUS_MODELS=${CHORUS_MODELS:-meta/llama-3.1-8b-instruct}
      - CHORUS_DEFAULT_REASONING_MODEL=${CHORUS_DEFAULT_REASONING_MODEL:-meta/llama-3.1-8b-instruct}

      # LightRAG configuration (optional RAG enhancement)
      # NOTE(review): the API key default below is a placeholder string;
      # supply CHORUS_LIGHTRAG_API_KEY explicitly when LightRAG is enabled.
      - CHORUS_LIGHTRAG_ENABLED=${CHORUS_LIGHTRAG_ENABLED:-false}
      - CHORUS_LIGHTRAG_BASE_URL=${CHORUS_LIGHTRAG_BASE_URL:-http://lightrag:9621}
      - CHORUS_LIGHTRAG_TIMEOUT=${CHORUS_LIGHTRAG_TIMEOUT:-30s}
      - CHORUS_LIGHTRAG_API_KEY=${CHORUS_LIGHTRAG_API_KEY:-your-secure-api-key-here}
      - CHORUS_LIGHTRAG_DEFAULT_MODE=${CHORUS_LIGHTRAG_DEFAULT_MODE:-hybrid}

      # Logging configuration
      - LOG_LEVEL=${LOG_LEVEL:-info}
      - LOG_FORMAT=${LOG_FORMAT:-structured}

      # BACKBEAT configuration
      - CHORUS_BACKBEAT_ENABLED=${CHORUS_BACKBEAT_ENABLED:-true}
      - CHORUS_BACKBEAT_CLUSTER_ID=${CHORUS_BACKBEAT_CLUSTER_ID:-chorus-production}
      - CHORUS_BACKBEAT_AGENT_ID=${CHORUS_BACKBEAT_AGENT_ID:-}  # Auto-generated from CHORUS_AGENT_ID
      - CHORUS_BACKBEAT_NATS_URL=${CHORUS_BACKBEAT_NATS_URL:-nats://backbeat-nats:4222}

      # Prompt sourcing (mounted volume)
      - CHORUS_PROMPTS_DIR=/etc/chorus/prompts
      - CHORUS_DEFAULT_INSTRUCTIONS_PATH=/etc/chorus/prompts/defaults.md
      - CHORUS_ROLE=${CHORUS_ROLE:-arbiter}

    # Docker secrets for sensitive configuration
    secrets:
      - chorus_license_id
      - resetdata_api_key

    # Configuration files
    configs:
      - source: chorus_bootstrap
        target: /config/bootstrap.json

    # Persistent data storage
    volumes:
      - chorus_data:/app/data
      # Mount prompts directory read-only for role YAMLs and defaults.md
      - /rust/containers/WHOOSH/prompts:/etc/chorus/prompts:ro

    # Network ports
    ports:
      - "${CHORUS_P2P_PORT:-9000}:9000"  # P2P communication

    # Container resource limits
    deploy:
      mode: replicated
      replicas: ${CHORUS_REPLICAS:-20}
      update_config:
        parallelism: 1
        delay: 10s
        failure_action: pause
        order: start-first
      restart_policy:
        condition: on-failure
        delay: 5s
        max_attempts: 3
        window: 120s
      resources:
        limits:
          cpus: "${CHORUS_CPU_LIMIT:-1.0}"
          memory: "${CHORUS_MEMORY_LIMIT:-1G}"
        reservations:
          cpus: "0.1"
          memory: 128M
      placement:
        constraints:
          - node.hostname != acacia
        preferences:
          - spread: node.hostname
      # CHORUS is internal-only, no Traefik labels needed

    # Network configuration
    networks:
      - chorus_net

    # Host resolution for external services
    extra_hosts:
      - "host.docker.internal:host-gateway"

    # Container logging configuration
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"
        tag: "{{.ImageName}}/{{.Name}}/{{.ID}}"

    # Health check configuration (probes the CHORUS_HEALTH_PORT set above)
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8081/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 10s

  whoosh:
    image: anthonyrawlins/whoosh:latest
    ports:
      - target: 8080
        published: 8800
        protocol: tcp
        mode: ingress
    environment:
      # Database configuration
      WHOOSH_DATABASE_DB_HOST: postgres
      WHOOSH_DATABASE_DB_PORT: "5432"
      WHOOSH_DATABASE_DB_NAME: whoosh
      WHOOSH_DATABASE_DB_USER: whoosh
      WHOOSH_DATABASE_DB_PASSWORD_FILE: /run/secrets/whoosh_db_password
      WHOOSH_DATABASE_DB_SSL_MODE: disable
      WHOOSH_DATABASE_DB_AUTO_MIGRATE: "true"

      # Server configuration
      WHOOSH_SERVER_LISTEN_ADDR: ":8080"
      WHOOSH_SERVER_READ_TIMEOUT: "30s"
      WHOOSH_SERVER_WRITE_TIMEOUT: "30s"
      WHOOSH_SERVER_SHUTDOWN_TIMEOUT: "30s"

      # UI static directory (served at site root by WHOOSH)
      WHOOSH_UI_DIR: "/app/ui"

      # GITEA configuration
      WHOOSH_GITEA_BASE_URL: https://gitea.chorus.services
      WHOOSH_GITEA_TOKEN_FILE: /run/secrets/gitea_token
      WHOOSH_GITEA_WEBHOOK_TOKEN_FILE: /run/secrets/webhook_token
      WHOOSH_GITEA_WEBHOOK_PATH: /webhooks/gitea

      # Auth configuration
      WHOOSH_AUTH_JWT_SECRET_FILE: /run/secrets/jwt_secret
      WHOOSH_AUTH_SERVICE_TOKENS_FILE: /run/secrets/service_tokens
      WHOOSH_AUTH_JWT_EXPIRY: "24h"

      # Logging
      WHOOSH_LOGGING_LEVEL: debug
      WHOOSH_LOGGING_ENVIRONMENT: production

      # Redis configuration
      WHOOSH_REDIS_ENABLED: "true"
      WHOOSH_REDIS_HOST: redis
      WHOOSH_REDIS_PORT: "6379"
      WHOOSH_REDIS_PASSWORD_FILE: /run/secrets/redis_password
      WHOOSH_REDIS_DATABASE: "0"

      # Scaling system configuration
      WHOOSH_SCALING_KACHING_URL: "https://kaching.chorus.services"
      WHOOSH_SCALING_BACKBEAT_URL: "http://backbeat-pulse:8080"
      WHOOSH_SCALING_CHORUS_URL: "http://chorus:9000"

      # BACKBEAT integration configuration (temporarily disabled)
      WHOOSH_BACKBEAT_ENABLED: "false"
      WHOOSH_BACKBEAT_CLUSTER_ID: "chorus-production"
      WHOOSH_BACKBEAT_AGENT_ID: "whoosh"
      WHOOSH_BACKBEAT_NATS_URL: "nats://backbeat-nats:4222"

      # Docker integration configuration (disabled for agent assignment architecture)
      WHOOSH_DOCKER_ENABLED: "false"
    secrets:
      - whoosh_db_password
      - gitea_token
      - webhook_token
      - jwt_secret
      - service_tokens
      - redis_password
    volumes:
      - whoosh_ui:/app/ui:ro
      # - /var/run/docker.sock:/var/run/docker.sock  # Disabled for agent assignment architecture
    deploy:
      replicas: 2
      restart_policy:
        condition: on-failure
        delay: 5s
        max_attempts: 3
        window: 120s
      update_config:
        parallelism: 1
        delay: 10s
        failure_action: pause
        monitor: 60s
        order: start-first
      # rollback_config:
      #   parallelism: 1
      #   delay: 0s
      #   failure_action: pause
      #   monitor: 60s
      #   order: stop-first
      placement:
        constraints:
          - node.hostname != acacia
        preferences:
          - spread: node.hostname
      resources:
        limits:
          memory: 256M
          cpus: '0.5'
        reservations:
          memory: 128M
          cpus: '0.25'
      labels:
        - traefik.enable=true
        - traefik.docker.network=tengig
        - traefik.http.routers.whoosh.rule=Host(`whoosh.chorus.services`)
        - traefik.http.routers.whoosh.entrypoints=web,web-secured
        - traefik.http.routers.whoosh.tls=true
        - traefik.http.routers.whoosh.tls.certresolver=letsencryptresolver
        - traefik.http.services.whoosh.loadbalancer.server.port=8080
        - traefik.http.services.whoosh.loadbalancer.passhostheader=true
        # "$" is doubled so Compose does not treat the bcrypt hash as variable
        # references. The hash is a placeholder, and no router label in this
        # file attaches the whoosh-auth middleware yet.
        - traefik.http.middlewares.whoosh-auth.basicauth.users=admin:$$2y$$10$$example_hash
    networks:
      - tengig
      - chorus_net
    healthcheck:
      test: ["CMD", "/app/whoosh", "--health-check"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s

  postgres:
    image: postgres:15-alpine
    environment:
      POSTGRES_DB: whoosh
      POSTGRES_USER: whoosh
      POSTGRES_PASSWORD_FILE: /run/secrets/whoosh_db_password
      POSTGRES_INITDB_ARGS: "--auth-host=scram-sha-256"
    secrets:
      - whoosh_db_password
    volumes:
      - whoosh_postgres_data:/var/lib/postgresql/data
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
        delay: 5s
        max_attempts: 3
        window: 120s
      placement:
        preferences:
          - spread: node.hostname
      resources:
        limits:
          memory: 512M
          cpus: '1.0'
        reservations:
          memory: 256M
          cpus: '0.5'
    networks:
      - chorus_net
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -h localhost -p 5432 -U whoosh -d whoosh"]
      interval: 30s
      timeout: 10s
      retries: 5
      start_period: 40s

  redis:
    image: redis:7-alpine
    # "$$" defers the $(cat ...) substitution to the container shell, so the
    # password is read from the mounted secret at start-up.
    command: sh -c 'redis-server --requirepass "$$(cat /run/secrets/redis_password)" --appendonly yes'
    secrets:
      - redis_password
    volumes:
      - whoosh_redis_data:/data
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
        delay: 5s
        max_attempts: 3
        window: 120s
      placement:
        preferences:
          - spread: node.hostname
      resources:
        limits:
          memory: 128M
          cpus: '0.25'
        reservations:
          memory: 64M
          cpus: '0.1'
    networks:
      - chorus_net
    healthcheck:
      # The password substitution is quoted (matching the server command above)
      # so a secret containing whitespace or glob characters is passed intact.
      test:
        - "CMD"
        - "sh"
        - "-c"
        - 'redis-cli --no-auth-warning -a "$$(cat /run/secrets/redis_password)" ping'
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 30s

  prometheus:
    image: prom/prometheus:latest
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--web.console.libraries=/usr/share/prometheus/console_libraries'
      - '--web.console.templates=/usr/share/prometheus/consoles'
    volumes:
      - /rust/containers/CHORUS/monitoring/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro
      - /rust/containers/CHORUS/monitoring/prometheus:/prometheus
    ports:
      - "9099:9090"  # Expose Prometheus UI
    deploy:
      replicas: 1
      labels:
        - traefik.enable=true
        # Service is on two networks; pin the one Traefik should route over
        # (same convention as the whoosh service).
        - traefik.docker.network=tengig
        - traefik.http.routers.prometheus.rule=Host(`prometheus.chorus.services`)
        - traefik.http.routers.prometheus.entrypoints=web,web-secured
        - traefik.http.routers.prometheus.tls=true
        - traefik.http.routers.prometheus.tls.certresolver=letsencryptresolver
        - traefik.http.services.prometheus.loadbalancer.server.port=9090
    networks:
      - chorus_net
      - tengig
    healthcheck:
      test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:9090/-/ready"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 10s

  grafana:
    image: grafana/grafana:latest
    user: "1000:1000"
    environment:
      # NOTE(review): falls back to "admin" when GRAFANA_ADMIN_PASSWORD is unset.
      - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD:-admin}  # Use a strong password in production
      - GF_SERVER_ROOT_URL=https://grafana.chorus.services
    volumes:
      - /rust/containers/CHORUS/monitoring/grafana:/var/lib/grafana
    ports:
      - "3300:3000"  # Expose Grafana UI
    deploy:
      replicas: 1
      labels:
        - traefik.enable=true
        # Service is on two networks; pin the one Traefik should route over
        # (same convention as the whoosh service).
        - traefik.docker.network=tengig
        - traefik.http.routers.grafana.rule=Host(`grafana.chorus.services`)
        - traefik.http.routers.grafana.entrypoints=web,web-secured
        - traefik.http.routers.grafana.tls=true
        - traefik.http.routers.grafana.tls.certresolver=letsencryptresolver
        - traefik.http.services.grafana.loadbalancer.server.port=3000
    networks:
      - chorus_net
      - tengig
    healthcheck:
      test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:3000/api/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 10s

  # BACKBEAT Pulse Service - Leader-elected tempo broadcaster
  # REQ: BACKBEAT-REQ-001 - Single BeatFrame publisher per cluster
  # REQ: BACKBEAT-OPS-001 - One replica prefers leadership
  backbeat-pulse:
    image: anthonyrawlins/backbeat-pulse:v1.0.6
    command: >
      ./pulse
      -cluster=chorus-production
      -admin-port=8080
      -raft-bind=0.0.0.0:9000
      -data-dir=/data
      -nats=nats://backbeat-nats:4222
      -tempo=2
      -bar-length=8
      -log-level=info

    # Internal service ports (not externally exposed - routed via Traefik)
    expose:
      - "8080"  # Admin API
      - "9000"  # Raft communication

    # REQ: BACKBEAT-OPS-002 - Health probes for liveness/readiness
    healthcheck:
      test: ["CMD", "nc", "-z", "localhost", "8080"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s

    deploy:
      replicas: 1  # Single leader with automatic failover
      restart_policy:
        condition: on-failure
        delay: 30s  # Wait longer for NATS to be ready
        max_attempts: 5
        window: 120s
      update_config:
        parallelism: 1
        delay: 30s  # Wait for leader election
        failure_action: pause
        monitor: 60s
        order: start-first
      placement:
        preferences:
          - spread: node.hostname
      resources:
        limits:
          memory: 256M
          cpus: '0.5'
        reservations:
          memory: 128M
          cpus: '0.25'
      # Traefik routing for admin API
      labels:
        - traefik.enable=true
        # Service is on two networks; pin the one Traefik should route over
        # (same convention as the whoosh service).
        - traefik.docker.network=tengig
        - traefik.http.routers.backbeat-pulse.rule=Host(`backbeat-pulse.chorus.services`)
        - traefik.http.routers.backbeat-pulse.tls=true
        - traefik.http.routers.backbeat-pulse.tls.certresolver=letsencryptresolver
        - traefik.http.services.backbeat-pulse.loadbalancer.server.port=8080

    networks:
      - chorus_net
      - tengig  # External network for Traefik

    # Container logging
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"
        tag: "backbeat-pulse/{{.Name}}/{{.ID}}"

  # BACKBEAT Reverb Service - StatusClaim aggregator
  # REQ: BACKBEAT-REQ-020 - Subscribe to INT-B and group by window_id
  # REQ: BACKBEAT-OPS-001 - Reverb can scale stateless
  backbeat-reverb:
    image: anthonyrawlins/backbeat-reverb:v1.0.2
    command: >
      ./reverb
      -cluster=chorus-production
      -nats=nats://backbeat-nats:4222
      -bar-length=8
      -log-level=info

    # Internal service ports (not externally exposed - routed via Traefik)
    expose:
      - "8080"  # Admin API

    # REQ: BACKBEAT-OPS-002 - Health probes for orchestration (temporarily disabled for testing)
    # healthcheck:
    #   test: ["CMD", "nc", "-z", "localhost", "8080"]
    #   interval: 30s
    #   timeout: 10s
    #   retries: 3
    #   start_period: 60s

    deploy:
      replicas: 2  # Stateless, can scale horizontally
      restart_policy:
        condition: on-failure
        delay: 10s
        max_attempts: 3
        window: 120s
      update_config:
        parallelism: 1
        delay: 15s
        failure_action: pause
        monitor: 45s
        order: start-first
      placement:
        preferences:
          - spread: node.hostname
      resources:
        limits:
          memory: 512M  # Larger for window aggregation
          cpus: '1.0'
        reservations:
          memory: 256M
          cpus: '0.5'
      # Traefik routing for admin API
      labels:
        - traefik.enable=true
        # Service is on two networks; pin the one Traefik should route over
        # (same convention as the whoosh service).
        - traefik.docker.network=tengig
        - traefik.http.routers.backbeat-reverb.rule=Host(`backbeat-reverb.chorus.services`)
        - traefik.http.routers.backbeat-reverb.tls=true
        - traefik.http.routers.backbeat-reverb.tls.certresolver=letsencryptresolver
        - traefik.http.services.backbeat-reverb.loadbalancer.server.port=8080

    networks:
      - chorus_net
      - tengig  # External network for Traefik

    # Container logging
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"
        tag: "backbeat-reverb/{{.Name}}/{{.ID}}"

  # NATS Message Broker - Use existing or deploy dedicated instance
  # REQ: BACKBEAT-INT-001 - Topics via NATS for at-least-once delivery
  backbeat-nats:
    image: nats:2.9-alpine
    command: ["--jetstream"]
    deploy:
      replicas: 1
      restart_policy:
        condition: on-failure
        delay: 10s
        max_attempts: 3
        window: 120s
      placement:
        preferences:
          - spread: node.hostname
      resources:
        limits:
          memory: 256M
          cpus: '0.5'
        reservations:
          memory: 128M
          cpus: '0.25'
    networks:
      - chorus_net

    # Container logging
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"
        tag: "nats/{{.Name}}/{{.ID}}"

  watchtower:
    image: containrrr/watchtower
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock
    command: '--interval 300 --cleanup --revive-stopped --include-stopped'
    # The service-level `restart:` key is ignored by `docker stack deploy`;
    # the always-restart intent is expressed through the Swarm restart policy.
    deploy:
      restart_policy:
        condition: any

# KACHING services are deployed separately in their own stack
# License validation will access https://kaching.chorus.services/api

# Persistent volumes
# NOTE(review): prometheus_data, prometheus_config and grafana_data are not
# referenced by any service in this file; prometheus and grafana bind-mount
# the same host paths directly.
volumes:
  prometheus_data:
    driver: local
    driver_opts:
      type: none
      o: bind
      device: /rust/containers/CHORUS/monitoring/prometheus
  prometheus_config:
    driver: local
    driver_opts:
      type: none
      o: bind
      device: /rust/containers/CHORUS/monitoring/prometheus
  grafana_data:
    driver: local
    driver_opts:
      type: none
      o: bind
      device: /rust/containers/CHORUS/monitoring/grafana
  chorus_data:
    driver: local
  whoosh_postgres_data:
    driver: local
    driver_opts:
      type: none
      o: bind
      device: /rust/containers/WHOOSH/postgres
  whoosh_redis_data:
    driver: local
    driver_opts:
      type: none
      o: bind
      device: /rust/containers/WHOOSH/redis
  whoosh_ui:
    driver: local
    driver_opts:
      type: none
      o: bind
      device: /rust/containers/WHOOSH/ui

# Networks for CHORUS communication
networks:
  tengig:
    external: true
  chorus_net:
    driver: overlay
    attachable: true

configs:
  chorus_bootstrap:
    file: ./bootstrap.json

# Secrets are created outside this stack; `name` is the Swarm secret name and
# the mapping key is the name services use under /run/secrets/.
secrets:
  chorus_license_id:
    external: true
    name: chorus_license_id
  resetdata_api_key:
    external: true
    name: resetdata_api_key
  whoosh_db_password:
    external: true
    name: whoosh_db_password
  gitea_token:
    external: true
    name: gitea_token
  webhook_token:
    external: true
    name: whoosh_webhook_token
  jwt_secret:
    external: true
    name: whoosh_jwt_secret_v4
  service_tokens:
    external: true
    name: whoosh_service_tokens
  redis_password:
    external: true
    name: whoosh_redis_password