---
# Compose file format version used by this stack definition.
version: '3.9'

# Service definitions follow, one mapping entry per service.
services:
|   chorus:
 | |
|     image: anthonyrawlins/chorus:latest
 | |
|     
 | |
|     # REQUIRED: License configuration (CHORUS will not start without this)
 | |
|     environment:
 | |
|       # CRITICAL: License configuration - REQUIRED for operation
 | |
|       - CHORUS_LICENSE_ID_FILE=/run/secrets/chorus_license_id
 | |
|       - CHORUS_CLUSTER_ID=${CHORUS_CLUSTER_ID:-docker-cluster}
 | |
|       - CHORUS_KACHING_URL=${CHORUS_KACHING_URL:-https://kaching.chorus.services/api}
 | |
|       
 | |
|       # Agent configuration
 | |
|       - CHORUS_AGENT_ID=${CHORUS_AGENT_ID:-}  # Auto-generated if not provided
 | |
|       - CHORUS_SPECIALIZATION=${CHORUS_SPECIALIZATION:-general_developer}
 | |
|       - CHORUS_MAX_TASKS=${CHORUS_MAX_TASKS:-3}
 | |
|       - CHORUS_CAPABILITIES=general_development,task_coordination,admin_election
 | |
|       
 | |
|       # Network configuration
 | |
|       - CHORUS_API_PORT=8080
 | |
|       - CHORUS_HEALTH_PORT=8081
 | |
|       - CHORUS_P2P_PORT=9000
 | |
|       - CHORUS_BIND_ADDRESS=0.0.0.0
 | |
| 
 | |
|       # Scaling optimizations (as per WHOOSH issue #7)
 | |
|       - CHORUS_MDNS_ENABLED=false  # Disabled for container/swarm environments
 | |
|       - CHORUS_DIALS_PER_SEC=5     # Rate limit outbound connections to prevent storms
 | |
|       - CHORUS_MAX_CONCURRENT_DHT=16  # Limit concurrent DHT queries
 | |
| 
 | |
|       # Election stability windows (Medium-risk fix 2.1)
 | |
|       - CHORUS_ELECTION_MIN_TERM=30s  # Minimum time between elections to prevent churn
 | |
|       - CHORUS_LEADER_MIN_TERM=45s    # Minimum time before challenging healthy leader
 | |
| 
 | |
|       # Assignment system for runtime configuration (Medium-risk fix 2.2)
 | |
|       - ASSIGN_URL=${ASSIGN_URL:-}  # Optional: WHOOSH assignment endpoint
 | |
|       - TASK_SLOT=${TASK_SLOT:-}    # Optional: Task slot identifier
 | |
|       - TASK_ID=${TASK_ID:-}        # Optional: Task identifier
 | |
|       - NODE_ID=${NODE_ID:-}        # Optional: Node identifier
 | |
| 
 | |
|       # Bootstrap pool configuration (supports JSON and CSV)
 | |
|       - BOOTSTRAP_JSON=/config/bootstrap.json  # Optional: JSON bootstrap config
 | |
|       - CHORUS_BOOTSTRAP_PEERS=${CHORUS_BOOTSTRAP_PEERS:-}  # CSV fallback
 | |
|       
 | |
|       # AI configuration - Provider selection
 | |
|       - CHORUS_AI_PROVIDER=${CHORUS_AI_PROVIDER:-resetdata}
 | |
|       
 | |
|       # ResetData configuration (default provider)
 | |
|       - RESETDATA_BASE_URL=${RESETDATA_BASE_URL:-https://models.au-syd.resetdata.ai/v1}
 | |
|       - RESETDATA_API_KEY_FILE=/run/secrets/resetdata_api_key
 | |
|       - RESETDATA_MODEL=${RESETDATA_MODEL:-meta/llama-3.1-8b-instruct}
 | |
|       
 | |
|       # Ollama configuration (alternative provider)
 | |
|       - OLLAMA_ENDPOINT=${OLLAMA_ENDPOINT:-http://host.docker.internal:11434}
 | |
|       
 | |
|       # Model configuration
 | |
|       - CHORUS_MODELS=${CHORUS_MODELS:-meta/llama-3.1-8b-instruct}
 | |
|       - CHORUS_DEFAULT_REASONING_MODEL=${CHORUS_DEFAULT_REASONING_MODEL:-meta/llama-3.1-8b-instruct}
 | |
|       
 | |
|       # Logging configuration
 | |
|       - LOG_LEVEL=${LOG_LEVEL:-info}
 | |
|       - LOG_FORMAT=${LOG_FORMAT:-structured}
 | |
|       
 | |
|       # BACKBEAT configuration
 | |
|       - CHORUS_BACKBEAT_ENABLED=${CHORUS_BACKBEAT_ENABLED:-true}
 | |
|       - CHORUS_BACKBEAT_CLUSTER_ID=${CHORUS_BACKBEAT_CLUSTER_ID:-chorus-production}
 | |
|       - CHORUS_BACKBEAT_AGENT_ID=${CHORUS_BACKBEAT_AGENT_ID:-}  # Auto-generated from CHORUS_AGENT_ID
 | |
|       - CHORUS_BACKBEAT_NATS_URL=${CHORUS_BACKBEAT_NATS_URL:-nats://backbeat-nats:4222}
 | |
|       
 | |
|       # Prompt sourcing (mounted volume)
 | |
|       - CHORUS_PROMPTS_DIR=/etc/chorus/prompts
 | |
|       - CHORUS_DEFAULT_INSTRUCTIONS_PATH=/etc/chorus/prompts/defaults.md
 | |
|       - CHORUS_ROLE=${CHORUS_ROLE:-arbiter}
 | |
|     
 | |
|     # Docker secrets for sensitive configuration
 | |
|     secrets:
 | |
|       - chorus_license_id
 | |
|       - resetdata_api_key
 | |
| 
 | |
|     # Configuration files
 | |
|     configs:
 | |
|       - source: chorus_bootstrap
 | |
|         target: /config/bootstrap.json
 | |
|       
 | |
|     # Persistent data storage
 | |
|     volumes:
 | |
|       - chorus_data:/app/data
 | |
|       # Mount prompts directory read-only for role YAMLs and defaults.md
 | |
|       - /rust/containers/WHOOSH/prompts:/etc/chorus/prompts:ro
 | |
|     
 | |
|     # Network ports
 | |
|     ports:
 | |
|       - "${CHORUS_P2P_PORT:-9000}:9000"      # P2P communication
 | |
|     
 | |
|     # Container resource limits
 | |
|     deploy:
 | |
|       mode: replicated
 | |
|       replicas: ${CHORUS_REPLICAS:-9}
 | |
|       update_config:
 | |
|         parallelism: 1
 | |
|         delay: 10s
 | |
|         failure_action: pause
 | |
|         order: start-first
 | |
|       restart_policy:
 | |
|         condition: on-failure
 | |
|         delay: 5s
 | |
|         max_attempts: 3
 | |
|         window: 120s
 | |
|       resources:
 | |
|         limits:
 | |
|           cpus: "${CHORUS_CPU_LIMIT:-1.0}"
 | |
|           memory: "${CHORUS_MEMORY_LIMIT:-1G}"
 | |
|         reservations:
 | |
|           cpus: "0.1"
 | |
|           memory: 128M
 | |
|       placement:
 | |
|         constraints:
 | |
|           - node.hostname != acacia
 | |
|         preferences:
 | |
|           - spread: node.hostname
 | |
|       # CHORUS is internal-only, no Traefik labels needed
 | |
|     
 | |
|     # Network configuration
 | |
|     networks:
 | |
|       - chorus_net
 | |
|     
 | |
|     # Host resolution for external services
 | |
|     extra_hosts:
 | |
|       - "host.docker.internal:host-gateway"
 | |
|     
 | |
|     # Container logging configuration
 | |
|     logging:
 | |
|       driver: "json-file"
 | |
|       options:
 | |
|         max-size: "10m"
 | |
|         max-file: "3"
 | |
|         tag: "{{.ImageName}}/{{.Name}}/{{.ID}}"
 | |
|     
 | |
|     # Health check configuration
 | |
|     healthcheck:
 | |
|       test: ["CMD", "curl", "-f", "http://localhost:8081/health"]
 | |
|       interval: 30s
 | |
|       timeout: 10s
 | |
|       retries: 3
 | |
|       start_period: 10s
 | |
| 
 | |
|   whoosh:
 | |
|     image: anthonyrawlins/whoosh:latest
 | |
|     ports:
 | |
|       - target: 8080
 | |
|         published: 8800
 | |
|         protocol: tcp
 | |
|         mode: ingress
 | |
|     environment:
 | |
|       # Database configuration  
 | |
|       WHOOSH_DATABASE_DB_HOST: postgres
 | |
|       WHOOSH_DATABASE_DB_PORT: 5432
 | |
|       WHOOSH_DATABASE_DB_NAME: whoosh
 | |
|       WHOOSH_DATABASE_DB_USER: whoosh
 | |
|       WHOOSH_DATABASE_DB_PASSWORD_FILE: /run/secrets/whoosh_db_password
 | |
|       WHOOSH_DATABASE_DB_SSL_MODE: disable
 | |
|       WHOOSH_DATABASE_DB_AUTO_MIGRATE: "true"
 | |
| 
 | |
|       # Server configuration
 | |
|       WHOOSH_SERVER_LISTEN_ADDR: ":8080"
 | |
|       WHOOSH_SERVER_READ_TIMEOUT: "30s"
 | |
|       WHOOSH_SERVER_WRITE_TIMEOUT: "30s"
 | |
|       WHOOSH_SERVER_SHUTDOWN_TIMEOUT: "30s"
 | |
| 
 | |
|       # GITEA configuration
 | |
|       WHOOSH_GITEA_BASE_URL: https://gitea.chorus.services  
 | |
|       WHOOSH_GITEA_TOKEN_FILE: /run/secrets/gitea_token
 | |
|       WHOOSH_GITEA_WEBHOOK_TOKEN_FILE: /run/secrets/webhook_token
 | |
|       WHOOSH_GITEA_WEBHOOK_PATH: /webhooks/gitea
 | |
| 
 | |
|       # Auth configuration
 | |
|       WHOOSH_AUTH_JWT_SECRET_FILE: /run/secrets/jwt_secret
 | |
|       WHOOSH_AUTH_SERVICE_TOKENS_FILE: /run/secrets/service_tokens
 | |
|       WHOOSH_AUTH_JWT_EXPIRY: "24h"
 | |
| 
 | |
|       # Logging
 | |
|       WHOOSH_LOGGING_LEVEL: debug
 | |
|       WHOOSH_LOGGING_ENVIRONMENT: production
 | |
| 
 | |
|       # Redis configuration
 | |
|       WHOOSH_REDIS_ENABLED: "true"
 | |
|       WHOOSH_REDIS_HOST: redis
 | |
|       WHOOSH_REDIS_PORT: 6379
 | |
|       WHOOSH_REDIS_PASSWORD_FILE: /run/secrets/redis_password
 | |
|       WHOOSH_REDIS_DATABASE: 0
 | |
| 
 | |
|       # Scaling system configuration
 | |
|       WHOOSH_SCALING_KACHING_URL: "https://kaching.chorus.services"
 | |
|       WHOOSH_SCALING_BACKBEAT_URL: "http://backbeat-pulse:8080"
 | |
|       WHOOSH_SCALING_CHORUS_URL: "http://chorus:9000"
 | |
| 
 | |
|       # BACKBEAT integration configuration (temporarily disabled)
 | |
|       WHOOSH_BACKBEAT_ENABLED: "false"
 | |
|       WHOOSH_BACKBEAT_CLUSTER_ID: "chorus-production"
 | |
|       WHOOSH_BACKBEAT_AGENT_ID: "whoosh"
 | |
|       WHOOSH_BACKBEAT_NATS_URL: "nats://backbeat-nats:4222"
 | |
| 
 | |
|       # Docker integration configuration (disabled for agent assignment architecture)
 | |
|       WHOOSH_DOCKER_ENABLED: "false"
 | |
| 
 | |
|     secrets:
 | |
|       - whoosh_db_password
 | |
|       - gitea_token
 | |
|       - webhook_token
 | |
|       - jwt_secret
 | |
|       - service_tokens
 | |
|       - redis_password
 | |
|     # volumes:
 | |
|       # - /var/run/docker.sock:/var/run/docker.sock  # Disabled for agent assignment architecture
 | |
|     deploy:
 | |
|       replicas: 2
 | |
|       restart_policy:
 | |
|         condition: on-failure
 | |
|         delay: 5s
 | |
|         max_attempts: 3
 | |
|         window: 120s
 | |
|       update_config:
 | |
|         parallelism: 1
 | |
|         delay: 10s
 | |
|         failure_action: pause
 | |
|         monitor: 60s
 | |
|         order: start-first
 | |
|       # rollback_config:
 | |
|       #   parallelism: 1
 | |
|       #   delay: 0s
 | |
|       #   failure_action: pause
 | |
|       #   monitor: 60s
 | |
|       #   order: stop-first
 | |
|       placement:
 | |
|         constraints:
 | |
|           - node.hostname != acacia
 | |
|         preferences:
 | |
|           - spread: node.hostname
 | |
|       resources:
 | |
|         limits:
 | |
|           memory: 256M
 | |
|           cpus: '0.5'
 | |
|         reservations:
 | |
|           memory: 128M
 | |
|           cpus: '0.25'
 | |
|       labels:
 | |
|         - traefik.enable=true
 | |
|         - traefik.docker.network=tengig
 | |
|         - traefik.http.routers.whoosh.rule=Host(`whoosh.chorus.services`)
 | |
|         - traefik.http.routers.whoosh.tls=true
 | |
|         - traefik.http.routers.whoosh.tls.certresolver=letsencryptresolver
 | |
|         - traefik.http.routers.photoprism.entrypoints=web,web-secured
 | |
|         - traefik.http.services.whoosh.loadbalancer.server.port=8080
 | |
|         - traefik.http.services.photoprism.loadbalancer.passhostheader=true
 | |
|         - traefik.http.middlewares.whoosh-auth.basicauth.users=admin:$2y$10$example_hash
 | |
|     networks:
 | |
|       - tengig
 | |
|       - chorus_net
 | |
|     healthcheck:
 | |
|       test: ["CMD", "/app/whoosh", "--health-check"]
 | |
|       interval: 30s
 | |
|       timeout: 10s
 | |
|       retries: 3
 | |
|       start_period: 40s
 | |
| 
 | |
|   postgres:
 | |
|     image: postgres:15-alpine
 | |
|     environment:
 | |
|       POSTGRES_DB: whoosh
 | |
|       POSTGRES_USER: whoosh
 | |
|       POSTGRES_PASSWORD_FILE: /run/secrets/whoosh_db_password
 | |
|       POSTGRES_INITDB_ARGS: --auth-host=scram-sha-256
 | |
|     secrets:
 | |
|       - whoosh_db_password
 | |
|     volumes:
 | |
|       - whoosh_postgres_data:/var/lib/postgresql/data
 | |
|     deploy:
 | |
|       replicas: 1
 | |
|       restart_policy:
 | |
|         condition: on-failure
 | |
|         delay: 5s
 | |
|         max_attempts: 3
 | |
|         window: 120s
 | |
|       placement:
 | |
|         preferences:
 | |
|           - spread: node.hostname
 | |
|       resources:
 | |
|         limits:
 | |
|           memory: 512M
 | |
|           cpus: '1.0'
 | |
|         reservations:
 | |
|           memory: 256M
 | |
|           cpus: '0.5'
 | |
|     networks:
 | |
|       - chorus_net
 | |
|     healthcheck:
 | |
|       test: ["CMD-SHELL", "pg_isready -h localhost -p 5432 -U whoosh -d whoosh"]
 | |
|       interval: 30s
 | |
|       timeout: 10s
 | |
|       retries: 5
 | |
|       start_period: 40s
 | |
| 
 | |
| 
 | |
|   redis:
 | |
|     image: redis:7-alpine
 | |
|     command: sh -c 'redis-server --requirepass "$$(cat /run/secrets/redis_password)" --appendonly yes'
 | |
|     secrets:
 | |
|       - redis_password
 | |
|     volumes:
 | |
|       - whoosh_redis_data:/data
 | |
|     deploy:
 | |
|       replicas: 1
 | |
|       restart_policy:
 | |
|         condition: on-failure
 | |
|         delay: 5s
 | |
|         max_attempts: 3
 | |
|         window: 120s
 | |
|       placement:
 | |
|         preferences:
 | |
|           - spread: node.hostname
 | |
|       resources:
 | |
|         limits:
 | |
|           memory: 128M
 | |
|           cpus: '0.25'
 | |
|         reservations:
 | |
|           memory: 64M
 | |
|           cpus: '0.1'
 | |
|     networks:
 | |
|       - chorus_net
 | |
|     healthcheck:
 | |
|       test: ["CMD", "sh", "-c", "redis-cli --no-auth-warning -a $$(cat /run/secrets/redis_password) ping"]
 | |
|       interval: 30s
 | |
|       timeout: 10s
 | |
|       retries: 3
 | |
|       start_period: 30s
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
|   prometheus:
 | |
|     image: prom/prometheus:latest
 | |
|     command:
 | |
|       - '--config.file=/etc/prometheus/prometheus.yml'
 | |
|       - '--storage.tsdb.path=/prometheus'
 | |
|       - '--web.console.libraries=/usr/share/prometheus/console_libraries'
 | |
|       - '--web.console.templates=/usr/share/prometheus/consoles'
 | |
|     volumes:
 | |
|       - /rust/containers/CHORUS/monitoring/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro
 | |
|       - /rust/containers/CHORUS/monitoring/prometheus:/prometheus
 | |
|     ports:
 | |
|       - "9099:9090" # Expose Prometheus UI
 | |
|     deploy:
 | |
|       replicas: 1
 | |
|       labels:
 | |
|         - traefik.enable=true
 | |
|         - traefik.http.routers.prometheus.rule=Host(`prometheus.chorus.services`)
 | |
|         - traefik.http.routers.prometheus.entrypoints=web,web-secured
 | |
|         - traefik.http.routers.prometheus.tls=true
 | |
|         - traefik.http.routers.prometheus.tls.certresolver=letsencryptresolver
 | |
|         - traefik.http.services.prometheus.loadbalancer.server.port=9090
 | |
|     networks:
 | |
|       - chorus_net
 | |
|       - tengig
 | |
|     healthcheck:
 | |
|       test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:9090/-/ready"]
 | |
|       interval: 30s
 | |
|       timeout: 10s
 | |
|       retries: 3
 | |
|       start_period: 10s
 | |
| 
 | |
|   grafana:
 | |
|     image: grafana/grafana:latest
 | |
|     user: "1000:1000"
 | |
|     environment:
 | |
|       - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD:-admin} # Use a strong password in production
 | |
|       - GF_SERVER_ROOT_URL=https://grafana.chorus.services
 | |
|     volumes:
 | |
|       - /rust/containers/CHORUS/monitoring/grafana:/var/lib/grafana
 | |
|     ports:
 | |
|       - "3300:3000" # Expose Grafana UI
 | |
|     deploy:
 | |
|       replicas: 1
 | |
|       labels:
 | |
|         - traefik.enable=true
 | |
|         - traefik.http.routers.grafana.rule=Host(`grafana.chorus.services`)
 | |
|         - traefik.http.routers.grafana.entrypoints=web,web-secured
 | |
|         - traefik.http.routers.grafana.tls=true
 | |
|         - traefik.http.routers.grafana.tls.certresolver=letsencryptresolver
 | |
|         - traefik.http.services.grafana.loadbalancer.server.port=3000
 | |
|     networks:
 | |
|       - chorus_net
 | |
|       - tengig
 | |
|     healthcheck:
 | |
|       test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:3000/api/health"]
 | |
|       interval: 30s
 | |
|       timeout: 10s
 | |
|       retries: 3
 | |
|       start_period: 10s
 | |
| 
 | |
|   # BACKBEAT Pulse Service - Leader-elected tempo broadcaster
 | |
|   # REQ: BACKBEAT-REQ-001 - Single BeatFrame publisher per cluster
 | |
|   # REQ: BACKBEAT-OPS-001 - One replica prefers leadership
 | |
|   backbeat-pulse:
 | |
|     image: anthonyrawlins/backbeat-pulse:v1.0.5
 | |
|     command: >
 | |
|       ./pulse
 | |
|       -cluster=chorus-production
 | |
|       -admin-port=8080
 | |
|       -raft-bind=0.0.0.0:9000
 | |
|       -data-dir=/data
 | |
|       -nats=nats://backbeat-nats:4222
 | |
|       -tempo=2
 | |
|       -bar-length=8
 | |
|       -log-level=info
 | |
|     
 | |
|     # Internal service ports (not externally exposed - routed via Traefik)
 | |
|     expose:
 | |
|       - "8080"  # Admin API
 | |
|       - "9000"  # Raft communication
 | |
|     
 | |
|     # REQ: BACKBEAT-OPS-002 - Health probes for liveness/readiness
 | |
|     healthcheck:
 | |
|       test: ["CMD", "nc", "-z", "localhost", "8080"]
 | |
|       interval: 30s
 | |
|       timeout: 10s
 | |
|       retries: 3
 | |
|       start_period: 60s
 | |
|     
 | |
|     deploy:
 | |
|       replicas: 1              # Single leader with automatic failover
 | |
|       restart_policy:
 | |
|         condition: on-failure
 | |
|         delay: 30s             # Wait longer for NATS to be ready
 | |
|         max_attempts: 5
 | |
|         window: 120s
 | |
|       update_config:
 | |
|         parallelism: 1
 | |
|         delay: 30s             # Wait for leader election
 | |
|         failure_action: pause
 | |
|         monitor: 60s
 | |
|         order: start-first
 | |
|       placement:
 | |
|         preferences:
 | |
|           - spread: node.hostname
 | |
|       resources:
 | |
|         limits:
 | |
|           memory: 256M
 | |
|           cpus: '0.5'
 | |
|         reservations:
 | |
|           memory: 128M
 | |
|           cpus: '0.25'
 | |
|       # Traefik routing for admin API
 | |
|       labels:
 | |
|         - traefik.enable=true
 | |
|         - traefik.http.routers.backbeat-pulse.rule=Host(`backbeat-pulse.chorus.services`)
 | |
|         - traefik.http.routers.backbeat-pulse.tls=true
 | |
|         - traefik.http.routers.backbeat-pulse.tls.certresolver=letsencryptresolver
 | |
|         - traefik.http.services.backbeat-pulse.loadbalancer.server.port=8080
 | |
|     
 | |
|     networks:
 | |
|       - chorus_net
 | |
|       - tengig              # External network for Traefik
 | |
|     
 | |
|     # Container logging
 | |
|     logging:
 | |
|       driver: "json-file"
 | |
|       options:
 | |
|         max-size: "10m"
 | |
|         max-file: "3"
 | |
|         tag: "backbeat-pulse/{{.Name}}/{{.ID}}"
 | |
| 
 | |
|   # BACKBEAT Reverb Service - StatusClaim aggregator
 | |
|   # REQ: BACKBEAT-REQ-020 - Subscribe to INT-B and group by window_id
 | |
|   # REQ: BACKBEAT-OPS-001 - Reverb can scale stateless
 | |
|   backbeat-reverb:
 | |
|     image: anthonyrawlins/backbeat-reverb:v1.0.2
 | |
|     command: >
 | |
|       ./reverb
 | |
|       -cluster=chorus-production
 | |
|       -nats=nats://backbeat-nats:4222
 | |
|       -bar-length=8
 | |
|       -log-level=info
 | |
|     
 | |
|     # Internal service ports (not externally exposed - routed via Traefik)
 | |
|     expose:
 | |
|       - "8080"  # Admin API
 | |
|     
 | |
|     # REQ: BACKBEAT-OPS-002 - Health probes for orchestration (temporarily disabled for testing)
 | |
|     # healthcheck:
 | |
|     #   test: ["CMD", "nc", "-z", "localhost", "8080"]
 | |
|     #   interval: 30s
 | |
|     #   timeout: 10s
 | |
|     #   retries: 3
 | |
|     #   start_period: 60s
 | |
|     
 | |
|     deploy:
 | |
|       replicas: 2              # Stateless, can scale horizontally
 | |
|       restart_policy:
 | |
|         condition: on-failure
 | |
|         delay: 10s
 | |
|         max_attempts: 3
 | |
|         window: 120s
 | |
|       update_config:
 | |
|         parallelism: 1
 | |
|         delay: 15s
 | |
|         failure_action: pause
 | |
|         monitor: 45s
 | |
|         order: start-first
 | |
|       placement:
 | |
|         preferences:
 | |
|           - spread: node.hostname
 | |
|       resources:
 | |
|         limits:
 | |
|           memory: 512M         # Larger for window aggregation
 | |
|           cpus: '1.0'
 | |
|         reservations:
 | |
|           memory: 256M
 | |
|           cpus: '0.5'
 | |
|       # Traefik routing for admin API  
 | |
|       labels:
 | |
|         - traefik.enable=true
 | |
|         - traefik.http.routers.backbeat-reverb.rule=Host(`backbeat-reverb.chorus.services`)
 | |
|         - traefik.http.routers.backbeat-reverb.tls=true
 | |
|         - traefik.http.routers.backbeat-reverb.tls.certresolver=letsencryptresolver
 | |
|         - traefik.http.services.backbeat-reverb.loadbalancer.server.port=8080
 | |
|     
 | |
|     networks:
 | |
|       - chorus_net
 | |
|       - tengig              # External network for Traefik
 | |
|     
 | |
|     # Container logging
 | |
|     logging:
 | |
|       driver: "json-file"
 | |
|       options:
 | |
|         max-size: "10m"
 | |
|         max-file: "3"
 | |
|         tag: "backbeat-reverb/{{.Name}}/{{.ID}}"
 | |
| 
 | |
|   # NATS Message Broker - Use existing or deploy dedicated instance
 | |
|   # REQ: BACKBEAT-INT-001 - Topics via NATS for at-least-once delivery
 | |
|   backbeat-nats:
 | |
|     image: nats:2.9-alpine
 | |
|     command: ["--jetstream"]
 | |
|     deploy:
 | |
|       replicas: 1
 | |
|       restart_policy:
 | |
|         condition: on-failure
 | |
|         delay: 10s
 | |
|         max_attempts: 3
 | |
|         window: 120s
 | |
|       placement:
 | |
|         preferences:
 | |
|           - spread: node.hostname
 | |
|       resources:
 | |
|         limits:
 | |
|           memory: 256M
 | |
|           cpus: '0.5'
 | |
|         reservations:
 | |
|           memory: 128M
 | |
|           cpus: '0.25'
 | |
|     networks:
 | |
|       - chorus_net
 | |
|     # Container logging
 | |
|     logging:
 | |
|       driver: "json-file"
 | |
|       options:
 | |
|         max-size: "10m"
 | |
|         max-file: "3"
 | |
|         tag: "nats/{{.Name}}/{{.ID}}"
 | |
| 
 | |
  # KACHING services are deployed separately in their own stack.
  # License validation accesses https://kaching.chorus.services/api

# Persistent volumes
volumes:
  # Plain local volume, mounted by the chorus service at /app/data.
  chorus_data:
    driver: local

  # Bind-backed volumes holding WHOOSH database and cache state.
  whoosh_postgres_data:
    driver: local
    driver_opts:
      device: /rust/containers/WHOOSH/postgres
      o: bind
      type: none
  whoosh_redis_data:
    driver: local
    driver_opts:
      device: /rust/containers/WHOOSH/redis
      o: bind
      type: none

  # NOTE(review): no service in this file mounts the three monitoring volumes
  # below; prometheus and grafana bind-mount the host paths directly.
  # prometheus_data and prometheus_config also point at the same directory.
  prometheus_data:
    driver: local
    driver_opts:
      device: /rust/containers/CHORUS/monitoring/prometheus
      o: bind
      type: none
  prometheus_config:
    driver: local
    driver_opts:
      device: /rust/containers/CHORUS/monitoring/prometheus
      o: bind
      type: none
  grafana_data:
    driver: local
    driver_opts:
      device: /rust/containers/CHORUS/monitoring/grafana
      o: bind
      type: none

# Networks for CHORUS communication
networks:
  # Overlay network joined by every service in this file.
  chorus_net:
    attachable: true
    driver: overlay

  # Defined outside this stack; joined by the services that carry Traefik labels.
  tengig:
    external: true

# Config objects; chorus_bootstrap is mounted into the chorus service at
# /config/bootstrap.json.
configs:
  chorus_bootstrap:
    file: './bootstrap.json'

# Secrets are all external: they must exist before the stack is deployed.
# `name` is the external secret's name; the mapping key is the name the
# services in this file refer to.
secrets:
  # Used by the chorus service.
  chorus_license_id:
    name: chorus_license_id
    external: true
  resetdata_api_key:
    name: resetdata_api_key
    external: true

  # Shared by the whoosh and postgres services.
  whoosh_db_password:
    name: whoosh_db_password
    external: true

  # Used by the whoosh service.
  gitea_token:
    name: gitea_token
    external: true
  webhook_token:
    name: whoosh_webhook_token
    external: true
  jwt_secret:
    name: whoosh_jwt_secret
    external: true
  service_tokens:
    name: whoosh_service_tokens
    external: true

  # Shared by the whoosh and redis services.
  redis_password:
    name: whoosh_redis_password
    external: true
