bootstrap: freeze March 8 release path and evidence tooling
This commit is contained in:
@@ -2,100 +2,75 @@ version: "3.9"
|
||||
|
||||
services:
|
||||
chorus:
|
||||
image: anthonyrawlins/chorus:latest
|
||||
|
||||
# REQUIRED: License configuration (CHORUS will not start without this)
|
||||
image: localhost:5000/chorus:march8-evidence-20260226-2
|
||||
environment:
|
||||
# CRITICAL: License configuration - REQUIRED for operation
|
||||
- CHORUS_LICENSE_ID_FILE=/run/secrets/chorus_license_id
|
||||
- CHORUS_CLUSTER_ID=${CHORUS_CLUSTER_ID:-docker-cluster}
|
||||
- CHORUS_KACHING_URL=${CHORUS_KACHING_URL:-https://kaching.chorus.services/api}
|
||||
|
||||
# Agent configuration
|
||||
- CHORUS_AGENT_ID=${CHORUS_AGENT_ID:-} # Auto-generated if not provided
|
||||
- CHORUS_KACHING_URL=${CHORUS_KACHING_URL:-http://host.docker.internal:8099}
|
||||
- CHORUS_AGENT_ID=${CHORUS_AGENT_ID:-}
|
||||
- CHORUS_SPECIALIZATION=${CHORUS_SPECIALIZATION:-general_developer}
|
||||
- CHORUS_MAX_TASKS=${CHORUS_MAX_TASKS:-3}
|
||||
- CHORUS_CAPABILITIES=general_development,task_coordination,admin_election
|
||||
|
||||
# Network configuration
|
||||
- CHORUS_API_PORT=8080
|
||||
- CHORUS_HEALTH_PORT=8081
|
||||
- CHORUS_P2P_PORT=9000
|
||||
- CHORUS_BIND_ADDRESS=0.0.0.0
|
||||
|
||||
# Scaling optimizations (as per WHOOSH issue #7)
|
||||
- CHORUS_MDNS_ENABLED=false # Disabled for container/swarm environments
|
||||
- CHORUS_DIALS_PER_SEC=5 # Rate limit outbound connections to prevent storms
|
||||
- CHORUS_MAX_CONCURRENT_DHT=16 # Limit concurrent DHT queries
|
||||
|
||||
# Election stability windows (Medium-risk fix 2.1)
|
||||
- CHORUS_ELECTION_MIN_TERM=30s # Minimum time between elections to prevent churn
|
||||
- CHORUS_LEADER_MIN_TERM=45s # Minimum time before challenging healthy leader
|
||||
|
||||
# Assignment system for runtime configuration (Medium-risk fix 2.2)
|
||||
- ASSIGN_URL=${ASSIGN_URL:-} # Optional: WHOOSH assignment endpoint
|
||||
- TASK_SLOT=${TASK_SLOT:-} # Optional: Task slot identifier
|
||||
- TASK_ID=${TASK_ID:-} # Optional: Task identifier
|
||||
- NODE_ID=${NODE_ID:-} # Optional: Node identifier
|
||||
|
||||
# Bootstrap pool configuration (supports JSON and CSV)
|
||||
- BOOTSTRAP_JSON=/config/bootstrap.json # Optional: JSON bootstrap config
|
||||
- CHORUS_BOOTSTRAP_PEERS=${CHORUS_BOOTSTRAP_PEERS:-} # CSV fallback
|
||||
|
||||
# AI configuration - Provider selection
|
||||
- CHORUS_MDNS_ENABLED=false
|
||||
- CHORUS_DIALS_PER_SEC=5
|
||||
- CHORUS_MAX_CONCURRENT_DHT=16
|
||||
- CHORUS_ELECTION_MIN_TERM=120s
|
||||
- CHORUS_LEADER_MIN_TERM=240s
|
||||
- ASSIGN_URL=${ASSIGN_URL:-}
|
||||
- TASK_SLOT=${TASK_SLOT:-}
|
||||
- TASK_ID=${TASK_ID:-}
|
||||
- NODE_ID=${NODE_ID:-}
|
||||
- WHOOSH_API_BASE_URL=${SWOOSH_API_BASE_URL:-http://swoosh:8080}
|
||||
- WHOOSH_API_ENABLED=true
|
||||
- BOOTSTRAP_JSON=/config/bootstrap.json
|
||||
- CHORUS_BOOTSTRAP_PEERS=${CHORUS_BOOTSTRAP_PEERS:-}
|
||||
- CHORUS_AI_PROVIDER=${CHORUS_AI_PROVIDER:-resetdata}
|
||||
|
||||
# ResetData configuration (default provider)
|
||||
- RESETDATA_BASE_URL=${RESETDATA_BASE_URL:-https://models.au-syd.resetdata.ai/v1}
|
||||
- RESETDATA_BASE_URL=${RESETDATA_BASE_URL:-https://app.resetdata.ai/api/v1}
|
||||
- RESETDATA_API_KEY_FILE=/run/secrets/resetdata_api_key
|
||||
- RESETDATA_MODEL=${RESETDATA_MODEL:-meta/llama-3.1-8b-instruct}
|
||||
|
||||
# Ollama configuration (alternative provider)
|
||||
- RESETDATA_MODEL=${RESETDATA_MODEL:-openai/gpt-oss-120b}
|
||||
- OLLAMA_ENDPOINT=${OLLAMA_ENDPOINT:-http://host.docker.internal:11434}
|
||||
|
||||
# Model configuration
|
||||
- CHORUS_MODELS=${CHORUS_MODELS:-meta/llama-3.1-8b-instruct}
|
||||
- CHORUS_DEFAULT_REASONING_MODEL=${CHORUS_DEFAULT_REASONING_MODEL:-meta/llama-3.1-8b-instruct}
|
||||
|
||||
# Logging configuration
|
||||
- CHORUS_LIGHTRAG_ENABLED=${CHORUS_LIGHTRAG_ENABLED:-true}
|
||||
- CHORUS_LIGHTRAG_BASE_URL=${CHORUS_LIGHTRAG_BASE_URL:-http://host.docker.internal:9621}
|
||||
- CHORUS_LIGHTRAG_TIMEOUT=${CHORUS_LIGHTRAG_TIMEOUT:-30s}
|
||||
- CHORUS_LIGHTRAG_API_KEY=${CHORUS_LIGHTRAG_API_KEY:-your-secure-api-key-here}
|
||||
- CHORUS_LIGHTRAG_DEFAULT_MODE=${CHORUS_LIGHTRAG_DEFAULT_MODE:-hybrid}
|
||||
- LOG_LEVEL=${LOG_LEVEL:-info}
|
||||
- LOG_FORMAT=${LOG_FORMAT:-structured}
|
||||
|
||||
# BACKBEAT configuration
|
||||
- CHORUS_BACKBEAT_ENABLED=${CHORUS_BACKBEAT_ENABLED:-true}
|
||||
- CHORUS_BACKBEAT_CLUSTER_ID=${CHORUS_BACKBEAT_CLUSTER_ID:-chorus-production}
|
||||
- CHORUS_BACKBEAT_AGENT_ID=${CHORUS_BACKBEAT_AGENT_ID:-} # Auto-generated from CHORUS_AGENT_ID
|
||||
- CHORUS_BACKBEAT_AGENT_ID=${CHORUS_BACKBEAT_AGENT_ID:-}
|
||||
- CHORUS_BACKBEAT_NATS_URL=${CHORUS_BACKBEAT_NATS_URL:-nats://backbeat-nats:4222}
|
||||
|
||||
# Prompt sourcing (mounted volume)
|
||||
- CHORUS_TRANSPORT_TELEMETRY_INTERVAL=${CHORUS_TRANSPORT_TELEMETRY_INTERVAL:-30s}
|
||||
- CHORUS_TRANSPORT_TELEMETRY_SUBJECT=${CHORUS_TRANSPORT_TELEMETRY_SUBJECT:-chorus.telemetry.transport}
|
||||
- CHORUS_TRANSPORT_METRICS_NATS_URL=${CHORUS_TRANSPORT_METRICS_NATS_URL:-}
|
||||
- CHORUS_TRANSPORT_MODE=${CHORUS_TRANSPORT_MODE:-quic_only}
|
||||
- CHORUS_PROMPTS_DIR=/etc/chorus/prompts
|
||||
- CHORUS_DEFAULT_INSTRUCTIONS_PATH=/etc/chorus/prompts/defaults.md
|
||||
- CHORUS_ROLE=${CHORUS_ROLE:-arbiter}
|
||||
|
||||
# Docker secrets for sensitive configuration
|
||||
secrets:
|
||||
- chorus_license_id
|
||||
- resetdata_api_key
|
||||
|
||||
# Configuration files
|
||||
configs:
|
||||
- source: chorus_bootstrap
|
||||
target: /config/bootstrap.json
|
||||
|
||||
# Persistent data storage
|
||||
volumes:
|
||||
- chorus_data:/app/data
|
||||
# Mount prompts directory read-only for role YAMLs and defaults.md
|
||||
- /rust/containers/WHOOSH/prompts:/etc/chorus/prompts:ro
|
||||
|
||||
# Network ports
|
||||
- /rust/containers/CHORUS/models.yaml:/app/configs/models.yaml:ro
|
||||
ports:
|
||||
- "${CHORUS_P2P_PORT:-9000}:9000" # P2P communication
|
||||
|
||||
# Container resource limits
|
||||
- "${CHORUS_P2P_PORT:-9000}:9000/tcp"
|
||||
- "${CHORUS_P2P_PORT:-9000}:9000/udp"
|
||||
deploy:
|
||||
labels:
|
||||
- shepherd.autodeploy=true
|
||||
mode: replicated
|
||||
replicas: ${CHORUS_REPLICAS:-9}
|
||||
replicas: ${CHORUS_REPLICAS:-20}
|
||||
update_config:
|
||||
parallelism: 1
|
||||
delay: 10s
|
||||
@@ -109,108 +84,46 @@ services:
|
||||
resources:
|
||||
limits:
|
||||
cpus: "${CHORUS_CPU_LIMIT:-1.0}"
|
||||
memory: "${CHORUS_MEMORY_LIMIT:-1G}"
|
||||
memory: "${CHORUS_MEMORY_LIMIT:-4G}"
|
||||
reservations:
|
||||
cpus: "0.1"
|
||||
cpus: "0.2"
|
||||
memory: 128M
|
||||
placement:
|
||||
constraints:
|
||||
- node.hostname != acacia
|
||||
preferences:
|
||||
- spread: node.hostname
|
||||
# CHORUS is internal-only, no Traefik labels needed
|
||||
|
||||
# Network configuration
|
||||
networks:
|
||||
- chorus_net
|
||||
|
||||
# Host resolution for external services
|
||||
- tengig
|
||||
- chorus_ipvlan
|
||||
extra_hosts:
|
||||
- "host.docker.internal:host-gateway"
|
||||
|
||||
# Container logging configuration
|
||||
logging:
|
||||
driver: "json-file"
|
||||
options:
|
||||
max-size: "10m"
|
||||
max-file: "3"
|
||||
tag: "{{.ImageName}}/{{.Name}}/{{.ID}}"
|
||||
|
||||
# Health check configuration
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:8081/health"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 10s
|
||||
start_period: 30s # Increased from 10s to allow P2P mesh formation (15s bootstrap + margin)
|
||||
|
||||
whoosh:
|
||||
image: anthonyrawlins/whoosh:scaling-v1.0.0
|
||||
swoosh:
|
||||
image: anthonyrawlins/swoosh:1.0.2
|
||||
ports:
|
||||
- target: 8080
|
||||
published: 8800
|
||||
protocol: tcp
|
||||
mode: ingress
|
||||
environment:
|
||||
# Database configuration
|
||||
WHOOSH_DATABASE_DB_HOST: postgres
|
||||
WHOOSH_DATABASE_DB_PORT: 5432
|
||||
WHOOSH_DATABASE_DB_NAME: whoosh
|
||||
WHOOSH_DATABASE_DB_USER: whoosh
|
||||
WHOOSH_DATABASE_DB_PASSWORD_FILE: /run/secrets/whoosh_db_password
|
||||
WHOOSH_DATABASE_DB_SSL_MODE: disable
|
||||
WHOOSH_DATABASE_DB_AUTO_MIGRATE: "true"
|
||||
|
||||
# Server configuration
|
||||
WHOOSH_SERVER_LISTEN_ADDR: ":8080"
|
||||
WHOOSH_SERVER_READ_TIMEOUT: "30s"
|
||||
WHOOSH_SERVER_WRITE_TIMEOUT: "30s"
|
||||
WHOOSH_SERVER_SHUTDOWN_TIMEOUT: "30s"
|
||||
|
||||
# GITEA configuration
|
||||
WHOOSH_GITEA_BASE_URL: https://gitea.chorus.services
|
||||
WHOOSH_GITEA_TOKEN_FILE: /run/secrets/gitea_token
|
||||
WHOOSH_GITEA_WEBHOOK_TOKEN_FILE: /run/secrets/webhook_token
|
||||
WHOOSH_GITEA_WEBHOOK_PATH: /webhooks/gitea
|
||||
|
||||
# Auth configuration
|
||||
WHOOSH_AUTH_JWT_SECRET_FILE: /run/secrets/jwt_secret
|
||||
WHOOSH_AUTH_SERVICE_TOKENS_FILE: /run/secrets/service_tokens
|
||||
WHOOSH_AUTH_JWT_EXPIRY: "24h"
|
||||
|
||||
# Logging
|
||||
WHOOSH_LOGGING_LEVEL: debug
|
||||
WHOOSH_LOGGING_ENVIRONMENT: production
|
||||
|
||||
# Redis configuration
|
||||
WHOOSH_REDIS_ENABLED: "true"
|
||||
WHOOSH_REDIS_HOST: redis
|
||||
WHOOSH_REDIS_PORT: 6379
|
||||
WHOOSH_REDIS_PASSWORD_FILE: /run/secrets/redis_password
|
||||
WHOOSH_REDIS_DATABASE: 0
|
||||
|
||||
# Scaling system configuration
|
||||
WHOOSH_SCALING_KACHING_URL: "https://kaching.chorus.services"
|
||||
WHOOSH_SCALING_BACKBEAT_URL: "http://backbeat-pulse:8080"
|
||||
WHOOSH_SCALING_CHORUS_URL: "http://chorus:9000"
|
||||
|
||||
# BACKBEAT integration configuration (temporarily disabled)
|
||||
WHOOSH_BACKBEAT_ENABLED: "false"
|
||||
WHOOSH_BACKBEAT_CLUSTER_ID: "chorus-production"
|
||||
WHOOSH_BACKBEAT_AGENT_ID: "whoosh"
|
||||
WHOOSH_BACKBEAT_NATS_URL: "nats://backbeat-nats:4222"
|
||||
|
||||
secrets:
|
||||
- whoosh_db_password
|
||||
- gitea_token
|
||||
- webhook_token
|
||||
- jwt_secret
|
||||
- service_tokens
|
||||
- redis_password
|
||||
- SWOOSH_LISTEN_ADDR=:8080
|
||||
- SWOOSH_WAL_DIR=/data/wal
|
||||
- SWOOSH_SNAPSHOT_PATH=/data/snapshots/latest.json
|
||||
volumes:
|
||||
- /var/run/docker.sock:/var/run/docker.sock
|
||||
- swoosh_data:/data
|
||||
deploy:
|
||||
replicas: 2
|
||||
replicas: 1
|
||||
restart_policy:
|
||||
condition: on-failure
|
||||
delay: 5s
|
||||
@@ -222,17 +135,6 @@ services:
|
||||
failure_action: pause
|
||||
monitor: 60s
|
||||
order: start-first
|
||||
# rollback_config:
|
||||
# parallelism: 1
|
||||
# delay: 0s
|
||||
# failure_action: pause
|
||||
# monitor: 60s
|
||||
# order: stop-first
|
||||
placement:
|
||||
constraints:
|
||||
- node.hostname != acacia
|
||||
preferences:
|
||||
- spread: node.hostname
|
||||
resources:
|
||||
limits:
|
||||
memory: 256M
|
||||
@@ -243,18 +145,18 @@ services:
|
||||
labels:
|
||||
- traefik.enable=true
|
||||
- traefik.docker.network=tengig
|
||||
- traefik.http.routers.whoosh.rule=Host(`whoosh.chorus.services`)
|
||||
- traefik.http.routers.whoosh.tls=true
|
||||
- traefik.http.routers.whoosh.tls.certresolver=letsencryptresolver
|
||||
- traefik.http.routers.photoprism.entrypoints=web,web-secured
|
||||
- traefik.http.services.whoosh.loadbalancer.server.port=8080
|
||||
- traefik.http.services.photoprism.loadbalancer.passhostheader=true
|
||||
- traefik.http.middlewares.whoosh-auth.basicauth.users=admin:$2y$10$example_hash
|
||||
- traefik.http.routers.swoosh.rule=Host(`swoosh.chorus.services`)
|
||||
- traefik.http.routers.swoosh.entrypoints=web,web-secured
|
||||
- traefik.http.routers.swoosh.tls=true
|
||||
- traefik.http.routers.swoosh.tls.certresolver=letsencryptresolver
|
||||
- traefik.http.services.swoosh.loadbalancer.server.port=8080
|
||||
- shepherd.autodeploy=true
|
||||
- traefik.http.services.swoosh.loadbalancer.passhostheader=true
|
||||
networks:
|
||||
- tengig
|
||||
- chorus_net
|
||||
- chorus_ipvlan
|
||||
healthcheck:
|
||||
test: ["CMD", "/app/whoosh", "--health-check"]
|
||||
test: ["CMD", "wget", "--no-verbose", "--tries=1", "-O", "/dev/null", "http://localhost:8080/health"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
@@ -263,10 +165,10 @@ services:
|
||||
postgres:
|
||||
image: postgres:15-alpine
|
||||
environment:
|
||||
POSTGRES_DB: whoosh
|
||||
POSTGRES_USER: whoosh
|
||||
POSTGRES_PASSWORD_FILE: /run/secrets/whoosh_db_password
|
||||
POSTGRES_INITDB_ARGS: --auth-host=scram-sha-256
|
||||
- POSTGRES_DB=whoosh
|
||||
- POSTGRES_USER=whoosh
|
||||
- POSTGRES_PASSWORD_FILE=/run/secrets/whoosh_db_password
|
||||
- POSTGRES_INITDB_ARGS=--auth-host=scram-sha-256
|
||||
secrets:
|
||||
- whoosh_db_password
|
||||
volumes:
|
||||
@@ -278,9 +180,9 @@ services:
|
||||
delay: 5s
|
||||
max_attempts: 3
|
||||
window: 120s
|
||||
placement:
|
||||
preferences:
|
||||
- spread: node.hostname
|
||||
# placement:
|
||||
# constraints:
|
||||
# - node.hostname == ironwood
|
||||
resources:
|
||||
limits:
|
||||
memory: 512M
|
||||
@@ -289,7 +191,8 @@ services:
|
||||
memory: 256M
|
||||
cpus: '0.5'
|
||||
networks:
|
||||
- chorus_net
|
||||
- tengig
|
||||
- chorus_ipvlan
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "pg_isready -h localhost -p 5432 -U whoosh -d whoosh"]
|
||||
interval: 30s
|
||||
@@ -297,7 +200,6 @@ services:
|
||||
retries: 5
|
||||
start_period: 40s
|
||||
|
||||
|
||||
redis:
|
||||
image: redis:7-alpine
|
||||
command: sh -c 'redis-server --requirepass "$$(cat /run/secrets/redis_password)" --appendonly yes'
|
||||
@@ -323,7 +225,7 @@ services:
|
||||
memory: 64M
|
||||
cpus: '0.1'
|
||||
networks:
|
||||
- chorus_net
|
||||
- chorus_ipvlan
|
||||
healthcheck:
|
||||
test: ["CMD", "sh", "-c", "redis-cli --no-auth-warning -a $$(cat /run/secrets/redis_password) ping"]
|
||||
interval: 30s
|
||||
@@ -331,15 +233,6 @@ services:
|
||||
retries: 3
|
||||
start_period: 30s
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
prometheus:
|
||||
image: prom/prometheus:latest
|
||||
command:
|
||||
@@ -350,8 +243,9 @@ services:
|
||||
volumes:
|
||||
- /rust/containers/CHORUS/monitoring/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro
|
||||
- /rust/containers/CHORUS/monitoring/prometheus:/prometheus
|
||||
- /rust/containers/CHORUS/observability/prometheus/alerts:/etc/prometheus/alerts:ro
|
||||
ports:
|
||||
- "9099:9090" # Expose Prometheus UI
|
||||
- "9099:9090"
|
||||
deploy:
|
||||
replicas: 1
|
||||
labels:
|
||||
@@ -361,8 +255,9 @@ services:
|
||||
- traefik.http.routers.prometheus.tls=true
|
||||
- traefik.http.routers.prometheus.tls.certresolver=letsencryptresolver
|
||||
- traefik.http.services.prometheus.loadbalancer.server.port=9090
|
||||
- shepherd.autodeploy=true
|
||||
networks:
|
||||
- chorus_net
|
||||
- chorus_ipvlan
|
||||
- tengig
|
||||
healthcheck:
|
||||
test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:9090/-/ready"]
|
||||
@@ -375,12 +270,12 @@ services:
|
||||
image: grafana/grafana:latest
|
||||
user: "1000:1000"
|
||||
environment:
|
||||
- GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD:-admin} # Use a strong password in production
|
||||
- GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD:-admin}
|
||||
- GF_SERVER_ROOT_URL=https://grafana.chorus.services
|
||||
volumes:
|
||||
- /rust/containers/CHORUS/monitoring/grafana:/var/lib/grafana
|
||||
ports:
|
||||
- "3300:3000" # Expose Grafana UI
|
||||
- "3300:3000"
|
||||
deploy:
|
||||
replicas: 1
|
||||
labels:
|
||||
@@ -390,8 +285,9 @@ services:
|
||||
- traefik.http.routers.grafana.tls=true
|
||||
- traefik.http.routers.grafana.tls.certresolver=letsencryptresolver
|
||||
- traefik.http.services.grafana.loadbalancer.server.port=3000
|
||||
- shepherd.autodeploy=true
|
||||
networks:
|
||||
- chorus_net
|
||||
- chorus_ipvlan
|
||||
- tengig
|
||||
healthcheck:
|
||||
test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:3000/api/health"]
|
||||
@@ -400,11 +296,8 @@ services:
|
||||
retries: 3
|
||||
start_period: 10s
|
||||
|
||||
# BACKBEAT Pulse Service - Leader-elected tempo broadcaster
|
||||
# REQ: BACKBEAT-REQ-001 - Single BeatFrame publisher per cluster
|
||||
# REQ: BACKBEAT-OPS-001 - One replica prefers leadership
|
||||
backbeat-pulse:
|
||||
image: anthonyrawlins/backbeat-pulse:v1.0.5
|
||||
image: docker.io/anthonyrawlins/backbeat-pulse:latest
|
||||
command: >
|
||||
./pulse
|
||||
-cluster=chorus-production
|
||||
@@ -415,30 +308,25 @@ services:
|
||||
-tempo=2
|
||||
-bar-length=8
|
||||
-log-level=info
|
||||
|
||||
# Internal service ports (not externally exposed - routed via Traefik)
|
||||
expose:
|
||||
- "8080" # Admin API
|
||||
- "9000" # Raft communication
|
||||
|
||||
# REQ: BACKBEAT-OPS-002 - Health probes for liveness/readiness
|
||||
- "8080"
|
||||
- "9000"
|
||||
healthcheck:
|
||||
test: ["CMD", "nc", "-z", "localhost", "8080"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 60s
|
||||
|
||||
deploy:
|
||||
replicas: 1 # Single leader with automatic failover
|
||||
replicas: 1
|
||||
restart_policy:
|
||||
condition: on-failure
|
||||
delay: 30s # Wait longer for NATS to be ready
|
||||
delay: 30s
|
||||
max_attempts: 5
|
||||
window: 120s
|
||||
update_config:
|
||||
parallelism: 1
|
||||
delay: 30s # Wait for leader election
|
||||
delay: 30s
|
||||
failure_action: pause
|
||||
monitor: 60s
|
||||
order: start-first
|
||||
@@ -452,19 +340,15 @@ services:
|
||||
reservations:
|
||||
memory: 128M
|
||||
cpus: '0.25'
|
||||
# Traefik routing for admin API
|
||||
labels:
|
||||
- traefik.enable=true
|
||||
- traefik.http.routers.backbeat-pulse.rule=Host(`backbeat-pulse.chorus.services`)
|
||||
- traefik.http.routers.backbeat-pulse.tls=true
|
||||
- traefik.http.routers.backbeat-pulse.tls.certresolver=letsencryptresolver
|
||||
- traefik.http.services.backbeat-pulse.loadbalancer.server.port=8080
|
||||
|
||||
networks:
|
||||
- chorus_net
|
||||
- tengig # External network for Traefik
|
||||
|
||||
# Container logging
|
||||
- chorus_ipvlan
|
||||
- tengig
|
||||
logging:
|
||||
driver: "json-file"
|
||||
options:
|
||||
@@ -472,32 +356,18 @@ services:
|
||||
max-file: "3"
|
||||
tag: "backbeat-pulse/{{.Name}}/{{.ID}}"
|
||||
|
||||
# BACKBEAT Reverb Service - StatusClaim aggregator
|
||||
# REQ: BACKBEAT-REQ-020 - Subscribe to INT-B and group by window_id
|
||||
# REQ: BACKBEAT-OPS-001 - Reverb can scale stateless
|
||||
backbeat-reverb:
|
||||
image: anthonyrawlins/backbeat-reverb:v1.0.2
|
||||
image: docker.io/anthonyrawlins/backbeat-reverb:latest
|
||||
command: >
|
||||
./reverb
|
||||
-cluster=chorus-production
|
||||
-nats=nats://backbeat-nats:4222
|
||||
-bar-length=8
|
||||
-log-level=info
|
||||
|
||||
# Internal service ports (not externally exposed - routed via Traefik)
|
||||
expose:
|
||||
- "8080" # Admin API
|
||||
|
||||
# REQ: BACKBEAT-OPS-002 - Health probes for orchestration (temporarily disabled for testing)
|
||||
# healthcheck:
|
||||
# test: ["CMD", "nc", "-z", "localhost", "8080"]
|
||||
# interval: 30s
|
||||
# timeout: 10s
|
||||
# retries: 3
|
||||
# start_period: 60s
|
||||
|
||||
- "8080"
|
||||
deploy:
|
||||
replicas: 2 # Stateless, can scale horizontally
|
||||
replicas: 2
|
||||
restart_policy:
|
||||
condition: on-failure
|
||||
delay: 10s
|
||||
@@ -514,24 +384,20 @@ services:
|
||||
- spread: node.hostname
|
||||
resources:
|
||||
limits:
|
||||
memory: 512M # Larger for window aggregation
|
||||
memory: 512M
|
||||
cpus: '1.0'
|
||||
reservations:
|
||||
memory: 256M
|
||||
cpus: '0.5'
|
||||
# Traefik routing for admin API
|
||||
labels:
|
||||
- traefik.enable=true
|
||||
- traefik.http.routers.backbeat-reverb.rule=Host(`backbeat-reverb.chorus.services`)
|
||||
- traefik.http.routers.backbeat-reverb.tls=true
|
||||
- traefik.http.routers.backbeat-reverb.tls.certresolver=letsencryptresolver
|
||||
- traefik.http.services.backbeat-reverb.loadbalancer.server.port=8080
|
||||
|
||||
networks:
|
||||
- chorus_net
|
||||
- tengig # External network for Traefik
|
||||
|
||||
# Container logging
|
||||
- chorus_ipvlan
|
||||
- tengig
|
||||
logging:
|
||||
driver: "json-file"
|
||||
options:
|
||||
@@ -539,8 +405,6 @@ services:
|
||||
max-file: "3"
|
||||
tag: "backbeat-reverb/{{.Name}}/{{.ID}}"
|
||||
|
||||
# NATS Message Broker - Use existing or deploy dedicated instance
|
||||
# REQ: BACKBEAT-INT-001 - Topics via NATS for at-least-once delivery
|
||||
backbeat-nats:
|
||||
image: nats:2.9-alpine
|
||||
command: ["--jetstream"]
|
||||
@@ -562,8 +426,7 @@ services:
|
||||
memory: 128M
|
||||
cpus: '0.25'
|
||||
networks:
|
||||
- chorus_net
|
||||
# Container logging
|
||||
- chorus_ipvlan
|
||||
logging:
|
||||
driver: "json-file"
|
||||
options:
|
||||
@@ -571,10 +434,55 @@ services:
|
||||
max-file: "3"
|
||||
tag: "nats/{{.Name}}/{{.ID}}"
|
||||
|
||||
# KACHING services are deployed separately in their own stack
|
||||
# License validation will access https://kaching.chorus.services/api
|
||||
shepherd:
|
||||
image: containrrr/shepherd:latest
|
||||
environment:
|
||||
SLEEP_TIME: "5m"
|
||||
FILTER_SERVICES: "label=shepherd.autodeploy=true"
|
||||
WITH_REGISTRY_AUTH: "true"
|
||||
ROLLBACK_ON_FAILURE: "true"
|
||||
TZ: "UTC"
|
||||
volumes:
|
||||
- /var/run/docker.sock:/var/run/docker.sock
|
||||
deploy:
|
||||
replicas: 1
|
||||
restart_policy:
|
||||
condition: any
|
||||
placement:
|
||||
constraints:
|
||||
- node.role == manager
|
||||
|
||||
hmmm-monitor:
|
||||
image: docker.io/anthonyrawlins/hmmm-monitor:latest
|
||||
environment:
|
||||
- WHOOSH_API_BASE_URL=http://swoosh:8080
|
||||
ports:
|
||||
- "9001:9001"
|
||||
deploy:
|
||||
labels:
|
||||
- shepherd.autodeploy=true
|
||||
replicas: 1
|
||||
restart_policy:
|
||||
condition: on-failure
|
||||
delay: 5s
|
||||
max_attempts: 3
|
||||
window: 120s
|
||||
resources:
|
||||
limits:
|
||||
memory: 128M
|
||||
cpus: '0.25'
|
||||
reservations:
|
||||
memory: 64M
|
||||
cpus: '0.1'
|
||||
networks:
|
||||
- chorus_ipvlan
|
||||
logging:
|
||||
driver: "json-file"
|
||||
options:
|
||||
max-size: "10m"
|
||||
max-file: "3"
|
||||
tag: "hmmm-monitor/{{.Name}}/{{.ID}}"
|
||||
|
||||
# Persistent volumes
|
||||
volumes:
|
||||
prometheus_data:
|
||||
driver: local
|
||||
@@ -596,6 +504,12 @@ volumes:
|
||||
device: /rust/containers/CHORUS/monitoring/grafana
|
||||
chorus_data:
|
||||
driver: local
|
||||
swoosh_data:
|
||||
driver: local
|
||||
driver_opts:
|
||||
type: none
|
||||
o: bind
|
||||
device: /rust/containers/SWOOSH/data
|
||||
whoosh_postgres_data:
|
||||
driver: local
|
||||
driver_opts:
|
||||
@@ -608,17 +522,19 @@ volumes:
|
||||
type: none
|
||||
o: bind
|
||||
device: /rust/containers/WHOOSH/redis
|
||||
whoosh_ui:
|
||||
driver: local
|
||||
driver_opts:
|
||||
type: none
|
||||
o: bind
|
||||
device: /rust/containers/WHOOSH/ui
|
||||
|
||||
|
||||
# Networks for CHORUS communication
|
||||
networks:
|
||||
tengig:
|
||||
external: true
|
||||
|
||||
chorus_net:
|
||||
driver: overlay
|
||||
attachable: true
|
||||
|
||||
chorus_ipvlan:
|
||||
external: true
|
||||
|
||||
configs:
|
||||
chorus_bootstrap:
|
||||
@@ -630,7 +546,7 @@ secrets:
|
||||
name: chorus_license_id
|
||||
resetdata_api_key:
|
||||
external: true
|
||||
name: resetdata_api_key
|
||||
name: resetdata_api_key_v2
|
||||
whoosh_db_password:
|
||||
external: true
|
||||
name: whoosh_db_password
|
||||
@@ -642,7 +558,7 @@ secrets:
|
||||
name: whoosh_webhook_token
|
||||
jwt_secret:
|
||||
external: true
|
||||
name: whoosh_jwt_secret
|
||||
name: whoosh_jwt_secret_v4
|
||||
service_tokens:
|
||||
external: true
|
||||
name: whoosh_service_tokens
|
||||
|
||||
Reference in New Issue
Block a user