Files
CHORUS/docker/docker-compose.yml
anthonyrawlins 2578876eeb feat: Add Docker secrets support for ResetData API key
This commit introduces secure Docker secrets integration for the ResetData
API key, enabling CHORUS to read sensitive configuration from mounted secret
files instead of environment variables.

## Key Changes:

**Security Enhancement:**
- Modified `pkg/config/config.go` to support reading ResetData API key from
  Docker secret files using `getEnvOrFileContent()` pattern
- Enables secure deployment with `RESETDATA_API_KEY_FILE` pointing to
  mounted secret file instead of plain text environment variables

**Container Deployment:**
- Added `Dockerfile.simple` for optimized Alpine-based deployment using
  pre-built static binaries (chorus-agent)
- Updated `docker-compose.yml` with proper secret mounting configuration
- Fixed container binary path to use new `chorus-agent` instead of deprecated
  `chorus` wrapper

**WHOOSH Integration:**
- Critical for WHOOSH wave-based auto-scaling system integration
- Enables secure credential management in Docker Swarm deployments
- Supports dynamic scaling operations while maintaining security standards

## Technical Details:

The ResetData configuration now supports both environment variable fallback
and Docker secrets:
```go
APIKey: getEnvOrFileContent("RESETDATA_API_KEY", "RESETDATA_API_KEY_FILE")
```

This change enables CHORUS to participate in WHOOSH's wave-based scaling
architecture while maintaining production-grade security for API credentials.

## Testing:

- Verified successful deployment in Docker Swarm environment
- Confirmed CHORUS agent initialization with secret-based configuration
- Validated integration with BACKBEAT and P2P networking components

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-09-22 15:00:50 +10:00

557 lines
15 KiB
YAML

# Compose file format 3.9.
version: "3.9"
# Service definitions follow.
# NOTE(review): leading indentation appears to have been stripped from this
# copy; every service key below must be nested one level under `services:`
# before the file can be deployed.
services:
chorus:
  image: anthonyrawlins/chorus:resetdata-secrets-v1.0.5

  # Runtime settings, written as a mapping. Values of the form
  # ${VAR:-default} are substituted from the deploying shell's environment.
  environment:
    # Licensing - marked REQUIRED: CHORUS will not start without it.
    CHORUS_LICENSE_ID_FILE: "/run/secrets/chorus_license_id"
    CHORUS_CLUSTER_ID: "${CHORUS_CLUSTER_ID:-docker-cluster}"
    CHORUS_KACHING_URL: "${CHORUS_KACHING_URL:-https://kaching.chorus.services/api}"
    # Agent identity and capacity
    CHORUS_AGENT_ID: "${CHORUS_AGENT_ID:-}"  # Auto-generated if not provided
    CHORUS_SPECIALIZATION: "${CHORUS_SPECIALIZATION:-general_developer}"
    CHORUS_MAX_TASKS: "${CHORUS_MAX_TASKS:-3}"
    CHORUS_CAPABILITIES: "${CHORUS_CAPABILITIES:-general_development,task_coordination}"
    # Listener ports and bind address inside the container
    CHORUS_API_PORT: "8080"
    CHORUS_HEALTH_PORT: "8081"
    CHORUS_P2P_PORT: "9000"
    CHORUS_BIND_ADDRESS: "0.0.0.0"
    # AI provider selection
    CHORUS_AI_PROVIDER: "${CHORUS_AI_PROVIDER:-resetdata}"
    # ResetData (default provider); the API key is read from a mounted secret
    RESETDATA_BASE_URL: "${RESETDATA_BASE_URL:-https://models.au-syd.resetdata.ai/v1}"
    RESETDATA_API_KEY_FILE: "/run/secrets/resetdata_api_key"
    RESETDATA_MODEL: "${RESETDATA_MODEL:-meta/llama-3.1-8b-instruct}"
    # Ollama (alternative provider)
    OLLAMA_ENDPOINT: "${OLLAMA_ENDPOINT:-http://host.docker.internal:11434}"
    # Model selection
    CHORUS_MODELS: "${CHORUS_MODELS:-meta/llama-3.1-8b-instruct}"
    CHORUS_DEFAULT_REASONING_MODEL: "${CHORUS_DEFAULT_REASONING_MODEL:-meta/llama-3.1-8b-instruct}"
    # Logging
    LOG_LEVEL: "${LOG_LEVEL:-info}"
    LOG_FORMAT: "${LOG_FORMAT:-structured}"
    # BACKBEAT integration
    CHORUS_BACKBEAT_ENABLED: "${CHORUS_BACKBEAT_ENABLED:-true}"
    CHORUS_BACKBEAT_CLUSTER_ID: "${CHORUS_BACKBEAT_CLUSTER_ID:-chorus-production}"
    CHORUS_BACKBEAT_AGENT_ID: "${CHORUS_BACKBEAT_AGENT_ID:-}"  # Auto-generated from CHORUS_AGENT_ID
    CHORUS_BACKBEAT_NATS_URL: "${CHORUS_BACKBEAT_NATS_URL:-nats://backbeat-nats:4222}"
    # Prompt sourcing (paths live on the read-only prompts mount below)
    CHORUS_PROMPTS_DIR: "/etc/chorus/prompts"
    CHORUS_DEFAULT_INSTRUCTIONS_PATH: "/etc/chorus/prompts/defaults.md"
    CHORUS_ROLE: "${CHORUS_ROLE:-arbiter}"

  # Secrets are mounted under /run/secrets/<name>; the *_FILE variables
  # above point at these files.
  secrets:
    - chorus_license_id
    - resetdata_api_key

  volumes:
    # Persistent agent data
    - chorus_data:/app/data
    # Role YAMLs and defaults.md, mounted read-only
    - /rust/containers/WHOOSH/prompts:/etc/chorus/prompts:ro

  # Only the P2P port is published; the host-side port is overridable.
  ports:
    - "${CHORUS_P2P_PORT:-9000}:9000"  # P2P communication

  # CHORUS is internal-only, so it joins chorus_net and carries no Traefik labels.
  networks:
    - chorus_net

  # Lets the container reach services on the host (e.g. the default
  # OLLAMA_ENDPOINT above) via host.docker.internal.
  extra_hosts:
    - "host.docker.internal:host-gateway"

  # Probes the health port configured by CHORUS_HEALTH_PORT.
  # NOTE(review): requires curl to be present in the image - confirm.
  healthcheck:
    test:
      - CMD
      - curl
      - -f
      - http://localhost:8081/health
    interval: 30s
    timeout: 10s
    retries: 3
    start_period: 10s

  # Rotate json-file logs at 10 MB, keeping three files.
  logging:
    driver: json-file
    options:
      max-size: "10m"
      max-file: "3"
      tag: '{{.ImageName}}/{{.Name}}/{{.ID}}'

  # Swarm scheduling, rollout and resource settings
  deploy:
    mode: replicated
    replicas: ${CHORUS_REPLICAS:-1}
    placement:
      # Never schedule on rosewood or acacia; otherwise spread across hosts.
      constraints:
        - node.hostname != rosewood
        - node.hostname != acacia
      preferences:
        - spread: node.hostname
    resources:
      limits:
        cpus: "${CHORUS_CPU_LIMIT:-1.0}"
        memory: "${CHORUS_MEMORY_LIMIT:-1G}"
      reservations:
        cpus: "0.1"
        memory: 128M
    restart_policy:
      condition: on-failure
      delay: 5s
      max_attempts: 3
      window: 120s
    update_config:
      # One task at a time; the new task starts before the old one stops.
      parallelism: 1
      delay: 10s
      failure_action: pause
      order: start-first
whoosh:
  image: anthonyrawlins/whoosh:scaling-v1.0.0
  # Publishes container port 8080 on 8800 through the Swarm ingress mesh.
  ports:
    - target: 8080
      published: 8800
      protocol: tcp
      mode: ingress
  # All values are quoted where they could be read as non-strings, so the
  # container always receives plain string environment variables.
  environment:
    # Database configuration
    WHOOSH_DATABASE_DB_HOST: postgres
    WHOOSH_DATABASE_DB_PORT: "5432"
    WHOOSH_DATABASE_DB_NAME: whoosh
    WHOOSH_DATABASE_DB_USER: whoosh
    WHOOSH_DATABASE_DB_PASSWORD_FILE: /run/secrets/whoosh_db_password
    WHOOSH_DATABASE_DB_SSL_MODE: disable
    WHOOSH_DATABASE_DB_AUTO_MIGRATE: "true"
    # Server configuration
    WHOOSH_SERVER_LISTEN_ADDR: ":8080"
    WHOOSH_SERVER_READ_TIMEOUT: "30s"
    WHOOSH_SERVER_WRITE_TIMEOUT: "30s"
    WHOOSH_SERVER_SHUTDOWN_TIMEOUT: "30s"
    # GITEA configuration
    WHOOSH_GITEA_BASE_URL: https://gitea.chorus.services
    WHOOSH_GITEA_TOKEN_FILE: /run/secrets/gitea_token
    WHOOSH_GITEA_WEBHOOK_TOKEN_FILE: /run/secrets/webhook_token
    WHOOSH_GITEA_WEBHOOK_PATH: /webhooks/gitea
    # Auth configuration
    WHOOSH_AUTH_JWT_SECRET_FILE: /run/secrets/jwt_secret
    WHOOSH_AUTH_SERVICE_TOKENS_FILE: /run/secrets/service_tokens
    WHOOSH_AUTH_JWT_EXPIRY: "24h"
    # Logging
    # NOTE(review): debug level alongside environment=production - confirm
    # this verbosity is intended.
    WHOOSH_LOGGING_LEVEL: debug
    WHOOSH_LOGGING_ENVIRONMENT: production
    # Redis configuration
    WHOOSH_REDIS_ENABLED: "true"
    WHOOSH_REDIS_HOST: redis
    WHOOSH_REDIS_PORT: "6379"
    WHOOSH_REDIS_PASSWORD_FILE: /run/secrets/redis_password
    WHOOSH_REDIS_DATABASE: "0"
    # Scaling system configuration
    WHOOSH_SCALING_KACHING_URL: "https://kaching.chorus.services"
    WHOOSH_SCALING_BACKBEAT_URL: "http://backbeat-pulse:8080"
    WHOOSH_SCALING_CHORUS_URL: "http://chorus:8080"
  # Mounted under /run/secrets/<name>; referenced by the *_FILE variables above.
  secrets:
    - whoosh_db_password
    - gitea_token
    - webhook_token
    - jwt_secret
    - service_tokens
    - redis_password
  volumes:
    # Gives the container access to the local Docker Engine API.
    # NOTE(review): presumably used by the scaling system to manage Swarm
    # services; the Swarm API is only served by manager nodes and there is no
    # node.role constraint below - confirm every eligible node is a manager.
    - /var/run/docker.sock:/var/run/docker.sock
  deploy:
    replicas: 2
    restart_policy:
      condition: on-failure
      delay: 5s
      max_attempts: 3
      window: 120s
    update_config:
      # One task at a time; new task starts before the old one is stopped.
      parallelism: 1
      delay: 10s
      failure_action: pause
      monitor: 60s
      order: start-first
    # rollback_config:
    #   parallelism: 1
    #   delay: 0s
    #   failure_action: pause
    #   monitor: 60s
    #   order: stop-first
    placement:
      constraints:
        - node.hostname != acacia
      preferences:
        - spread: node.hostname
    resources:
      limits:
        memory: 256M
        cpus: '0.5'
      reservations:
        memory: 128M
        cpus: '0.25'
    # Traefik routing. In Swarm mode Traefik reads service-level labels,
    # which is why these live under `deploy`.
    labels:
      - traefik.enable=true
      # This service is attached to several networks; pin the one Traefik
      # must use so it does not pick a network it cannot reach.
      - traefik.docker.network=tengig
      - 'traefik.http.routers.whoosh.rule=Host(`whoosh.chorus.services`)'
      - traefik.http.routers.whoosh.tls=true
      # Resolver name aligned with the BACKBEAT routers in this file
      # (was `letsencrypt`). NOTE(review): confirm against the Traefik
      # static configuration.
      - traefik.http.routers.whoosh.tls.certresolver=letsencryptresolver
      - traefik.http.services.whoosh.loadbalancer.server.port=8080
      # NOTE(review): this middleware is defined but no router in this file
      # references it, and the hash is a placeholder. Replace the hash and
      # attach it via routers.whoosh.middlewares, or remove the label.
      - 'traefik.http.middlewares.whoosh-auth.basicauth.users=admin:$$2y$$10$$example_hash'
  networks:
    - tengig
    - whoosh-backend
    - chorus_net
  # Uses the binary's own --health-check mode as the probe.
  healthcheck:
    test: ["CMD", "/app/whoosh", "--health-check"]
    interval: 30s
    timeout: 10s
    retries: 3
    start_period: 40s
postgres:
  image: postgres:15-alpine

  # Database bootstrap settings; the password is read from the mounted secret.
  environment:
    POSTGRES_DB: whoosh
    POSTGRES_USER: whoosh
    POSTGRES_PASSWORD_FILE: /run/secrets/whoosh_db_password
    POSTGRES_INITDB_ARGS: "--auth-host=scram-sha-256"

  secrets:
    - whoosh_db_password

  # Data directory backed by the whoosh_postgres_data named volume.
  volumes:
    - whoosh_postgres_data:/var/lib/postgresql/data

  networks:
    - whoosh-backend
    - chorus_net

  # Ready when the server accepts connections for the whoosh user.
  healthcheck:
    test:
      - CMD-SHELL
      - pg_isready -U whoosh
    interval: 30s
    timeout: 10s
    retries: 5
    start_period: 30s

  deploy:
    replicas: 1
    placement:
      preferences:
        - spread: node.hostname
    resources:
      limits:
        cpus: '1.0'
        memory: 512M
      reservations:
        cpus: '0.5'
        memory: 256M
    restart_policy:
      condition: on-failure
      delay: 5s
      max_attempts: 3
      window: 120s
redis:
  image: redis:7-alpine
  # Starts redis-server with the password read from the mounted secret and
  # append-only persistence enabled. `$$` is Compose's escape for a literal
  # `$`, so the substitution runs in the container shell, not at deploy time.
  command:
    - sh
    - "-c"
    - 'redis-server --requirepass "$$(cat /run/secrets/redis_password)" --appendonly yes'
  secrets:
    - redis_password
  volumes:
    - whoosh_redis_data:/data
  deploy:
    replicas: 1
    restart_policy:
      condition: on-failure
      delay: 5s
      max_attempts: 3
      window: 120s
    placement:
      preferences:
        - spread: node.hostname
    resources:
      limits:
        memory: 128M
        cpus: '0.25'
      reservations:
        memory: 64M
        cpus: '0.1'
  networks:
    - whoosh-backend
    - chorus_net
  # Authenticated PING probe. The password is passed through REDISCLI_AUTH
  # with the substitution quoted, so a password containing spaces or shell
  # metacharacters is not word-split and it does not appear in redis-cli's
  # argument list. The probe only passes when the reply is PONG.
  healthcheck:
    test:
      - CMD-SHELL
      - 'REDISCLI_AUTH="$$(cat /run/secrets/redis_password)" redis-cli ping | grep -q PONG'
    interval: 30s
    timeout: 10s
    retries: 3
    start_period: 30s
# BACKBEAT Pulse Service - Leader-elected tempo broadcaster
# REQ: BACKBEAT-REQ-001 - Single BeatFrame publisher per cluster
# REQ: BACKBEAT-OPS-001 - One replica prefers leadership
backbeat-pulse:
  image: anthonyrawlins/backbeat-pulse:v1.0.5
  # Folded scalar: the lines below are joined into a single command line.
  command: >
    ./pulse
    -cluster=chorus-production
    -admin-port=8080
    -raft-bind=0.0.0.0:9000
    -data-dir=/data
    -nats=nats://backbeat-nats:4222
    -tempo=2
    -bar-length=8
    -log-level=info
  # Internal service ports (not externally exposed - routed via Traefik)
  expose:
    - "8080"  # Admin API
    - "9000"  # Raft communication
  # REQ: BACKBEAT-OPS-002 - Health probes for liveness/readiness
  # TCP connect check against the admin port.
  healthcheck:
    test: ["CMD", "nc", "-z", "localhost", "8080"]
    interval: 30s
    timeout: 10s
    retries: 3
    start_period: 60s
  deploy:
    replicas: 1  # Single leader with automatic failover
    restart_policy:
      condition: on-failure
      delay: 30s  # Wait longer for NATS to be ready
      max_attempts: 5
      window: 120s
    update_config:
      parallelism: 1
      delay: 30s  # Wait for leader election
      failure_action: pause
      monitor: 60s
      order: start-first
    placement:
      preferences:
        - spread: node.hostname
      constraints:
        - node.hostname != rosewood  # Avoid intermittent gaming PC
    resources:
      limits:
        memory: 256M
        cpus: '0.5'
      reservations:
        memory: 128M
        cpus: '0.25'
    # Traefik routing for admin API. In Swarm mode Traefik reads
    # service-level labels, which is why these live under `deploy`.
    labels:
      - traefik.enable=true
      # The service is attached to both chorus_net and tengig; pin the
      # network Traefik must use so it does not pick one it cannot reach.
      - traefik.docker.network=tengig
      - 'traefik.http.routers.backbeat-pulse.rule=Host(`backbeat-pulse.chorus.services`)'
      - traefik.http.routers.backbeat-pulse.tls=true
      - traefik.http.routers.backbeat-pulse.tls.certresolver=letsencryptresolver
      - traefik.http.services.backbeat-pulse.loadbalancer.server.port=8080
  networks:
    - chorus_net
    - tengig  # External network for Traefik
  # Container logging
  logging:
    driver: "json-file"
    options:
      max-size: "10m"
      max-file: "3"
      tag: "backbeat-pulse/{{.Name}}/{{.ID}}"
# BACKBEAT Reverb Service - StatusClaim aggregator
# REQ: BACKBEAT-REQ-020 - Subscribe to INT-B and group by window_id
# REQ: BACKBEAT-OPS-001 - Reverb can scale stateless
backbeat-reverb:
  image: anthonyrawlins/backbeat-reverb:v1.0.2
  # Folded scalar: the lines below are joined into a single command line.
  command: >
    ./reverb
    -cluster=chorus-production
    -nats=nats://backbeat-nats:4222
    -bar-length=8
    -log-level=info
  # Internal service ports (not externally exposed - routed via Traefik)
  expose:
    - "8080"  # Admin API
  # REQ: BACKBEAT-OPS-002 - Health probes for orchestration (temporarily disabled for testing)
  # healthcheck:
  #   test: ["CMD", "nc", "-z", "localhost", "8080"]
  #   interval: 30s
  #   timeout: 10s
  #   retries: 3
  #   start_period: 60s
  deploy:
    replicas: 2  # Stateless, can scale horizontally
    restart_policy:
      condition: on-failure
      delay: 10s
      max_attempts: 3
      window: 120s
    update_config:
      parallelism: 1
      delay: 15s
      failure_action: pause
      monitor: 45s
      order: start-first
    placement:
      preferences:
        - spread: node.hostname
      constraints:
        - node.hostname != rosewood
    resources:
      limits:
        memory: 512M  # Larger for window aggregation
        cpus: '1.0'
      reservations:
        memory: 256M
        cpus: '0.5'
    # Traefik routing for admin API. In Swarm mode Traefik reads
    # service-level labels, which is why these live under `deploy`.
    labels:
      - traefik.enable=true
      # The service is attached to both chorus_net and tengig; pin the
      # network Traefik must use so it does not pick one it cannot reach.
      - traefik.docker.network=tengig
      - 'traefik.http.routers.backbeat-reverb.rule=Host(`backbeat-reverb.chorus.services`)'
      - traefik.http.routers.backbeat-reverb.tls=true
      - traefik.http.routers.backbeat-reverb.tls.certresolver=letsencryptresolver
      - traefik.http.services.backbeat-reverb.loadbalancer.server.port=8080
  networks:
    - chorus_net
    - tengig  # External network for Traefik
  # Container logging
  logging:
    driver: "json-file"
    options:
      max-size: "10m"
      max-file: "3"
      tag: "backbeat-reverb/{{.Name}}/{{.ID}}"
# NATS message broker used by the BACKBEAT services (dedicated instance).
# REQ: BACKBEAT-INT-001 - Topics via NATS for at-least-once delivery
backbeat-nats:
  image: nats:2.9-alpine
  # Single argument passed to the image entrypoint: enable JetStream.
  command:
    - "--jetstream"

  # Reachable only on the internal overlay; no ports are published.
  networks:
    - chorus_net

  # Rotate json-file logs at 10 MB, keeping three files.
  logging:
    driver: json-file
    options:
      max-size: "10m"
      max-file: "3"
      tag: 'nats/{{.Name}}/{{.ID}}'

  deploy:
    replicas: 1
    placement:
      constraints:
        - node.hostname != rosewood
      preferences:
        - spread: node.hostname
    resources:
      limits:
        cpus: '0.5'
        memory: 256M
      reservations:
        cpus: '0.25'
        memory: 128M
    restart_policy:
      condition: on-failure
      delay: 10s
      max_attempts: 3
      window: 120s
# KACHING is not part of this stack; it is deployed separately, and license
# validation reaches it at https://kaching.chorus.services/api

# Named volumes
volumes:
  # CHORUS agent data, managed by the local driver with default options.
  chorus_data:
    driver: local

  # PostgreSQL data directory, bind-mounted from a fixed host path.
  whoosh_postgres_data:
    driver: local
    driver_opts:
      type: none
      o: bind
      device: /rust/containers/WHOOSH/postgres

  # Redis data directory, bind-mounted from a fixed host path.
  whoosh_redis_data:
    driver: local
    driver_opts:
      type: none
      o: bind
      device: /rust/containers/WHOOSH/redis
# Overlay networks used by the stack
networks:
  # Pre-existing network created outside this file; the Traefik-routed
  # services attach to it.
  tengig:
    external: true

  # Private overlay shared by whoosh, postgres and redis; standalone
  # containers cannot attach to it.
  whoosh-backend:
    driver: overlay
    attachable: false

  # Main CHORUS overlay with a fixed /24 subnet; standalone containers may
  # attach to it.
  chorus_net:
    driver: overlay
    attachable: true
    ipam:
      config:
        - subnet: "10.201.0.0/24"
# Swarm secrets. Every entry is external: it must already exist in the
# cluster under the given `name` before the stack is deployed. The key on
# the left is the in-stack alias that services reference.
secrets:
  # Used by the chorus service
  chorus_license_id:
    name: chorus_license_id
    external: true
  resetdata_api_key:
    name: resetdata_api_key
    external: true

  # Shared by whoosh and postgres
  whoosh_db_password:
    name: whoosh_db_password
    external: true

  # Used by the whoosh service; several aliases map to whoosh_-prefixed
  # cluster secret names.
  gitea_token:
    name: gitea_token
    external: true
  webhook_token:
    name: whoosh_webhook_token
    external: true
  jwt_secret:
    name: whoosh_jwt_secret
    external: true
  service_tokens:
    name: whoosh_service_tokens
    external: true

  # Shared by whoosh and redis
  redis_password:
    name: whoosh_redis_password
    external: true