---
# Monitoring and observability stack for BZZZ v2.
#
# Written as a Docker Swarm stack file: services rely on `deploy:` settings
# (placement, resources, labels), Swarm `configs:` and external `secrets:`.
# Services routed through Traefik are attached to the external `tengig`
# network; everything else talks over the stack-local `monitoring` overlay.
version: '3.8'

services:
  # Prometheus for metrics collection
  prometheus:
    image: prom/prometheus:v2.48.0
    networks:
      - tengig
      - monitoring
    ports:
      - "9090:9090"
    volumes:
      - /rust/bzzz-v2/config/prometheus:/etc/prometheus:ro
      - /rust/bzzz-v2/data/prometheus:/prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--storage.tsdb.retention.time=30d'
      - '--storage.tsdb.retention.size=50GB'
      - '--web.console.libraries=/etc/prometheus/console_libraries'
      - '--web.console.templates=/etc/prometheus/consoles'
      # NOTE(review): this flag turns on the HTTP reload/quit endpoints, and
      # the service is both published on 9090 and routed through Traefik --
      # confirm access to it is restricted upstream.
      - '--web.enable-lifecycle'
      - '--web.external-url=https://prometheus.deepblack.cloud'
    # NOTE(review): both config targets sit inside the read-only bind mount
    # of /etc/prometheus declared above -- confirm the files are mounted as
    # intended on the target node.
    configs:
      - source: prometheus_config
        target: /etc/prometheus/prometheus.yml
      - source: prometheus_rules
        target: /etc/prometheus/rules.yml
    deploy:
      replicas: 1
      placement:
        constraints:
          - node.hostname == walnut
      resources:
        limits:
          memory: 4G
          cpus: '2.0'
        reservations:
          memory: 2G
          cpus: '1.0'
      labels:
        - "traefik.enable=true"
        # The service is attached to two networks; pin the one Traefik must
        # use to reach it, otherwise the backend address may be taken from
        # the `monitoring` overlay.
        # NOTE(review): Traefik v3's Swarm provider names this label
        # `traefik.swarm.network` -- adjust if running v3.
        - "traefik.docker.network=tengig"
        - "traefik.http.routers.prometheus.rule=Host(`prometheus.deepblack.cloud`)"
        - "traefik.http.services.prometheus.loadbalancer.server.port=9090"
        - "traefik.http.routers.prometheus.tls=true"

  # Grafana for visualization
  grafana:
    image: grafana/grafana:10.2.0
    networks:
      - tengig
      - monitoring
    ports:
      - "3000:3000"
    environment:
      # The `__FILE` suffix makes Grafana read the value from the mounted
      # secret file instead of the environment.
      - GF_SECURITY_ADMIN_PASSWORD__FILE=/run/secrets/grafana_admin_password
      - GF_SERVER_ROOT_URL=https://grafana.deepblack.cloud
      - GF_SERVER_DOMAIN=grafana.deepblack.cloud
      - GF_ANALYTICS_REPORTING_ENABLED=false
      - GF_ANALYTICS_CHECK_FOR_UPDATES=false
      - GF_USERS_ALLOW_SIGN_UP=false
      - GF_INSTALL_PLUGINS=grafana-piechart-panel,grafana-worldmap-panel
    volumes:
      - /rust/bzzz-v2/data/grafana:/var/lib/grafana
      - /rust/bzzz-v2/config/grafana/provisioning:/etc/grafana/provisioning:ro
    secrets:
      - grafana_admin_password
    deploy:
      replicas: 1
      placement:
        constraints:
          - node.hostname == walnut
      resources:
        limits:
          memory: 2G
          cpus: '1.0'
        reservations:
          memory: 1G
          cpus: '0.5'
      labels:
        - "traefik.enable=true"
        # Two networks attached; tell Traefik which one to route over.
        - "traefik.docker.network=tengig"
        - "traefik.http.routers.grafana.rule=Host(`grafana.deepblack.cloud`)"
        - "traefik.http.services.grafana.loadbalancer.server.port=3000"
        - "traefik.http.routers.grafana.tls=true"

  # AlertManager for alerting
  alertmanager:
    image: prom/alertmanager:v0.26.0
    networks:
      - tengig
      - monitoring
    ports:
      - "9093:9093"
    volumes:
      - /rust/bzzz-v2/data/alertmanager:/alertmanager
    command:
      - '--config.file=/etc/alertmanager/config.yml'
      - '--storage.path=/alertmanager'
      - '--web.external-url=https://alerts.deepblack.cloud'
    configs:
      - source: alertmanager_config
        target: /etc/alertmanager/config.yml
    deploy:
      replicas: 1
      placement:
        constraints:
          - node.hostname == ironwood
      resources:
        limits:
          memory: 1G
          cpus: '0.5'
        reservations:
          memory: 512M
          cpus: '0.25'
      labels:
        - "traefik.enable=true"
        # Two networks attached; tell Traefik which one to route over.
        - "traefik.docker.network=tengig"
        - "traefik.http.routers.alertmanager.rule=Host(`alerts.deepblack.cloud`)"
        - "traefik.http.services.alertmanager.loadbalancer.server.port=9093"
        - "traefik.http.routers.alertmanager.tls=true"

  # Node Exporter for system metrics
  node-exporter:
    image: prom/node-exporter:v1.6.1
    networks:
      - monitoring
    ports:
      # Host-mode publishing: this is a global service, so each node must
      # expose its own task on 9100. With the default ingress mode the
      # routing mesh may answer a request to one node with another node's
      # metrics.
      - target: 9100
        published: 9100
        protocol: tcp
        mode: host
    volumes:
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
      - /etc/hostname:/etc/nodename:ro
    command:
      - '--path.procfs=/host/proc'
      - '--path.rootfs=/rootfs'
      - '--path.sysfs=/host/sys'
      # `$$` is the Compose escape for a literal `$` in the regex.
      - '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)'
      # NOTE(review): no volume in this service provides this directory --
      # confirm the textfile collector is expected to find anything there.
      - '--collector.textfile.directory=/var/lib/node_exporter/textfile_collector'
    deploy:
      mode: global
      resources:
        limits:
          memory: 256M
          cpus: '0.5'
        reservations:
          memory: 128M
          cpus: '0.25'

  # cAdvisor for container metrics
  cadvisor:
    image: gcr.io/cadvisor/cadvisor:v0.47.0
    networks:
      - monitoring
    ports:
      # Host-mode publishing for the same reason as node-exporter: one task
      # per node, each reachable on its own node's port 8080.
      - target: 8080
        published: 8080
        protocol: tcp
        mode: host
    volumes:
      - /:/rootfs:ro
      - /var/run:/var/run:rw
      - /sys:/sys:ro
      - /var/lib/docker:/var/lib/docker:ro
      - /var/run/docker.sock:/var/run/docker.sock:ro
    command:
      - '--housekeeping_interval=10s'
      - '--docker_only=true'
      - '--disable_metrics=percpu,process,sched,tcp,udp,disk,diskIO,accelerator,hugetlb,referenced_memory,cpu_topology,resctrl'
    deploy:
      mode: global
      resources:
        limits:
          memory: 512M
          cpus: '0.5'
        reservations:
          memory: 256M
          cpus: '0.25'

  # BZZZ P2P Metrics Exporter
  bzzz-p2p-exporter:
    image: registry.home.deepblack.cloud/bzzz/p2p-exporter:v2.0.0
    networks:
      - monitoring
      - bzzz-internal
    ports:
      - "9200:9200"
    environment:
      - BZZZ_AGENT_ENDPOINTS=http://bzzz-v2_bzzz-agent:9000
      - DHT_BOOTSTRAP_NODES=walnut:9101,ironwood:9102,acacia:9103
      - METRICS_PORT=9200
      - SCRAPE_INTERVAL=30s
    deploy:
      replicas: 1
      placement:
        constraints:
          - node.hostname == acacia
      resources:
        limits:
          memory: 512M
          cpus: '0.5'

  # DHT Network Monitor
  dht-monitor:
    image: registry.home.deepblack.cloud/bzzz/dht-monitor:v2.0.0
    networks:
      - monitoring
      - bzzz-internal
    ports:
      - "9201:9201"
    environment:
      - DHT_BOOTSTRAP_NODES=walnut:9101,ironwood:9102,acacia:9103
      - MONITOR_PORT=9201
      - PEER_CHECK_INTERVAL=60s
    deploy:
      replicas: 1
      resources:
        limits:
          memory: 256M
          cpus: '0.25'

  # Content Store Monitor
  content-monitor:
    image: registry.home.deepblack.cloud/bzzz/content-monitor:v2.0.0
    networks:
      - monitoring
      - bzzz-internal
    ports:
      - "9202:9202"
    environment:
      # Must be the in-container path: the blob store is bind-mounted at
      # /data/blobs below, and the host path does not exist inside the
      # container.
      - CONTENT_STORE_PATH=/data/blobs
      - MONITOR_PORT=9202
      - CHECK_INTERVAL=300s
    volumes:
      - /rust/bzzz-v2/data/blobs:/data/blobs:ro
    deploy:
      replicas: 1
      resources:
        limits:
          memory: 256M
          cpus: '0.25'

  # OpenAI Cost Monitor
  openai-cost-monitor:
    image: registry.home.deepblack.cloud/bzzz/openai-cost-monitor:v2.0.0
    networks:
      - monitoring
      - bzzz-internal
    ports:
      - "9203:9203"
    environment:
      - POSTGRES_HOST=bzzz-v2_postgres
      - POSTGRES_DB=bzzz_v2
      - POSTGRES_USER=bzzz
      - MONITOR_PORT=9203
      - COST_ALERT_THRESHOLD=100.00
    # NOTE(review): no environment variable points at the secret file --
    # presumably the image reads /run/secrets/postgres_password directly;
    # confirm.
    secrets:
      - postgres_password
    deploy:
      replicas: 1
      resources:
        limits:
          memory: 256M
          cpus: '0.25'

  # Log aggregation with Loki
  loki:
    image: grafana/loki:2.9.0
    networks:
      - monitoring
    ports:
      - "3100:3100"
    volumes:
      - /rust/bzzz-v2/data/loki:/loki
    # Quoted because the value starts with `-`.
    command: '-config.file=/etc/loki/local-config.yaml'
    configs:
      - source: loki_config
        target: /etc/loki/local-config.yaml
    deploy:
      replicas: 1
      placement:
        constraints:
          - node.hostname == acacia
      resources:
        limits:
          memory: 2G
          cpus: '1.0'
        reservations:
          memory: 1G
          cpus: '0.5'

  # Promtail for log shipping
  promtail:
    image: grafana/promtail:2.9.0
    networks:
      - monitoring
    volumes:
      - /var/log:/var/log:ro
      - /var/lib/docker/containers:/var/lib/docker/containers:ro
      - /rust/bzzz-v2/logs:/app/logs:ro
    # Quoted because the value starts with `-`.
    command: '-config.file=/etc/promtail/config.yml'
    configs:
      - source: promtail_config
        target: /etc/promtail/config.yml
    deploy:
      mode: global
      resources:
        limits:
          memory: 256M
          cpus: '0.25'

  # Jaeger for distributed tracing
  jaeger:
    image: jaegertracing/all-in-one:1.49
    networks:
      - tengig
      - monitoring
    ports:
      - "16686:16686"
      - "14268:14268"
    environment:
      - COLLECTOR_OTLP_ENABLED=true
      - SPAN_STORAGE_TYPE=badger
      - BADGER_EPHEMERAL=false
      - BADGER_DIRECTORY_VALUE=/badger/data
      - BADGER_DIRECTORY_KEY=/badger/key
    volumes:
      - /rust/bzzz-v2/data/jaeger:/badger
    deploy:
      replicas: 1
      placement:
        constraints:
          - node.hostname == ironwood
      resources:
        limits:
          memory: 1G
          cpus: '0.5'
      labels:
        - "traefik.enable=true"
        # Two networks attached; tell Traefik which one to route over.
        - "traefik.docker.network=tengig"
        - "traefik.http.routers.jaeger.rule=Host(`tracing.deepblack.cloud`)"
        - "traefik.http.services.jaeger.loadbalancer.server.port=16686"
        - "traefik.http.routers.jaeger.tls=true"

networks:
  # Pre-existing network shared with the services that carry Traefik labels.
  tengig:
    external: true
  # Stack-local overlay; attachable so standalone containers can join it.
  monitoring:
    driver: overlay
    attachable: true
  # Pre-existing network used by the BZZZ-specific monitors.
  bzzz-internal:
    external: true

# Secrets are created outside this stack and referenced by their real names.
secrets:
  grafana_admin_password:
    external: true
    name: bzzz_grafana_admin_password
  postgres_password:
    external: true
    name: bzzz_postgres_password

# Configs are created outside this stack and referenced by their real names.
configs:
  prometheus_config:
    external: true
    name: bzzz_prometheus_config
  prometheus_rules:
    external: true
    name: bzzz_prometheus_rules
  alertmanager_config:
    external: true
    name: bzzz_alertmanager_config
  loki_config:
    external: true
    name: bzzz_loki_config
  promtail_config:
    external: true
    name: bzzz_promtail_config