Prepare for v2 development: Add MCP integration and future development planning
- Add FUTURE_DEVELOPMENT.md with comprehensive v2 protocol specification
- Add MCP integration design and implementation foundation
- Add infrastructure and deployment configurations
- Update system architecture for v2 evolution

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
372
infrastructure/monitoring/docker-compose.monitoring.yml
Normal file
372
infrastructure/monitoring/docker-compose.monitoring.yml
Normal file
@@ -0,0 +1,372 @@
|
||||
version: '3.8'
|
||||
|
||||
services:
|
||||
# Prometheus: metrics collection and storage, pinned to node `walnut`.
prometheus:
  image: prom/prometheus:v2.48.0
  command:
    - "--config.file=/etc/prometheus/prometheus.yml"
    - "--storage.tsdb.path=/prometheus"
    # TSDB retention is capped both by age and by size.
    - "--storage.tsdb.retention.time=30d"
    - "--storage.tsdb.retention.size=50GB"
    - "--web.console.libraries=/etc/prometheus/console_libraries"
    - "--web.console.templates=/etc/prometheus/consoles"
    # NOTE(review): the lifecycle API is enabled while port 9090 is also
    # published directly; confirm that exposure is intended.
    - "--web.enable-lifecycle"
    - "--web.external-url=https://prometheus.deepblack.cloud"
  # Main configuration and rule file come from external swarm configs.
  configs:
    - source: prometheus_config
      target: /etc/prometheus/prometheus.yml
    - source: prometheus_rules
      target: /etc/prometheus/rules.yml
  volumes:
    - /rust/bzzz-v2/config/prometheus:/etc/prometheus:ro
    - /rust/bzzz-v2/data/prometheus:/prometheus
  networks:
    - tengig
    - monitoring
  ports:
    - '9090:9090'
  deploy:
    replicas: 1
    placement:
      constraints:
        - node.hostname == walnut
    resources:
      reservations:
        cpus: '1.0'
        memory: 2G
      limits:
        cpus: '2.0'
        memory: 4G
    # Traefik routes prometheus.deepblack.cloud (TLS) to container port 9090.
    labels:
      traefik.enable: 'true'
      traefik.http.routers.prometheus.rule: 'Host(`prometheus.deepblack.cloud`)'
      traefik.http.services.prometheus.loadbalancer.server.port: '9090'
      traefik.http.routers.prometheus.tls: 'true'
|
||||
|
||||
# Grafana: dashboards and visualization, pinned to node `walnut`.
grafana:
  image: grafana/grafana:10.2.0
  environment:
    # Admin password is read from the mounted swarm secret, not set inline.
    GF_SECURITY_ADMIN_PASSWORD__FILE: /run/secrets/grafana_admin_password
    GF_SERVER_ROOT_URL: https://grafana.deepblack.cloud
    GF_SERVER_DOMAIN: grafana.deepblack.cloud
    GF_ANALYTICS_REPORTING_ENABLED: 'false'
    GF_ANALYTICS_CHECK_FOR_UPDATES: 'false'
    GF_USERS_ALLOW_SIGN_UP: 'false'
    GF_INSTALL_PLUGINS: grafana-piechart-panel,grafana-worldmap-panel
  secrets:
    - grafana_admin_password
  volumes:
    - /rust/bzzz-v2/data/grafana:/var/lib/grafana
    # Provisioning files are mounted read-only from the host.
    - /rust/bzzz-v2/config/grafana/provisioning:/etc/grafana/provisioning:ro
  networks:
    - tengig
    - monitoring
  ports:
    - '3000:3000'
  deploy:
    replicas: 1
    placement:
      constraints:
        - node.hostname == walnut
    resources:
      reservations:
        cpus: '0.5'
        memory: 1G
      limits:
        cpus: '1.0'
        memory: 2G
    # Traefik routes grafana.deepblack.cloud (TLS) to container port 3000.
    labels:
      traefik.enable: 'true'
      traefik.http.routers.grafana.rule: 'Host(`grafana.deepblack.cloud`)'
      traefik.http.services.grafana.loadbalancer.server.port: '3000'
      traefik.http.routers.grafana.tls: 'true'
|
||||
|
||||
# Alertmanager: alert handling, pinned to node `ironwood`.
alertmanager:
  image: prom/alertmanager:v0.26.0
  command:
    - "--config.file=/etc/alertmanager/config.yml"
    - "--storage.path=/alertmanager"
    - "--web.external-url=https://alerts.deepblack.cloud"
  # Configuration is supplied by an external swarm config.
  configs:
    - source: alertmanager_config
      target: /etc/alertmanager/config.yml
  volumes:
    - /rust/bzzz-v2/data/alertmanager:/alertmanager
  networks:
    - tengig
    - monitoring
  ports:
    - '9093:9093'
  deploy:
    replicas: 1
    placement:
      constraints:
        - node.hostname == ironwood
    resources:
      reservations:
        cpus: '0.25'
        memory: 512M
      limits:
        cpus: '0.5'
        memory: 1G
    # Traefik routes alerts.deepblack.cloud (TLS) to container port 9093.
    labels:
      traefik.enable: 'true'
      traefik.http.routers.alertmanager.rule: 'Host(`alerts.deepblack.cloud`)'
      traefik.http.services.alertmanager.loadbalancer.server.port: '9093'
      traefik.http.routers.alertmanager.tls: 'true'
|
||||
|
||||
# Node Exporter: host system metrics, one task per swarm node (mode: global).
node-exporter:
  image: prom/node-exporter:v1.6.1
  networks:
    - monitoring
  # NOTE(review): this is an ingress-published port on a global service, so a
  # request to <node>:9100 may be answered by another node's task; confirm
  # Prometheus scrapes tasks over the `monitoring` network instead.
  ports:
    - "9100:9100"
  volumes:
    # Host filesystems are mounted read-only so the exporter reports on the
    # node rather than on its own container.
    - /proc:/host/proc:ro
    - /sys:/host/sys:ro
    - /:/rootfs:ro
    - /etc/hostname:/etc/nodename:ro
  command:
    - '--path.procfs=/host/proc'
    - '--path.rootfs=/rootfs'
    - '--path.sysfs=/host/sys'
    # `$$` is the Compose escape for a literal `$` in the regex.
    - '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)'
    # The textfile directory is read through the /rootfs bind mount: no volume
    # provides /var/lib/node_exporter/textfile_collector inside the container,
    # so the un-prefixed path could never contain any host-written .prom files.
    - '--collector.textfile.directory=/rootfs/var/lib/node_exporter/textfile_collector'
  deploy:
    mode: global
    resources:
      limits:
        memory: 256M
        cpus: '0.5'
      reservations:
        memory: 128M
        cpus: '0.25'
|
||||
|
||||
# cAdvisor: per-container metrics, one task per swarm node (mode: global).
cadvisor:
  image: gcr.io/cadvisor/cadvisor:v0.47.0
  command:
    - "--housekeeping_interval=10s"
    - "--docker_only=true"
    # The metric groups listed here are switched off.
    - "--disable_metrics=percpu,process,sched,tcp,udp,disk,diskIO,accelerator,hugetlb,referenced_memory,cpu_topology,resctrl"
  volumes:
    - /:/rootfs:ro
    # NOTE(review): /var/run is mounted read-write while the Docker socket
    # beneath it is also mounted separately as :ro; confirm rw is required.
    - /var/run:/var/run:rw
    - /sys:/sys:ro
    - /var/lib/docker:/var/lib/docker:ro
    - /var/run/docker.sock:/var/run/docker.sock:ro
  networks:
    - monitoring
  ports:
    - '8080:8080'
  deploy:
    mode: global
    resources:
      reservations:
        cpus: '0.25'
        memory: 256M
      limits:
        cpus: '0.5'
        memory: 512M
|
||||
|
||||
# BZZZ P2P metrics exporter, pinned to node `acacia`.
bzzz-p2p-exporter:
  image: registry.home.deepblack.cloud/bzzz/p2p-exporter:v2.0.0
  environment:
    BZZZ_AGENT_ENDPOINTS: 'http://bzzz-v2_bzzz-agent:9000'
    DHT_BOOTSTRAP_NODES: 'walnut:9101,ironwood:9102,acacia:9103'
    # Same port number as the published port below.
    METRICS_PORT: '9200'
    SCRAPE_INTERVAL: '30s'
  networks:
    - monitoring
    - bzzz-internal
  ports:
    - '9200:9200'
  deploy:
    replicas: 1
    placement:
      constraints:
        - node.hostname == acacia
    resources:
      limits:
        cpus: '0.5'
        memory: 512M
|
||||
|
||||
# DHT network monitor; single replica with no placement constraint.
dht-monitor:
  image: registry.home.deepblack.cloud/bzzz/dht-monitor:v2.0.0
  environment:
    DHT_BOOTSTRAP_NODES: 'walnut:9101,ironwood:9102,acacia:9103'
    # Same port number as the published port below.
    MONITOR_PORT: '9201'
    PEER_CHECK_INTERVAL: '60s'
  networks:
    - monitoring
    - bzzz-internal
  ports:
    - '9201:9201'
  deploy:
    replicas: 1
    resources:
      limits:
        cpus: '0.25'
        memory: 256M
|
||||
|
||||
# Content store monitor: inspects the blob store through a read-only mount.
content-monitor:
  image: registry.home.deepblack.cloud/bzzz/content-monitor:v2.0.0
  networks:
    - monitoring
    - bzzz-internal
  ports:
    - "9202:9202"
  environment:
    # Must be the in-container path: the volume below mounts the host blob
    # directory at /data/blobs, so the host path does not exist in the container.
    - CONTENT_STORE_PATH=/data/blobs
    - MONITOR_PORT=9202
    - CHECK_INTERVAL=300s
  volumes:
    # NOTE(review): there is no placement constraint, so this bind source must
    # exist on every node that can run the task.
    - /rust/bzzz-v2/data/blobs:/data/blobs:ro
  deploy:
    replicas: 1
    resources:
      limits:
        memory: 256M
        cpus: '0.25'
|
||||
|
||||
# OpenAI cost monitor; connection settings point at the bzzz-v2 Postgres service.
openai-cost-monitor:
  image: registry.home.deepblack.cloud/bzzz/openai-cost-monitor:v2.0.0
  environment:
    POSTGRES_HOST: bzzz-v2_postgres
    POSTGRES_DB: bzzz_v2
    POSTGRES_USER: bzzz
    # Same port number as the published port below.
    MONITOR_PORT: '9203'
    COST_ALERT_THRESHOLD: '100.00'
  # NOTE(review): the secret is mounted at /run/secrets/postgres_password but
  # no variable here references it; confirm the image reads that file itself.
  secrets:
    - postgres_password
  networks:
    - monitoring
    - bzzz-internal
  ports:
    - '9203:9203'
  deploy:
    replicas: 1
    resources:
      limits:
        cpus: '0.25'
        memory: 256M
|
||||
|
||||
# Loki: log aggregation, pinned to node `acacia`.
loki:
  image: grafana/loki:2.9.0
  command:
    - '-config.file=/etc/loki/local-config.yaml'
  # The file named by -config.file is supplied by an external swarm config.
  configs:
    - source: loki_config
      target: /etc/loki/local-config.yaml
  volumes:
    - /rust/bzzz-v2/data/loki:/loki
  networks:
    - monitoring
  ports:
    - '3100:3100'
  deploy:
    replicas: 1
    placement:
      constraints:
        - node.hostname == acacia
    resources:
      reservations:
        cpus: '0.5'
        memory: 1G
      limits:
        cpus: '1.0'
        memory: 2G
|
||||
|
||||
# Promtail: log shipping, one task per swarm node (mode: global).
promtail:
  image: grafana/promtail:2.9.0
  command:
    - '-config.file=/etc/promtail/config.yml'
  # The file named by -config.file is supplied by an external swarm config.
  configs:
    - source: promtail_config
      target: /etc/promtail/config.yml
  # All log directories are mounted read-only.
  volumes:
    - /var/log:/var/log:ro
    - /var/lib/docker/containers:/var/lib/docker/containers:ro
    - /rust/bzzz-v2/logs:/app/logs:ro
  networks:
    - monitoring
  deploy:
    mode: global
    resources:
      limits:
        cpus: '0.25'
        memory: 256M
|
||||
|
||||
# Jaeger all-in-one: distributed tracing, pinned to node `ironwood`.
jaeger:
  image: jaegertracing/all-in-one:1.49
  environment:
    COLLECTOR_OTLP_ENABLED: 'true'
    # Badger span storage, non-ephemeral; both directories sit under the
    # /badger volume mounted below.
    SPAN_STORAGE_TYPE: badger
    BADGER_EPHEMERAL: 'false'
    BADGER_DIRECTORY_VALUE: /badger/data
    BADGER_DIRECTORY_KEY: /badger/key
  volumes:
    - /rust/bzzz-v2/data/jaeger:/badger
  networks:
    - tengig
    - monitoring
  ports:
    - '16686:16686'
    - '14268:14268'
  deploy:
    replicas: 1
    placement:
      constraints:
        - node.hostname == ironwood
    resources:
      limits:
        cpus: '0.5'
        memory: 1G
    # Traefik routes tracing.deepblack.cloud (TLS) to container port 16686.
    labels:
      traefik.enable: 'true'
      traefik.http.routers.jaeger.rule: 'Host(`tracing.deepblack.cloud`)'
      traefik.http.services.jaeger.loadbalancer.server.port: '16686'
      traefik.http.routers.jaeger.tls: 'true'
|
||||
|
||||
# Networks the services in this stack attach to.
networks:
  # Declared external: must already exist before the stack is deployed.
  tengig:
    external: true

  # Overlay network created by this stack; attachable by standalone containers.
  monitoring:
    attachable: true
    driver: overlay

  # Declared external: must already exist before the stack is deployed.
  bzzz-internal:
    external: true
|
||||
|
||||
# Swarm secrets: both are external and are looked up by their `name`.
secrets:
  grafana_admin_password:
    name: bzzz_grafana_admin_password
    external: true

  postgres_password:
    name: bzzz_postgres_password
    external: true
|
||||
|
||||
# Swarm configs: all are external and are looked up by their `name`.
configs:
  prometheus_config:
    name: bzzz_prometheus_config
    external: true

  prometheus_rules:
    name: bzzz_prometheus_rules
    external: true

  alertmanager_config:
    name: bzzz_alertmanager_config
    external: true

  loki_config:
    name: bzzz_loki_config
    external: true

  promtail_config:
    name: bzzz_promtail_config
    external: true
|
||||
Reference in New Issue
Block a user