---
# Prometheus Configuration for BZZZ v2 Monitoring
#
# Layout:
#   global         - defaults inherited by every scrape job
#   rule_files     - alerting/recording rule definitions
#   alerting       - where fired alerts are sent
#   scrape_configs - one entry per monitored component

global:
  scrape_interval: 30s
  # Applies to every job that does not override it. Prometheus rejects a
  # job whose scrape_timeout exceeds its scrape_interval, so keep this at
  # or below the shortest per-job interval used below (15s).
  scrape_timeout: 10s
  evaluation_interval: 30s
  # Labels attached to series/alerts when talking to external systems.
  external_labels:
    cluster: 'deepblack-cloud'
    environment: 'production'

rule_files:
  - '/etc/prometheus/rules.yml'

alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - 'alertmanager:9093'

scrape_configs:
  # Prometheus self-monitoring
  - job_name: 'prometheus'
    static_configs:
      - targets: ['localhost:9090']
    metrics_path: /metrics
    scrape_interval: 15s

  # System metrics from node exporters
  - job_name: 'node-exporter'
    static_configs:
      - targets:
          - 'walnut:9100'
          - 'ironwood:9100'
          - 'acacia:9100'
    metrics_path: /metrics
    scrape_interval: 15s

  # Container metrics from cAdvisor
  - job_name: 'cadvisor'
    static_configs:
      - targets:
          - 'walnut:8080'
          - 'ironwood:8080'
          - 'acacia:8080'
    metrics_path: /metrics
    scrape_interval: 30s

  # BZZZ v2 Application Services
  #
  # The docker_sd jobs below all follow the same pattern: discover
  # containers through the local Docker socket, keep only the ones whose
  # Swarm service-name label matches the job's regex, and copy that
  # service name into a `service` label.
  - job_name: 'bzzz-agent'
    docker_sd_configs:
      - host: unix:///var/run/docker.sock
        port: 9000
    relabel_configs:
      # Drop every discovered container that is not part of this service.
      - source_labels: [__meta_docker_container_label_com_docker_swarm_service_name]
        regex: bzzz-v2_bzzz-agent
        action: keep
      # Record which Swarm node the container runs on.
      - source_labels: [__meta_docker_container_label_com_docker_swarm_node_id]
        target_label: node_id
      - source_labels: [__meta_docker_container_label_com_docker_swarm_service_name]
        target_label: service
    metrics_path: /metrics
    scrape_interval: 15s

  # MCP Server Metrics
  - job_name: 'mcp-server'
    docker_sd_configs:
      - host: unix:///var/run/docker.sock
        port: 3001
    relabel_configs:
      - source_labels: [__meta_docker_container_label_com_docker_swarm_service_name]
        regex: bzzz-v2_mcp-server
        action: keep
      - source_labels: [__meta_docker_container_label_com_docker_swarm_service_name]
        target_label: service
    metrics_path: /metrics
    scrape_interval: 30s

  # OpenAI Proxy Metrics
  - job_name: 'openai-proxy'
    docker_sd_configs:
      - host: unix:///var/run/docker.sock
        port: 3002
    relabel_configs:
      - source_labels: [__meta_docker_container_label_com_docker_swarm_service_name]
        regex: bzzz-v2_openai-proxy
        action: keep
      - source_labels: [__meta_docker_container_label_com_docker_swarm_service_name]
        target_label: service
    metrics_path: /metrics
    scrape_interval: 30s

  # Content Resolver Metrics
  - job_name: 'content-resolver'
    docker_sd_configs:
      - host: unix:///var/run/docker.sock
        port: 3003
    relabel_configs:
      - source_labels: [__meta_docker_container_label_com_docker_swarm_service_name]
        regex: bzzz-v2_content-resolver
        action: keep
      - source_labels: [__meta_docker_container_label_com_docker_swarm_service_name]
        target_label: service
    metrics_path: /metrics
    scrape_interval: 30s

  # DHT Bootstrap Nodes
  # Each host exposes its bootstrap node on a different port.
  - job_name: 'dht-bootstrap'
    static_configs:
      - targets:
          - 'walnut:9101'
          - 'ironwood:9102'
          - 'acacia:9103'
        labels:
          service: 'dht-bootstrap'
    metrics_path: /metrics
    scrape_interval: 15s

  # P2P Network Metrics
  - job_name: 'bzzz-p2p-exporter'
    static_configs:
      - targets: ['bzzz-p2p-exporter:9200']
    metrics_path: /metrics
    scrape_interval: 30s

  # DHT Network Monitoring
  - job_name: 'dht-monitor'
    static_configs:
      - targets: ['dht-monitor:9201']
    metrics_path: /metrics
    scrape_interval: 60s

  # Content Store Monitoring
  - job_name: 'content-monitor'
    static_configs:
      - targets: ['content-monitor:9202']
    metrics_path: /metrics
    scrape_interval: 300s  # 5 minutes for storage checks

  # OpenAI Cost Monitoring
  - job_name: 'openai-cost-monitor'
    static_configs:
      - targets: ['openai-cost-monitor:9203']
    metrics_path: /metrics
    scrape_interval: 60s

  # Database Metrics (PostgreSQL)
  # NOTE(review): 5432 is PostgreSQL's default client port, which speaks the
  # PostgreSQL wire protocol rather than HTTP. Confirm something actually
  # serves /metrics on this port; if metrics come from postgres_exporter,
  # its default listen port is 9187 and this job should target that instead.
  - job_name: 'postgres'
    docker_sd_configs:
      - host: unix:///var/run/docker.sock
        port: 5432
    relabel_configs:
      - source_labels: [__meta_docker_container_label_com_docker_swarm_service_name]
        regex: bzzz-v2_postgres
        action: keep
      - source_labels: [__meta_docker_container_label_com_docker_swarm_service_name]
        target_label: service
    metrics_path: /metrics
    scrape_interval: 30s
    # Sent as the URL query string ?dbname=bzzz_v2 on every scrape.
    params:
      dbname: [bzzz_v2]

  # Cache Metrics (Redis)
  # NOTE(review): 6379 is Redis's default client port, which speaks the
  # Redis protocol rather than HTTP. Confirm something actually serves
  # /metrics on this port; if metrics come from redis_exporter, its default
  # listen port is 9121 and this job should target that instead.
  - job_name: 'redis'
    docker_sd_configs:
      - host: unix:///var/run/docker.sock
        port: 6379
    relabel_configs:
      - source_labels: [__meta_docker_container_label_com_docker_swarm_service_name]
        regex: bzzz-v2_redis
        action: keep
      - source_labels: [__meta_docker_container_label_com_docker_swarm_service_name]
        target_label: service
    metrics_path: /metrics
    scrape_interval: 30s

  # Traefik Load Balancer Metrics
  - job_name: 'traefik'
    static_configs:
      - targets: ['traefik:8080']
    metrics_path: /metrics
    scrape_interval: 30s

  # Conversation Management Metrics
  - job_name: 'conversation-manager'
    docker_sd_configs:
      - host: unix:///var/run/docker.sock
        port: 8090
    relabel_configs:
      - source_labels: [__meta_docker_container_label_com_docker_swarm_service_name]
        regex: bzzz-v2_conversation-manager
        action: keep
      - source_labels: [__meta_docker_container_label_com_docker_swarm_service_name]
        target_label: service
    metrics_path: /metrics
    scrape_interval: 60s
    scrape_timeout: 10s

# Remote write configuration for long-term storage (optional)
# remote_write:
#   - url: "https://prometheus-remote-write.example.com/api/v1/write"
#     basic_auth:
#       username: "bzzz-cluster"
#       password_file: "/etc/prometheus/remote-write-password"