- Add FUTURE_DEVELOPMENT.md with comprehensive v2 protocol specification - Add MCP integration design and implementation foundation - Add infrastructure and deployment configurations - Update system architecture for v2 evolution 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
216 lines
6.2 KiB
YAML
216 lines
6.2 KiB
YAML
# Prometheus Configuration for BZZZ v2 Monitoring

# Server-wide defaults; a scrape job may override the scrape settings.
global:
  evaluation_interval: 30s  # how often rule files are evaluated
  scrape_interval: 30s
  scrape_timeout: 10s
  # Labels added to series and alerts when talking to external systems
  # (Alertmanager, federation, remote storage).
  external_labels:
    environment: 'production'
    cluster: 'deepblack-cloud'

# Rule files loaded by the server and evaluated every evaluation_interval.
rule_files:
- '/etc/prometheus/rules.yml'

# Where fired alerts are delivered: one statically configured Alertmanager.
alerting:
  alertmanagers:
  - static_configs:
    - targets: ['alertmanager:9093']

scrape_configs:
# Prometheus self-monitoring: the server scrapes its own metrics endpoint.
- job_name: 'prometheus'
  scrape_interval: 15s
  metrics_path: /metrics
  static_configs:
  - targets:
    - 'localhost:9090'

# System metrics from node exporters (one exporter per host, port 9100).
- job_name: 'node-exporter'
  scrape_interval: 15s
  metrics_path: /metrics
  static_configs:
  - targets: ['walnut:9100', 'ironwood:9100', 'acacia:9100']

# Container metrics from cAdvisor (one instance per host, port 8080).
- job_name: 'cadvisor'
  scrape_interval: 30s
  metrics_path: /metrics
  static_configs:
  - targets: ['walnut:8080', 'ironwood:8080', 'acacia:8080']

# BZZZ v2 Application Services
# Targets are discovered from the Docker daemon behind the local socket and
# then filtered down to the agent service by its Swarm service-name label.
# NOTE(review): docker_sd only sees containers of the daemon it is pointed
# at; if agents run on other Swarm nodes they may need dockerswarm_sd - confirm.
- job_name: 'bzzz-agent'
  scrape_interval: 15s
  metrics_path: /metrics
  docker_sd_configs:
  - host: unix:///var/run/docker.sock
    port: 9000
  relabel_configs:
  # Drop every target whose service name is not exactly bzzz-v2_bzzz-agent.
  - source_labels:
    - __meta_docker_container_label_com_docker_swarm_service_name
    regex: bzzz-v2_bzzz-agent
    action: keep
  # Expose the Swarm node id and service name as regular labels.
  - source_labels:
    - __meta_docker_container_label_com_docker_swarm_node_id
    target_label: node_id
  - source_labels:
    - __meta_docker_container_label_com_docker_swarm_service_name
    target_label: service

# MCP Server Metrics
# Docker-discovered targets, restricted to the bzzz-v2_mcp-server service.
- job_name: 'mcp-server'
  scrape_interval: 30s
  metrics_path: /metrics
  docker_sd_configs:
  - host: unix:///var/run/docker.sock
    port: 3001
  relabel_configs:
  # Keep only targets whose Swarm service name matches exactly.
  - source_labels:
    - __meta_docker_container_label_com_docker_swarm_service_name
    regex: bzzz-v2_mcp-server
    action: keep
  # Copy the service name into a regular `service` label.
  - source_labels:
    - __meta_docker_container_label_com_docker_swarm_service_name
    target_label: service

# OpenAI Proxy Metrics
# Docker-discovered targets, restricted to the bzzz-v2_openai-proxy service.
- job_name: 'openai-proxy'
  scrape_interval: 30s
  metrics_path: /metrics
  docker_sd_configs:
  - host: unix:///var/run/docker.sock
    port: 3002
  relabel_configs:
  # Keep only targets whose Swarm service name matches exactly.
  - source_labels:
    - __meta_docker_container_label_com_docker_swarm_service_name
    regex: bzzz-v2_openai-proxy
    action: keep
  # Copy the service name into a regular `service` label.
  - source_labels:
    - __meta_docker_container_label_com_docker_swarm_service_name
    target_label: service

# Content Resolver Metrics
# Docker-discovered targets, restricted to the bzzz-v2_content-resolver service.
- job_name: 'content-resolver'
  scrape_interval: 30s
  metrics_path: /metrics
  docker_sd_configs:
  - host: unix:///var/run/docker.sock
    port: 3003
  relabel_configs:
  # Keep only targets whose Swarm service name matches exactly.
  - source_labels:
    - __meta_docker_container_label_com_docker_swarm_service_name
    regex: bzzz-v2_content-resolver
    action: keep
  # Copy the service name into a regular `service` label.
  - source_labels:
    - __meta_docker_container_label_com_docker_swarm_service_name
    target_label: service

# DHT Bootstrap Nodes
# One static target per host, each on its own port; every target in the
# group carries the label service=dht-bootstrap.
- job_name: 'dht-bootstrap'
  scrape_interval: 15s
  metrics_path: /metrics
  static_configs:
  - labels:
      service: 'dht-bootstrap'
    targets: ['walnut:9101', 'ironwood:9102', 'acacia:9103']

# P2P Network Metrics (single statically addressed exporter).
- job_name: 'bzzz-p2p-exporter'
  scrape_interval: 30s
  metrics_path: /metrics
  static_configs:
  - targets:
    - 'bzzz-p2p-exporter:9200'

# DHT Network Monitoring (single statically addressed exporter).
- job_name: 'dht-monitor'
  scrape_interval: 60s
  metrics_path: /metrics
  static_configs:
  - targets:
    - 'dht-monitor:9201'

# Content Store Monitoring
- job_name: 'content-monitor'
  static_configs:
  - targets: ['content-monitor:9202']
  metrics_path: /metrics
  # Storage checks are scraped slowly, but the interval must stay well below
  # Prometheus' default 5m query lookback window: at 300s, ordinary scrape
  # jitter makes the series intermittently disappear from instant queries,
  # dashboards and alert rules. 120s leaves room for one missed scrape.
  scrape_interval: 120s

# OpenAI Cost Monitoring (single statically addressed exporter).
- job_name: 'openai-cost-monitor'
  scrape_interval: 60s
  metrics_path: /metrics
  static_configs:
  - targets:
    - 'openai-cost-monitor:9203'

# Database Metrics (PostgreSQL)
# Docker-discovered targets, restricted to the bzzz-v2_postgres service.
# NOTE(review): 5432 is the PostgreSQL wire-protocol port; Prometheus scrapes
# over HTTP, so this presumably needs to point at a postgres_exporter
# endpoint instead - confirm which service/port exposes /metrics.
- job_name: 'postgres'
  scrape_interval: 30s
  metrics_path: /metrics
  # Sent as the URL query string ?dbname=bzzz_v2 on every scrape request.
  params:
    dbname:
    - bzzz_v2
  docker_sd_configs:
  - host: unix:///var/run/docker.sock
    port: 5432
  relabel_configs:
  # Keep only targets whose Swarm service name matches exactly.
  - source_labels:
    - __meta_docker_container_label_com_docker_swarm_service_name
    regex: bzzz-v2_postgres
    action: keep
  # Copy the service name into a regular `service` label.
  - source_labels:
    - __meta_docker_container_label_com_docker_swarm_service_name
    target_label: service

# Cache Metrics (Redis)
# Docker-discovered targets, restricted to the bzzz-v2_redis service.
# NOTE(review): 6379 is the Redis protocol port; Prometheus scrapes over
# HTTP, so this presumably needs to point at a redis_exporter endpoint
# instead - confirm which service/port exposes /metrics.
- job_name: 'redis'
  scrape_interval: 30s
  metrics_path: /metrics
  docker_sd_configs:
  - host: unix:///var/run/docker.sock
    port: 6379
  relabel_configs:
  # Keep only targets whose Swarm service name matches exactly.
  - source_labels:
    - __meta_docker_container_label_com_docker_swarm_service_name
    regex: bzzz-v2_redis
    action: keep
  # Copy the service name into a regular `service` label.
  - source_labels:
    - __meta_docker_container_label_com_docker_swarm_service_name
    target_label: service

# Traefik Load Balancer Metrics (single statically addressed target).
- job_name: 'traefik'
  scrape_interval: 30s
  metrics_path: /metrics
  static_configs:
  - targets:
    - 'traefik:8080'

# Conversation Management Metrics
# Docker-discovered targets, restricted to the bzzz-v2_conversation-manager
# service.
- job_name: 'conversation-manager'
  scrape_interval: 30s
  metrics_path: /metrics
  docker_sd_configs:
  - host: unix:///var/run/docker.sock
    port: 8090
  relabel_configs:
  # Keep only targets whose Swarm service name matches exactly.
  - source_labels:
    - __meta_docker_container_label_com_docker_swarm_service_name
    regex: bzzz-v2_conversation-manager
    action: keep
  # Copy the service name into a regular `service` label.
  - source_labels:
    - __meta_docker_container_label_com_docker_swarm_service_name
    target_label: service

# External Service Monitoring (Webhook endpoints)
# Requests /health on each public hostname once a minute.
# NOTE(review): no `scheme` is set, so these are fetched over plain HTTP, and
# the /health response is parsed as Prometheus metrics. If these are ordinary
# health endpoints, a blackbox_exporter probe is probably what is wanted -
# confirm.
- job_name: 'external-health'
  scrape_interval: 60s
  scrape_timeout: 10s
  metrics_path: /health
  static_configs:
  - targets:
    - 'bzzz.deepblack.cloud'
    - 'mcp.deepblack.cloud'
    - 'resolve.deepblack.cloud'
    - 'openai.deepblack.cloud'

# Remote write configuration for long-term storage (optional)
# remote_write:
# - url: "https://prometheus-remote-write.example.com/api/v1/write"
#   basic_auth:
#     username: "bzzz-cluster"
#     password_file: "/etc/prometheus/remote-write-password"