# AlertManager Configuration for BZZZ v2
#
# Layout:
#   global        - SMTP defaults shared by every email_config
#   templates     - extra notification template files
#   route         - routing tree: alerts are matched top-down against `routes`,
#                   anything unmatched falls through to the 'default' receiver
#   inhibit_rules - suppress lower-priority alerts while a related one fires
#   receivers     - Slack / PagerDuty / email notification targets
#
# Template notes:
#   * Alertmanager does not ship the Sprig `default` function; fallbacks are
#     written with the Go template builtin `or`, which yields its first
#     non-empty argument (missing labels evaluate to the empty string).
#   * Inside `{{ range .Alerts }}` the dot is a single alert, so `.Labels` and
#     `.Annotations` are available. Outside of `range` the dot is the whole
#     notification group and only `.GroupLabels`, `.CommonLabels` and
#     `.CommonAnnotations` exist.

global:
  smtp_smarthost: 'localhost:587'
  smtp_from: 'alerts@deepblack.cloud'
  smtp_require_tls: true
  resolve_timeout: 5m

# Template files
templates:
  - '/etc/alertmanager/templates/*.tmpl'

# Route configuration
route:
  group_by: ['cluster', 'alertname', 'service']
  group_wait: 30s
  group_interval: 5m
  repeat_interval: 12h
  receiver: 'default'
  routes:
    # Critical P2P network issues
    - match:
        severity: critical
        component: p2p
      receiver: 'p2p-critical'
      group_wait: 10s
      repeat_interval: 5m

    # DHT network issues
    - match:
        component: dht
      receiver: 'dht-alerts'
      group_wait: 1m
      repeat_interval: 30m

    # Content store issues
    - match:
        component: content-store
      receiver: 'storage-alerts'
      group_wait: 2m
      repeat_interval: 1h

    # OpenAI cost alerts
    - match:
        component: openai-cost
      receiver: 'cost-alerts'
      group_wait: 5m
      repeat_interval: 6h

    # Service health alerts
    - match:
        component: service-health
      receiver: 'service-alerts'
      group_wait: 1m
      repeat_interval: 15m

    # Resource exhaustion (warning severity only; other severities fall
    # through to the 'default' receiver)
    - match:
        severity: warning
        component: resources
      receiver: 'resource-alerts'
      group_wait: 5m
      repeat_interval: 2h

    # Security alerts
    - match:
        component: security
      receiver: 'security-alerts'
      group_wait: 30s
      repeat_interval: 1h

# Inhibition rules
inhibit_rules:
  # Silence warning if critical alert is firing
  - source_match:
      severity: 'critical'
    target_match:
      severity: 'warning'
    equal: ['cluster', 'service', 'instance']

  # Silence service alerts if node is down
  - source_match:
      alertname: 'NodeDown'
    target_match:
      component: 'service-health'
    equal: ['instance']

# Receiver configurations
receivers:
  # Default receiver
  - name: 'default'
    slack_configs:
      - api_url: 'https://hooks.slack.com/services/YOUR/SLACK/WEBHOOK'
        channel: '#bzzz-monitoring'
        title: 'BZZZ v2 Alert'
        text: |
          {{ range .Alerts }}
          *Alert:* {{ .Annotations.summary }}
          *Description:* {{ .Annotations.description }}
          *Severity:* {{ .Labels.severity }}
          *Instance:* {{ .Labels.instance }}
          *Service:* {{ .Labels.service }}
          {{ end }}
        send_resolved: true

  # Critical P2P network alerts
  - name: 'p2p-critical'
    slack_configs:
      - api_url: 'https://hooks.slack.com/services/YOUR/SLACK/WEBHOOK'
        channel: '#bzzz-critical'
        title: '🚨 CRITICAL P2P Network Issue'
        text: |
          {{ range .Alerts }}
          *CRITICAL P2P ALERT*
          *Summary:* {{ .Annotations.summary }}
          *Description:* {{ .Annotations.description }}
          *Node:* {{ .Labels.instance }}
          *Time:* {{ .StartsAt.Format "2006-01-02 15:04:05" }}
          *Immediate Action Required*
          {{ end }}
        send_resolved: true
    pagerduty_configs:
      - service_key: 'YOUR_PAGERDUTY_SERVICE_KEY'
        # Evaluated against the whole group, not a single alert, so the
        # summary must come from .CommonAnnotations (there is no top-level
        # .Annotations field).
        description: '{{ .GroupLabels.alertname }} - {{ .CommonAnnotations.summary }}'

  # DHT network alerts
  - name: 'dht-alerts'
    slack_configs:
      - api_url: 'https://hooks.slack.com/services/YOUR/SLACK/WEBHOOK'
        channel: '#bzzz-dht'
        title: '🔗 DHT Network Alert'
        text: |
          {{ range .Alerts }}
          *DHT Network Issue*
          *Alert:* {{ .Annotations.summary }}
          *Description:* {{ .Annotations.description }}
          *Bootstrap Node:* {{ .Labels.instance }}
          *Peers Connected:* {{ or .Labels.peer_count "unknown" }}
          {{ end }}
        send_resolved: true

  # Storage alerts
  - name: 'storage-alerts'
    slack_configs:
      - api_url: 'https://hooks.slack.com/services/YOUR/SLACK/WEBHOOK'
        channel: '#bzzz-storage'
        title: '💾 Content Store Alert'
        text: |
          {{ range .Alerts }}
          *Storage Alert*
          *Issue:* {{ .Annotations.summary }}
          *Details:* {{ .Annotations.description }}
          *Node:* {{ .Labels.instance }}
          *Usage:* {{ or .Labels.disk_usage "unknown" }}%
          {{ end }}
        send_resolved: true

  # OpenAI cost alerts
  - name: 'cost-alerts'
    slack_configs:
      - api_url: 'https://hooks.slack.com/services/YOUR/SLACK/WEBHOOK'
        channel: '#bzzz-costs'
        title: '💰 OpenAI Cost Alert'
        text: |
          {{ range .Alerts }}
          *Cost Alert*
          *Alert:* {{ .Annotations.summary }}
          *Current Cost:* ${{ or .Labels.current_cost "unknown" }}
          *Threshold:* ${{ or .Labels.cost_threshold "unknown" }}
          *Period:* {{ or .Labels.cost_period "daily" }}
          *Action:* {{ .Annotations.description }}
          {{ end }}
        send_resolved: true
    email_configs:
      - to: 'finance@deepblack.cloud'
        # email_config has no `subject`/`body` keys: the subject is an email
        # header and the plain-text body is `text`.
        headers:
          Subject: 'BZZZ v2 OpenAI Cost Alert'
        # Disable the default HTML part so the custom text body is what
        # recipients actually see.
        html: ''
        text: |
          OpenAI usage has exceeded cost thresholds.
          {{ range .Alerts }}
          Alert: {{ .Annotations.summary }}
          Current Cost: ${{ .Labels.current_cost }}
          Threshold: ${{ .Labels.cost_threshold }}
          {{ end }}

  # Service health alerts
  - name: 'service-alerts'
    slack_configs:
      - api_url: 'https://hooks.slack.com/services/YOUR/SLACK/WEBHOOK'
        channel: '#bzzz-services'
        title: '🔧 Service Health Alert'
        text: |
          {{ range .Alerts }}
          *Service Health Issue*
          *Service:* {{ .Labels.service }}
          *Alert:* {{ .Annotations.summary }}
          *Node:* {{ .Labels.instance }}
          *Status:* {{ or .Labels.status "unknown" }}
          *Description:* {{ .Annotations.description }}
          {{ end }}
        send_resolved: true

  # Resource alerts
  - name: 'resource-alerts'
    slack_configs:
      - api_url: 'https://hooks.slack.com/services/YOUR/SLACK/WEBHOOK'
        channel: '#bzzz-resources'
        title: '⚡ Resource Alert'
        text: |
          {{ range .Alerts }}
          *Resource Warning*
          *Resource:* {{ or .Labels.resource_type "unknown" }}
          *Node:* {{ .Labels.instance }}
          *Alert:* {{ .Annotations.summary }}
          *Current Usage:* {{ or .Labels.usage_percent "unknown" }}%
          *Threshold:* {{ or .Labels.threshold "unknown" }}%
          {{ end }}
        send_resolved: true

  # Security alerts
  - name: 'security-alerts'
    slack_configs:
      - api_url: 'https://hooks.slack.com/services/YOUR/SLACK/WEBHOOK'
        channel: '#bzzz-security'
        title: '🔒 Security Alert'
        text: |
          {{ range .Alerts }}
          *SECURITY ALERT*
          *Type:* {{ or .Labels.security_type "unknown" }}
          *Alert:* {{ .Annotations.summary }}
          *Source:* {{ .Labels.instance }}
          *Details:* {{ .Annotations.description }}
          *Severity:* {{ .Labels.severity }}
          {{ end }}
        send_resolved: true
    email_configs:
      - to: 'security@deepblack.cloud'
        # email_config has no `subject`/`body` keys: the subject is an email
        # header and the plain-text body is `text`.
        headers:
          Subject: 'BZZZ v2 Security Alert'
        # Disable the default HTML part so the custom text body is what
        # recipients actually see.
        html: ''
        text: |
          Security alert triggered in BZZZ v2 cluster.
          {{ range .Alerts }}
          Alert: {{ .Annotations.summary }}
          Severity: {{ .Labels.severity }}
          Source: {{ .Labels.instance }}
          Details: {{ .Annotations.description }}
          {{ end }}