# Commit notes carried over from the source listing (not part of the
# Alertmanager configuration itself):
# - Add FUTURE_DEVELOPMENT.md with comprehensive v2 protocol specification
# - Add MCP integration design and implementation foundation
# - Add infrastructure and deployment configurations
# - Update system architecture for v2 evolution
# 🤖 Generated with [Claude Code](https://claude.ai/code)
# Co-Authored-By: Claude <noreply@anthropic.com>
# File listing: 255 lines, 7.4 KiB, YAML
# AlertManager Configuration for BZZZ v2

# Settings inherited by every receiver unless a receiver overrides them.
global:
  resolve_timeout: 5m
  # Outgoing mail relay used by the email_configs entries in this file.
  smtp_from: 'alerts@deepblack.cloud'
  smtp_smarthost: 'localhost:587'
  smtp_require_tls: true

# Template files
# Glob of notification template definitions loaded in addition to the
# inline templates used by the receivers in this file.
templates:
- '/etc/alertmanager/templates/*.tmpl'

# Route configuration
# Root of the routing tree. An alert that matches none of the child routes
# is delivered to 'default' using the grouping and timing set on the root.
# NOTE(review): `match` is deprecated (Alertmanager >= 0.22 prefers
# `matchers`); it is kept here unchanged - confirm the deployed version
# before migrating.
route:
  receiver: 'default'
  group_by:
  - 'cluster'
  - 'alertname'
  - 'service'
  group_wait: 30s
  group_interval: 5m
  repeat_interval: 12h
  routes:
  # Critical P2P network issues
  - match:
      severity: critical
      component: p2p
    receiver: 'p2p-critical'
    group_wait: 10s
    repeat_interval: 5m

  # DHT network issues
  - match:
      component: dht
    receiver: 'dht-alerts'
    group_wait: 1m
    repeat_interval: 30m

  # Content store issues
  - match:
      component: content-store
    receiver: 'storage-alerts'
    group_wait: 2m
    repeat_interval: 1h

  # OpenAI cost alerts
  - match:
      component: openai-cost
    receiver: 'cost-alerts'
    group_wait: 5m
    repeat_interval: 6h

  # Service health alerts
  - match:
      component: service-health
    receiver: 'service-alerts'
    group_wait: 1m
    repeat_interval: 15m

  # Resource exhaustion
  - match:
      severity: warning
      component: resources
    receiver: 'resource-alerts'
    group_wait: 5m
    repeat_interval: 2h

  # Security alerts
  - match:
      component: security
    receiver: 'security-alerts'
    group_wait: 30s
    repeat_interval: 1h

# Inhibition rules
# Each rule mutes alerts matching `target_match` while an alert matching
# `source_match` is firing and both carry the same values for every label
# listed under `equal`.
# NOTE(review): `source_match` / `target_match` are deprecated in favour of
# `source_matchers` / `target_matchers`; kept unchanged here.
inhibit_rules:
# Silence warning if critical alert is firing
- source_match:
    severity: 'critical'
  target_match:
    severity: 'warning'
  equal:
  - 'cluster'
  - 'service'
  - 'instance'

# Silence service alerts if node is down
- source_match:
    alertname: 'NodeDown'
  target_match:
    component: 'service-health'
  equal:
  - 'instance'

# Receiver configurations
receivers:
# Default receiver
# Catch-all target named by the root route; posts one Slack message per
# alert group, listing every alert in the group.
- name: 'default'
  slack_configs:
  - api_url: 'https://hooks.slack.com/services/YOUR/SLACK/WEBHOOK'
    channel: '#bzzz-monitoring'
    send_resolved: true
    title: 'BZZZ v2 Alert'
    text: |
      {{ range .Alerts }}
      *Alert:* {{ .Annotations.summary }}
      *Description:* {{ .Annotations.description }}
      *Severity:* {{ .Labels.severity }}
      *Instance:* {{ .Labels.instance }}
      *Service:* {{ .Labels.service }}
      {{ end }}

# Critical P2P network alerts
# Posts to the critical Slack channel and pages via PagerDuty.
- name: 'p2p-critical'
  slack_configs:
  - api_url: 'https://hooks.slack.com/services/YOUR/SLACK/WEBHOOK'
    channel: '#bzzz-critical'
    title: '🚨 CRITICAL P2P Network Issue'
    text: |
      {{ range .Alerts }}
      *CRITICAL P2P ALERT*

      *Summary:* {{ .Annotations.summary }}
      *Description:* {{ .Annotations.description }}
      *Node:* {{ .Labels.instance }}
      *Time:* {{ .StartsAt.Format "2006-01-02 15:04:05" }}

      *Immediate Action Required*
      {{ end }}
    send_resolved: true
  pagerduty_configs:
  - service_key: 'YOUR_PAGERDUTY_SERVICE_KEY'
    # This template is evaluated against the whole notification (the alert
    # group), which exposes CommonAnnotations rather than a per-alert
    # Annotations field; `.Annotations` here fails to render.
    description: '{{ .GroupLabels.alertname }} - {{ .CommonAnnotations.summary }}'

# DHT network alerts
# Posts DHT alerts to the DHT Slack channel.
- name: 'dht-alerts'
  slack_configs:
  - api_url: 'https://hooks.slack.com/services/YOUR/SLACK/WEBHOOK'
    channel: '#bzzz-dht'
    title: '🔗 DHT Network Alert'
    # Fallbacks use the builtin `or`: Alertmanager templates have no
    # `default` function, so `| default "..."` makes the template fail.
    text: |
      {{ range .Alerts }}
      *DHT Network Issue*

      *Alert:* {{ .Annotations.summary }}
      *Description:* {{ .Annotations.description }}
      *Bootstrap Node:* {{ .Labels.instance }}
      *Peers Connected:* {{ or .Labels.peer_count "unknown" }}
      {{ end }}
    send_resolved: true

# Storage alerts
# Posts content-store alerts to the storage Slack channel.
- name: 'storage-alerts'
  slack_configs:
  - api_url: 'https://hooks.slack.com/services/YOUR/SLACK/WEBHOOK'
    channel: '#bzzz-storage'
    title: '💾 Content Store Alert'
    # Fallbacks use the builtin `or`: Alertmanager templates have no
    # `default` function, so `| default "..."` makes the template fail.
    text: |
      {{ range .Alerts }}
      *Storage Alert*

      *Issue:* {{ .Annotations.summary }}
      *Details:* {{ .Annotations.description }}
      *Node:* {{ .Labels.instance }}
      *Usage:* {{ or .Labels.disk_usage "unknown" }}%
      {{ end }}
    send_resolved: true

# OpenAI cost alerts
# Posts to the costs Slack channel and emails finance.
- name: 'cost-alerts'
  slack_configs:
  - api_url: 'https://hooks.slack.com/services/YOUR/SLACK/WEBHOOK'
    channel: '#bzzz-costs'
    title: '💰 OpenAI Cost Alert'
    # Fallbacks use the builtin `or`: Alertmanager templates have no
    # `default` function, so `| default "..."` makes the template fail.
    text: |
      {{ range .Alerts }}
      *Cost Alert*

      *Alert:* {{ .Annotations.summary }}
      *Current Cost:* ${{ or .Labels.current_cost "unknown" }}
      *Threshold:* ${{ or .Labels.cost_threshold "unknown" }}
      *Period:* {{ or .Labels.cost_period "daily" }}
      *Action:* {{ .Annotations.description }}
      {{ end }}
    send_resolved: true
  email_configs:
  - to: 'finance@deepblack.cloud'
    # email_config has no `subject` or `body` keys (unknown keys are
    # rejected when the config is loaded): the subject is a header and the
    # plain-text body is `text`.
    headers:
      Subject: 'BZZZ v2 OpenAI Cost Alert'
    # Drop the default HTML part so the custom text body is what is sent.
    html: ''
    text: |
      OpenAI usage has exceeded cost thresholds.

      {{ range .Alerts }}
      Alert: {{ .Annotations.summary }}
      Current Cost: ${{ .Labels.current_cost }}
      Threshold: ${{ .Labels.cost_threshold }}
      {{ end }}

# Service health alerts
# Posts service-health alerts to the services Slack channel.
- name: 'service-alerts'
  slack_configs:
  - api_url: 'https://hooks.slack.com/services/YOUR/SLACK/WEBHOOK'
    channel: '#bzzz-services'
    title: '🔧 Service Health Alert'
    # Fallbacks use the builtin `or`: Alertmanager templates have no
    # `default` function, so `| default "..."` makes the template fail.
    text: |
      {{ range .Alerts }}
      *Service Health Issue*

      *Service:* {{ .Labels.service }}
      *Alert:* {{ .Annotations.summary }}
      *Node:* {{ .Labels.instance }}
      *Status:* {{ or .Labels.status "unknown" }}
      *Description:* {{ .Annotations.description }}
      {{ end }}
    send_resolved: true

# Resource alerts
# Posts resource warnings to the resources Slack channel.
- name: 'resource-alerts'
  slack_configs:
  - api_url: 'https://hooks.slack.com/services/YOUR/SLACK/WEBHOOK'
    channel: '#bzzz-resources'
    title: '⚡ Resource Alert'
    # Fallbacks use the builtin `or`: Alertmanager templates have no
    # `default` function, so `| default "..."` makes the template fail.
    text: |
      {{ range .Alerts }}
      *Resource Warning*

      *Resource:* {{ or .Labels.resource_type "unknown" }}
      *Node:* {{ .Labels.instance }}
      *Alert:* {{ .Annotations.summary }}
      *Current Usage:* {{ or .Labels.usage_percent "unknown" }}%
      *Threshold:* {{ or .Labels.threshold "unknown" }}%
      {{ end }}
    send_resolved: true

# Security alerts
# Posts to the security Slack channel and emails the security mailbox.
- name: 'security-alerts'
  slack_configs:
  - api_url: 'https://hooks.slack.com/services/YOUR/SLACK/WEBHOOK'
    channel: '#bzzz-security'
    title: '🔒 Security Alert'
    # Fallbacks use the builtin `or`: Alertmanager templates have no
    # `default` function, so `| default "..."` makes the template fail.
    text: |
      {{ range .Alerts }}
      *SECURITY ALERT*

      *Type:* {{ or .Labels.security_type "unknown" }}
      *Alert:* {{ .Annotations.summary }}
      *Source:* {{ .Labels.instance }}
      *Details:* {{ .Annotations.description }}
      *Severity:* {{ .Labels.severity }}
      {{ end }}
    send_resolved: true
  email_configs:
  - to: 'security@deepblack.cloud'
    # email_config has no `subject` or `body` keys (unknown keys are
    # rejected when the config is loaded): the subject is a header and the
    # plain-text body is `text`.
    headers:
      Subject: 'BZZZ v2 Security Alert'
    # Drop the default HTML part so the custom text body is what is sent.
    html: ''
    text: |
      Security alert triggered in BZZZ v2 cluster.

      {{ range .Alerts }}
      Alert: {{ .Annotations.summary }}
      Severity: {{ .Labels.severity }}
      Source: {{ .Labels.instance }}
      Details: {{ .Annotations.description }}
      {{ end }}