Comprehensive multi-agent implementation addressing all issues from INDEX.md:

## Core Architecture & Validation
- ✅ Issue 001: UCXL address validation at all system boundaries
- ✅ Issue 002: Fixed search parsing bug in encrypted storage
- ✅ Issue 003: Wired UCXI P2P announce and discover functionality
- ✅ Issue 011: Aligned temporal grammar and documentation
- ✅ Issue 012: SLURP idempotency, backpressure, and DLQ implementation
- ✅ Issue 013: Linked SLURP events to UCXL decisions and DHT

## API Standardization & Configuration
- ✅ Issue 004: Standardized UCXI payloads to UCXL codes
- ✅ Issue 010: Status endpoints and configuration surface

## Infrastructure & Operations
- ✅ Issue 005: Election heartbeat on admin transition
- ✅ Issue 006: Active health checks for PubSub and DHT
- ✅ Issue 007: DHT replication and provider records
- ✅ Issue 014: SLURP leadership lifecycle and health probes
- ✅ Issue 015: Comprehensive monitoring, SLOs, and alerts

## Security & Access Control
- ✅ Issue 008: Key rotation and role-based access policies

## Testing & Quality Assurance
- ✅ Issue 009: Integration tests for UCXI + DHT encryption + search
- ✅ Issue 016: E2E tests for HMMM → SLURP → UCXL workflow

## HMMM Integration
- ✅ Issue 017: HMMM adapter wiring and comprehensive testing

## Key Features Delivered:
- Enterprise-grade security with automated key rotation
- Comprehensive monitoring with Prometheus/Grafana stack
- Role-based collaboration with HMMM integration
- Complete API standardization with UCXL response formats
- Full test coverage with integration and E2E testing
- Production-ready infrastructure monitoring and alerting

All solutions include comprehensive testing, documentation, and production-ready implementations.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
#!/bin/bash

# BZZZ Enhanced Monitoring Stack Deployment Script
# Deploys comprehensive monitoring, metrics, and health checking infrastructure

set -euo pipefail

# Script configuration
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
LOG_FILE="/tmp/bzzz-deploy-${TIMESTAMP}.log"

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Configuration
ENVIRONMENT=${ENVIRONMENT:-"production"}
DRY_RUN=${DRY_RUN:-"false"}
BACKUP_EXISTING=${BACKUP_EXISTING:-"true"}
HEALTH_CHECK_TIMEOUT=${HEALTH_CHECK_TIMEOUT:-300}

# Docker configuration
DOCKER_REGISTRY="registry.home.deepblack.cloud"
STACK_NAME="bzzz-monitoring-v2"
CONFIG_VERSION="v2"
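# Example invocations (illustrative; the script filename shown here is assumed,
# the environment variables are the ones defined above):
#   DRY_RUN=true ./deploy-enhanced-monitoring.sh                         # preview without touching the swarm
#   ENVIRONMENT=staging HEALTH_CHECK_TIMEOUT=600 ./deploy-enhanced-monitoring.sh
#   BACKUP_EXISTING=false ./deploy-enhanced-monitoring.sh                # skip the pre-deployment backup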
# Logging function
log() {
    local level=$1
    shift
    local message="$*"
    local timestamp=$(date '+%Y-%m-%d %H:%M:%S')

    case $level in
        ERROR)
            echo -e "${RED}[ERROR]${NC} $message" >&2
            ;;
        WARN)
            echo -e "${YELLOW}[WARN]${NC} $message"
            ;;
        INFO)
            echo -e "${GREEN}[INFO]${NC} $message"
            ;;
        DEBUG)
            echo -e "${BLUE}[DEBUG]${NC} $message"
            ;;
    esac

    echo "[$timestamp] [$level] $message" >> "$LOG_FILE"
}

# Error handler
error_handler() {
    local line_no=$1
    log ERROR "Script failed at line $line_no"
    log ERROR "Check log file: $LOG_FILE"
    exit 1
}
trap 'error_handler $LINENO' ERR
# Check prerequisites
check_prerequisites() {
    log INFO "Checking prerequisites..."

    # Check if running on a Docker Swarm manager (ControlAvailable is true only on managers)
    if ! docker info --format '{{.Swarm.ControlAvailable}}' | grep -q "true"; then
        log ERROR "This script must be run on a Docker Swarm manager node"
        exit 1
    fi

    # Check required tools (openssl is needed by create_secrets)
    local required_tools=("docker" "jq" "curl" "openssl")
    for tool in "${required_tools[@]}"; do
        if ! command -v "$tool" >/dev/null 2>&1; then
            log ERROR "Required tool not found: $tool"
            exit 1
        fi
    done

    # Check network connectivity to registry
    if ! docker pull "$DOCKER_REGISTRY/bzzz:v2.0.0" >/dev/null 2>&1; then
        log WARN "Unable to pull from registry, using local images"
    fi

    log INFO "Prerequisites check completed"
}
# Create necessary directories
setup_directories() {
    log INFO "Setting up directories..."

    local dirs=(
        "/rust/bzzz-v2/monitoring/prometheus/data"
        "/rust/bzzz-v2/monitoring/grafana/data"
        "/rust/bzzz-v2/monitoring/alertmanager/data"
        "/rust/bzzz-v2/monitoring/loki/data"
        "/rust/bzzz-v2/backups/monitoring"
    )

    for dir in "${dirs[@]}"; do
        if [[ "$DRY_RUN" != "true" ]]; then
            sudo mkdir -p "$dir"
            sudo chown -R 65534:65534 "$dir" # nobody user for containers
        fi
        log DEBUG "Created directory: $dir"
    done
}
# Backup existing configuration
backup_existing_config() {
    if [[ "$BACKUP_EXISTING" != "true" ]]; then
        log INFO "Skipping backup (BACKUP_EXISTING=false)"
        return
    fi

    log INFO "Backing up existing configuration..."

    local backup_dir="/rust/bzzz-v2/backups/monitoring/backup_${TIMESTAMP}"

    if [[ "$DRY_RUN" != "true" ]]; then
        mkdir -p "$backup_dir"

        # Backup Docker secrets
        docker secret ls --filter name=bzzz_ --format "{{.Name}}" | while read -r secret; do
            if docker secret inspect "$secret" >/dev/null 2>&1; then
                docker secret inspect "$secret" > "$backup_dir/${secret}.json"
                log DEBUG "Backed up secret: $secret"
            fi
        done

        # Backup Docker configs
        docker config ls --filter name=bzzz_ --format "{{.Name}}" | while read -r config; do
            if docker config inspect "$config" >/dev/null 2>&1; then
                docker config inspect "$config" > "$backup_dir/${config}.json"
                log DEBUG "Backed up config: $config"
            fi
        done

        # Backup service definitions
        if docker stack services "$STACK_NAME" >/dev/null 2>&1; then
            docker stack services "$STACK_NAME" --format "{{.Name}}" | while read -r service; do
                docker service inspect "$service" > "$backup_dir/${service}-service.json"
            done
        fi
    fi

    log INFO "Backup completed: $backup_dir"
}
# Create Docker secrets
create_secrets() {
    log INFO "Creating Docker secrets..."

    local secrets=(
        "bzzz_grafana_admin_password:$(openssl rand -base64 32)"
        "bzzz_postgres_password:$(openssl rand -base64 32)"
    )

    # Check if secrets directory exists
    local secrets_dir="$HOME/chorus/business/secrets"
    if [[ -d "$secrets_dir" ]]; then
        # Use existing secrets if available
        if [[ -f "$secrets_dir/grafana-admin-password" ]]; then
            secrets[0]="bzzz_grafana_admin_password:$(cat "$secrets_dir/grafana-admin-password")"
        fi
        if [[ -f "$secrets_dir/postgres-password" ]]; then
            secrets[1]="bzzz_postgres_password:$(cat "$secrets_dir/postgres-password")"
        fi
    fi

    for secret_def in "${secrets[@]}"; do
        local secret_name="${secret_def%%:*}"
        local secret_value="${secret_def#*:}"

        if docker secret inspect "$secret_name" >/dev/null 2>&1; then
            log DEBUG "Secret already exists: $secret_name"
        else
            if [[ "$DRY_RUN" != "true" ]]; then
                echo "$secret_value" | docker secret create "$secret_name" -
                log INFO "Created secret: $secret_name"
            else
                log DEBUG "Would create secret: $secret_name"
            fi
        fi
    done
}
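# Note (illustrative): to use deterministic credentials instead of the random
# values generated above, pre-seed the files that create_secrets looks for:
#   mkdir -p "$HOME/chorus/business/secrets"
#   openssl rand -base64 32 > "$HOME/chorus/business/secrets/grafana-admin-password"
#   openssl rand -base64 32 > "$HOME/chorus/business/secrets/postgres-password"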
# Create Docker configs
create_configs() {
    log INFO "Creating Docker configs..."

    local configs=(
        "bzzz_prometheus_config_${CONFIG_VERSION}:${PROJECT_ROOT}/monitoring/configs/prometheus.yml"
        "bzzz_prometheus_alerts_${CONFIG_VERSION}:${PROJECT_ROOT}/monitoring/configs/enhanced-alert-rules.yml"
        "bzzz_grafana_datasources_${CONFIG_VERSION}:${PROJECT_ROOT}/monitoring/configs/grafana-datasources.yml"
        "bzzz_alertmanager_config_${CONFIG_VERSION}:${PROJECT_ROOT}/monitoring/configs/alertmanager.yml"
    )

    for config_def in "${configs[@]}"; do
        local config_name="${config_def%%:*}"
        local config_file="${config_def#*:}"

        if [[ ! -f "$config_file" ]]; then
            log WARN "Config file not found: $config_file"
            continue
        fi

        if docker config inspect "$config_name" >/dev/null 2>&1; then
            log DEBUG "Config already exists: $config_name"
            # Remove the old (unversioned) config if it exists
            if [[ "$DRY_RUN" != "true" ]]; then
                local old_config_name="${config_name%_${CONFIG_VERSION}}"
                if docker config inspect "$old_config_name" >/dev/null 2>&1; then
                    docker config rm "$old_config_name" || true
                fi
            fi
        else
            if [[ "$DRY_RUN" != "true" ]]; then
                docker config create "$config_name" "$config_file"
                log INFO "Created config: $config_name"
            else
                log DEBUG "Would create config: $config_name from $config_file"
            fi
        fi
    done
}
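# Quick manual check (illustrative) of what the two functions above produced:
#   docker secret ls --filter name=bzzz_
#   docker config ls --filter name=bzzz_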
# Create missing config files
create_missing_configs() {
    log INFO "Creating missing configuration files..."

    # Create Grafana datasources config
    local grafana_datasources="${PROJECT_ROOT}/monitoring/configs/grafana-datasources.yml"
    if [[ ! -f "$grafana_datasources" ]]; then
        cat > "$grafana_datasources" <<EOF
apiVersion: 1

datasources:
  - name: Prometheus
    type: prometheus
    access: proxy
    url: http://prometheus:9090
    isDefault: true
    editable: true

  - name: Loki
    type: loki
    access: proxy
    url: http://loki:3100
    editable: true

  - name: Jaeger
    type: jaeger
    access: proxy
    url: http://jaeger:16686
    editable: true
EOF
        log INFO "Created Grafana datasources config"
    fi

    # Create AlertManager config
    local alertmanager_config="${PROJECT_ROOT}/monitoring/configs/alertmanager.yml"
    if [[ ! -f "$alertmanager_config" ]]; then
        cat > "$alertmanager_config" <<EOF
global:
  smtp_smarthost: 'localhost:587'
  smtp_from: 'alerts@chorus.services'
  slack_api_url_file: '/run/secrets/slack_webhook_url'

route:
  group_by: ['alertname', 'cluster', 'service']
  group_wait: 10s
  group_interval: 10s
  repeat_interval: 12h
  receiver: 'default'
  routes:
    - match:
        severity: critical
      receiver: 'critical-alerts'
    - match:
        service: bzzz
      receiver: 'bzzz-alerts'

receivers:
  - name: 'default'
    slack_configs:
      - channel: '#bzzz-alerts'
        title: 'BZZZ Alert: {{ .CommonAnnotations.summary }}'
        text: '{{ range .Alerts }}{{ .Annotations.description }}{{ end }}'

  - name: 'critical-alerts'
    slack_configs:
      - channel: '#bzzz-critical'
        title: 'CRITICAL: {{ .CommonAnnotations.summary }}'
        text: '{{ range .Alerts }}{{ .Annotations.description }}{{ end }}'

  - name: 'bzzz-alerts'
    slack_configs:
      - channel: '#bzzz-alerts'
        title: 'BZZZ: {{ .CommonAnnotations.summary }}'
        text: '{{ range .Alerts }}{{ .Annotations.description }}{{ end }}'
EOF
        log INFO "Created AlertManager config"
    fi
}
# Deploy monitoring stack
deploy_monitoring_stack() {
    log INFO "Deploying monitoring stack..."

    local compose_file="${PROJECT_ROOT}/monitoring/docker-compose.enhanced.yml"

    if [[ ! -f "$compose_file" ]]; then
        log ERROR "Compose file not found: $compose_file"
        exit 1
    fi

    if [[ "$DRY_RUN" != "true" ]]; then
        # Deploy the stack
        docker stack deploy -c "$compose_file" "$STACK_NAME"
        log INFO "Stack deployment initiated: $STACK_NAME"

        # Wait for services to be ready
        log INFO "Waiting for services to be ready..."
        local max_attempts=30
        local attempt=0

        while [[ $attempt -lt $max_attempts ]]; do
            local ready_services=0
            local total_services=0

            # Count ready services
            while read -r service; do
                total_services=$((total_services + 1))
                local replicas_info
                replicas_info=$(docker service ls --filter name="$service" --format "{{.Replicas}}")

                if [[ "$replicas_info" =~ ^([0-9]+)/([0-9]+)$ ]]; then
                    local current="${BASH_REMATCH[1]}"
                    local desired="${BASH_REMATCH[2]}"

                    if [[ "$current" -eq "$desired" ]]; then
                        ready_services=$((ready_services + 1))
                    fi
                fi
            done < <(docker stack services "$STACK_NAME" --format "{{.Name}}")

            if [[ $ready_services -eq $total_services ]]; then
                log INFO "All services are ready ($ready_services/$total_services)"
                break
            else
                log DEBUG "Services ready: $ready_services/$total_services"
                sleep 10
                attempt=$((attempt + 1))
            fi
        done

        if [[ $attempt -eq $max_attempts ]]; then
            log WARN "Timeout waiting for all services to be ready"
        fi
    else
        log DEBUG "Would deploy stack with compose file: $compose_file"
    fi
}
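# Manual equivalents (illustrative), useful when debugging a partial rollout;
# paths are relative to the project root assumed above:
#   docker stack deploy -c monitoring/docker-compose.enhanced.yml bzzz-monitoring-v2
#   docker stack services bzzz-monitoring-v2
#   docker service ps <service-name> --no-trunc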
# Perform health checks
perform_health_checks() {
    log INFO "Performing health checks..."

    if [[ "$DRY_RUN" == "true" ]]; then
        log DEBUG "Skipping health checks in dry run mode"
        return
    fi

    local endpoints=(
        "http://localhost:9090/-/healthy:Prometheus"
        "http://localhost:3000/api/health:Grafana"
        "http://localhost:9093/-/healthy:AlertManager"
    )

    local max_attempts=$((HEALTH_CHECK_TIMEOUT / 10))
    local attempt=0

    while [[ $attempt -lt $max_attempts ]]; do
        local healthy_endpoints=0

        for endpoint_def in "${endpoints[@]}"; do
            # Split on the LAST colon: the URL itself contains "://" and a port
            local endpoint="${endpoint_def%:*}"
            local service="${endpoint_def##*:}"

            if curl -sf "$endpoint" >/dev/null 2>&1; then
                healthy_endpoints=$((healthy_endpoints + 1))
                log DEBUG "Health check passed: $service"
            else
                log DEBUG "Health check pending: $service"
            fi
        done

        if [[ $healthy_endpoints -eq ${#endpoints[@]} ]]; then
            log INFO "All health checks passed"
            return
        fi

        sleep 10
        attempt=$((attempt + 1))
    done

    log WARN "Some health checks failed after ${HEALTH_CHECK_TIMEOUT}s timeout"
}
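# The same checks can be run by hand (illustrative):
#   curl -sf http://localhost:9090/-/healthy && echo "Prometheus OK"
#   curl -sf http://localhost:3000/api/health && echo "Grafana OK"
#   curl -sf http://localhost:9093/-/healthy && echo "AlertManager OK"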
# Validate deployment
validate_deployment() {
    log INFO "Validating deployment..."

    if [[ "$DRY_RUN" == "true" ]]; then
        log DEBUG "Skipping validation in dry run mode"
        return
    fi

    # Check stack services
    local services
    services=$(docker stack services "$STACK_NAME" --format "{{.Name}}" | wc -l)
    log INFO "Deployed services: $services"

    # Check if Prometheus is collecting metrics
    sleep 30 # Allow time for initial metric collection

    if curl -sf "http://localhost:9090/api/v1/query?query=up" | jq -r '.data.result | length' | grep -q "^[1-9]"; then
        log INFO "Prometheus is collecting metrics"
    else
        log WARN "Prometheus may not be collecting metrics yet"
    fi

    # Check if Grafana can connect to Prometheus
    local grafana_health
    if grafana_health=$(curl -sf "http://admin:admin@localhost:3000/api/datasources/proxy/1/api/v1/query?query=up" 2>/dev/null); then
        log INFO "Grafana can connect to Prometheus"
    else
        log WARN "Grafana datasource connection may be pending"
    fi

    # Check AlertManager configuration
    if curl -sf "http://localhost:9093/api/v1/status" >/dev/null 2>&1; then
        log INFO "AlertManager is operational"
    else
        log WARN "AlertManager may not be ready"
    fi
}
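# Spot-check the same signals by hand (illustrative):
#   curl -s 'http://localhost:9090/api/v1/query?query=up' | jq '.data.result | length'
#   curl -s http://localhost:9093/api/v1/status | jq .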
# Import Grafana dashboards
import_dashboards() {
    log INFO "Importing Grafana dashboards..."

    if [[ "$DRY_RUN" == "true" ]]; then
        log DEBUG "Skipping dashboard import in dry run mode"
        return
    fi

    # Wait for Grafana to be ready
    local max_attempts=30
    local attempt=0

    while [[ $attempt -lt $max_attempts ]]; do
        if curl -sf "http://admin:admin@localhost:3000/api/health" >/dev/null 2>&1; then
            break
        fi
        sleep 5
        attempt=$((attempt + 1))
    done

    if [[ $attempt -eq $max_attempts ]]; then
        log WARN "Grafana not ready for dashboard import"
        return
    fi

    # Import dashboards
    local dashboard_dir="${PROJECT_ROOT}/monitoring/grafana-dashboards"
    if [[ -d "$dashboard_dir" ]]; then
        for dashboard_file in "$dashboard_dir"/*.json; do
            if [[ -f "$dashboard_file" ]]; then
                local dashboard_name
                dashboard_name=$(basename "$dashboard_file" .json)

                # -f makes curl exit non-zero on HTTP errors so failed imports hit the else branch
                if curl -sf -X POST \
                    -H "Content-Type: application/json" \
                    -d "@$dashboard_file" \
                    "http://admin:admin@localhost:3000/api/dashboards/db" \
                    >/dev/null 2>&1; then
                    log INFO "Imported dashboard: $dashboard_name"
                else
                    log WARN "Failed to import dashboard: $dashboard_name"
                fi
            fi
        done
    fi
}
# Generate deployment report
generate_report() {
    log INFO "Generating deployment report..."

    local report_file="/tmp/bzzz-monitoring-deployment-report-${TIMESTAMP}.txt"

    cat > "$report_file" <<EOF
BZZZ Enhanced Monitoring Stack Deployment Report
================================================

Deployment Time: $(date)
Environment: $ENVIRONMENT
Stack Name: $STACK_NAME
Dry Run: $DRY_RUN

Services Deployed:
EOF

    if [[ "$DRY_RUN" != "true" ]]; then
        docker stack services "$STACK_NAME" --format " - {{.Name}}: {{.Replicas}}" >> "$report_file"

        echo "" >> "$report_file"
        echo "Service Health:" >> "$report_file"

        # Add health check results
        local health_endpoints=(
            "http://localhost:9090/-/healthy:Prometheus"
            "http://localhost:3000/api/health:Grafana"
            "http://localhost:9093/-/healthy:AlertManager"
        )

        for endpoint_def in "${health_endpoints[@]}"; do
            # Split on the last colon, as in perform_health_checks
            local endpoint="${endpoint_def%:*}"
            local service="${endpoint_def##*:}"

            if curl -sf "$endpoint" >/dev/null 2>&1; then
                echo " - $service: ✅ Healthy" >> "$report_file"
            else
                echo " - $service: ❌ Unhealthy" >> "$report_file"
            fi
        done
    else
        echo " [Dry run mode - no services deployed]" >> "$report_file"
    fi

    cat >> "$report_file" <<EOF

Access URLs:
- Grafana: http://localhost:3000 (admin/admin)
- Prometheus: http://localhost:9090
- AlertManager: http://localhost:9093

Configuration:
- Log file: $LOG_FILE
- Backup directory: /rust/bzzz-v2/backups/monitoring/backup_${TIMESTAMP}
- Config version: $CONFIG_VERSION

Next Steps:
1. Change default Grafana admin password
2. Configure notification channels in AlertManager
3. Review and customize alert rules
4. Set up external authentication (optional)

EOF

    log INFO "Deployment report generated: $report_file"

    # Display report
    echo ""
    echo "=========================================="
    cat "$report_file"
    echo "=========================================="
}
# Main execution
main() {
    log INFO "Starting BZZZ Enhanced Monitoring Stack deployment"
    log INFO "Environment: $ENVIRONMENT, Dry Run: $DRY_RUN"
    log INFO "Log file: $LOG_FILE"

    check_prerequisites
    setup_directories
    backup_existing_config
    create_missing_configs
    create_secrets
    create_configs
    deploy_monitoring_stack
    perform_health_checks
    validate_deployment
    import_dashboards
    generate_report

    log INFO "Deployment completed successfully!"

    if [[ "$DRY_RUN" != "true" ]]; then
        echo ""
        echo "🎉 BZZZ Enhanced Monitoring Stack is now running!"
        echo "📊 Grafana Dashboard: http://localhost:3000"
        echo "📈 Prometheus: http://localhost:9090"
        echo "🚨 AlertManager: http://localhost:9093"
        echo ""
        echo "Next steps:"
        echo "1. Change default Grafana password"
        echo "2. Configure alert notification channels"
        echo "3. Review monitoring dashboards"
        echo "4. Run reliability tests: ./infrastructure/testing/run-tests.sh all"
    fi
}
# Script execution
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
    main "$@"
fi