Files
hive/monitoring/alert_rules.yml
anthonyrawlins 268214d971 Major WHOOSH system refactoring and feature enhancements
- Migrated from HIVE branding to WHOOSH across all components
- Enhanced backend API with new services: AI models, BZZZ integration, templates, members
- Added comprehensive testing suite with security, performance, and integration tests
- Improved frontend with new components for project setup, AI models, and team management
- Updated MCP server implementation with WHOOSH-specific tools and resources
- Enhanced deployment configurations with production-ready Docker setups
- Added comprehensive documentation and setup guides
- Implemented age encryption service and UCXL integration

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-08-27 08:34:48 +10:00

74 lines
2.4 KiB
YAML

# Prometheus alerting rules for the WHOOSH stack.
#
# Covers scrape-target availability (backend, PostgreSQL, Redis), backend
# latency and 5xx rate, PostgreSQL connection count, and node memory/disk usage.
groups:
  - name: whoosh_alerts
    rules:
      # --- WHOOSH backend ---------------------------------------------------

      # Fires when the backend scrape target has reported down for 1 minute.
      - alert: WhooshBackendDown
        expr: up{job="whoosh-backend"} == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "WHOOSH Backend is down"
          description: "WHOOSH Backend has been down for more than 1 minute."

      # p95 request latency above 2s, sustained for 5 minutes.
      # The bucket counters are cumulative, so they are wrapped in rate() to
      # compute the quantile over the last 5 minutes instead of over the
      # whole lifetime of the process.
      - alert: WhooshHighResponseTime
        expr: >-
          histogram_quantile(0.95,
          rate(http_request_duration_seconds_bucket{job="whoosh-backend"}[5m]))
          > 2
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High response time on WHOOSH Backend"
          description: "95th percentile response time is {{ $value }}s"

      # More than 0.1 5xx responses per second, sustained for 5 minutes.
      # There is no aggregation here, so the threshold is evaluated separately
      # for every matching series.
      - alert: WhooshHighErrorRate
        expr: >-
          rate(http_requests_total{job="whoosh-backend",status=~"5.."}[5m])
          > 0.1
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "High error rate on WHOOSH Backend"
          description: "Error rate is {{ $value }} errors per second"

      # --- PostgreSQL -------------------------------------------------------

      # Fires when the PostgreSQL scrape target has reported down for 1 minute.
      - alert: PostgreSQLDown
        expr: up{job="whoosh-postgres"} == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "PostgreSQL is down"
          description: "PostgreSQL database has been down for more than 1 minute."

      # pg_stat_database_numbackends is reported per database; summing per
      # instance compares the server-wide connection count to the threshold.
      # NOTE(review): 80 presumably leaves headroom below max_connections -
      # confirm against the server configuration.
      - alert: PostgreSQLHighConnections
        expr: sum by (instance) (pg_stat_database_numbackends) > 80
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High number of PostgreSQL connections"
          description: "Number of connections is {{ $value }}"

      # --- Redis ------------------------------------------------------------

      # Fires when the Redis scrape target has reported down for 1 minute.
      - alert: RedisDown
        expr: up{job="whoosh-redis"} == 0
        for: 1m
        labels:
          severity: warning
        annotations:
          summary: "Redis is down"
          description: "Redis cache has been down for more than 1 minute."

      # --- Node resources ---------------------------------------------------

      # Used memory (total minus available) above 90% of total for 5 minutes.
      - alert: HighMemoryUsage
        expr: >-
          (node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes)
          / node_memory_MemTotal_bytes > 0.9
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High memory usage"
          description: "Memory usage is {{ $value | humanizePercentage }}"

      # Used space (size minus available) above 90% of size for 5 minutes.
      # Memory-backed (tmpfs, ramfs) and read-only image filesystems
      # (squashfs, iso9660) are excluded: the latter always report as full
      # and would keep this alert firing permanently.
      - alert: HighDiskUsage
        expr: >-
          (node_filesystem_size_bytes{fstype!~"tmpfs|ramfs|squashfs|iso9660"}
          - node_filesystem_avail_bytes{fstype!~"tmpfs|ramfs|squashfs|iso9660"})
          / node_filesystem_size_bytes{fstype!~"tmpfs|ramfs|squashfs|iso9660"}
          > 0.9
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High disk usage"
          description: "Disk usage is {{ $value | humanizePercentage }}"